'unlabeled-1.51.2.2.2'.
+++ /dev/null
-Kernelenv
-Makefile
-autoMakefile
-autoMakefile.in
-aclocal.m4
-autom4te.cache
-config.log
-config.status
-configure
-.*.cmd
-.depend
+++ /dev/null
-tbd Sun Microsystems, Inc.
- * version 1.6.6
- * Support for networks:
- socklnd - any kernel supported by Lustre,
- qswlnd - Qsnet kernel modules 5.20 and later,
- openiblnd - IbGold 1.8.2,
- o2iblnd - OFED 1.1, 1.2.0, 1.2.5, and 1.3
- viblnd - Voltaire ibhost 3.4.5 and later,
- ciblnd - Topspin 3.2.0,
- iiblnd - Infiniserv 3.3 + PathBits patch,
- gmlnd - GM 2.1.22 and later,
- mxlnd - MX 1.2.1 or later,
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
-
-Severity :
-Bugzilla :
-Description:
-Details :
-
-Severity : normal
-Bugzilla : 15272
-Description: ptl_send_rpc hits LASSERT when ptl_send_buf fails
-Details : only hits under out-of-memory situations
-
-
--------------------------------------------------------------------------------
-
-
-04-26-2008 Sun Microsystems, Inc.
- * version 1.6.5
- * Support for networks:
- socklnd - any kernel supported by Lustre,
- qswlnd - Qsnet kernel modules 5.20 and later,
- openiblnd - IbGold 1.8.2,
- o2iblnd - OFED 1.1 and 1.2.0, 1.2.5
- viblnd - Voltaire ibhost 3.4.5 and later,
- ciblnd - Topspin 3.2.0,
- iiblnd - Infiniserv 3.3 + PathBits patch,
- gmlnd - GM 2.1.22 and later,
- mxlnd - MX 1.2.1 or later,
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
-
-Severity : normal
-Bugzilla : 14322
-Description: excessive debug information removed
-Details : excessive debug information removed
-
-Severity : major
-Bugzilla : 15712
-Description: ksocknal_create_conn() hit ASSERTION during connection race
-Details : ksocknal_create_conn() hit ASSERTION during connection race
-
-Severity : major
-Bugzilla : 13983
-Description: ksocknal_send_hello() hit ASSERTION while connecting race
-Details : ksocknal_send_hello() hit ASSERTION while connecting race
-
-Severity : major
-Bugzilla : 14425
-Description: o2iblnd/ptllnd credit deadlock in a routed config.
-Details : o2iblnd/ptllnd credit deadlock in a routed config.
-
-Severity : normal
-Bugzilla : 14956
-Description: High load after starting lnet
-Details    : gmlnd should sleep in rx thread in interruptible way. Otherwise,
-             uptime utility reports high load that looks confusing.
-
-Severity : normal
-Bugzilla : 14838
-Description: ksocklnd fails to establish connection if accept_port is high
-Details : PID remapping must not be done for active (outgoing) connections
-
---------------------------------------------------------------------------------
-
-2008-01-11 Sun Microsystems, Inc.
- * version 1.4.12
- * Support for networks:
- socklnd - any kernel supported by Lustre,
- qswlnd - Qsnet kernel modules 5.20 and later,
- openiblnd - IbGold 1.8.2,
- o2iblnd - OFED 1.1 and 1.2.0, 1.2.5
- viblnd - Voltaire ibhost 3.4.5 and later,
- ciblnd - Topspin 3.2.0,
- iiblnd - Infiniserv 3.3 + PathBits patch,
- gmlnd - GM 2.1.22 and later,
- mxlnd - MX 1.2.1 or later,
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
-Severity : normal
-Bugzilla : 14387
-Description: liblustre network error
-Details : liblustre clients should understand LNET_ACCEPT_PORT environment
- variable even if they don't start lnet acceptor.
-
-Severity : normal
-Bugzilla : 14300
-Description: Strange message from lnet (Ignoring prediction from the future)
-Details : Incorrect calculation of peer's last_alive value in ksocklnd
-
---------------------------------------------------------------------------------
-
-2007-12-07 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.6.4
- * Support for networks:
- socklnd - any kernel supported by Lustre,
- qswlnd - Qsnet kernel modules 5.20 and later,
- openiblnd - IbGold 1.8.2,
- o2iblnd - OFED 1.1 and 1.2.0, 1.2.5.
- viblnd - Voltaire ibhost 3.4.5 and later,
- ciblnd - Topspin 3.2.0,
- iiblnd - Infiniserv 3.3 + PathBits patch,
- gmlnd - GM 2.1.22 and later,
- mxlnd - MX 1.2.1 or later,
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
-
-Severity : normal
-Bugzilla : 14238
-Description: ASSERTION(me == md->md_me) failed in lnet_match_md()
-
-Severity : normal
-Bugzilla : 12494
-Description: increase send queue size for ciblnd/openiblnd
-
-Severity : normal
-Bugzilla : 12302
-Description: new userspace socklnd
-Details : Old userspace tcpnal that resided in lnet/ulnds/socklnd replaced
- with new one - usocklnd.
-
-Severity : enhancement
-Bugzilla : 11686
-Description: Console message flood
-Details : Make cdls ratelimiting more tunable by adding several tunable in
- procfs /proc/sys/lnet/console_{min,max}_delay_centisecs and
- /proc/sys/lnet/console_backoff.
-
---------------------------------------------------------------------------------
-
-2007-09-27 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.6.3
- * Support for networks:
- socklnd - any kernel supported by Lustre,
- qswlnd - Qsnet kernel modules 5.20 and later,
- openiblnd - IbGold 1.8.2,
- o2iblnd - OFED 1.1 and 1.2,
- viblnd - Voltaire ibhost 3.4.5 and later,
- ciblnd - Topspin 3.2.0,
- iiblnd - Infiniserv 3.3 + PathBits patch,
- gmlnd - GM 2.1.22 and later,
- mxlnd - MX 1.2.1 or later,
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
-
-Severity : normal
-Bugzilla : 12782
-Description: /proc/sys/lnet has non-sysctl entries
-Details : Updating dump_kernel/daemon_file/debug_mb to use sysctl variables
-
-Severity : major
-Bugzilla : 13236
-Description: TOE Kernel panic by ksocklnd
-Details : offloaded sockets provide their own implementation of sendpage,
- can't call tcp_sendpage() directly
-
-Severity : normal
-Bugzilla : 10778
-Description: kibnal_shutdown() doesn't finish; lconf --cleanup hangs
-Details : races between lnd_shutdown and peer creation prevent
- lnd_shutdown from finishing.
-
-Severity : normal
-Bugzilla : 13279
-Description: open files rlimit 1024 reached while liblustre testing
-Details : ulnds/socklnd must close open socket after unsuccessful
- 'say hello' attempt.
-
-Severity : major
-Bugzilla : 13482
-Description: build error
-Details : fix typos in gmlnd, ptllnd and viblnd
-
-------------------------------------------------------------------------------
-
-2007-07-30 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.6.1
- * Support for networks:
- socklnd - kernels up to 2.6.16,
- qswlnd - Qsnet kernel modules 5.20 and later,
- openiblnd - IbGold 1.8.2,
- o2iblnd - OFED 1.1 and 1.2
- viblnd - Voltaire ibhost 3.4.5 and later,
- ciblnd - Topspin 3.2.0,
- iiblnd - Infiniserv 3.3 + PathBits patch,
- gmlnd - GM 2.1.22 and later,
- mxlnd - MX 1.2.1 or later,
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
-
-2007-06-21 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.4.11
- * Support for networks:
- socklnd - kernels up to 2.6.16,
- qswlnd - Qsnet kernel modules 5.20 and later,
- openiblnd - IbGold 1.8.2,
- o2iblnd - OFED 1.1
- viblnd - Voltaire ibhost 3.4.5 and later,
- ciblnd - Topspin 3.2.0,
- iiblnd - Infiniserv 3.3 + PathBits patch,
- gmlnd - GM 2.1.22 and later,
- mxlnd - MX 1.2.1 or later,
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
-
-Severity : minor
-Bugzilla : 13288
-Description: Initialize cpumask before use
-
-Severity : major
-Bugzilla : 12014
-Description: ASSERTION failures when upgrading to the patchless zero-copy
- socklnd
-Details : This bug affects "rolling upgrades", causing an inconsistent
- protocol version negotiation and subsequent assertion failure
- during rolling upgrades after the first wave of upgrades.
-
-Severity : minor
-Bugzilla : 11223
-Details : Change "dropped message" CERRORs to D_NETERROR so they are
- logged instead of creating "console chatter" when a lustre
- timeout races with normal RPC completion.
-
-Severity : minor
-Details : lnet_clear_peer_table can wait forever if user forgets to
- clear a lazy portal.
-
-Severity : minor
-Details : libcfs_id2str should check pid against LNET_PID_ANY.
-
-Severity : major
-Bugzilla : 10916
-Description: added LNET self test
-Details : landing b_self_test
-
-Severity : minor
-Frequency : rare
-Bugzilla : 12227
-Description: cfs_duration_{u,n}sec() wrongly calculate nanosecond part of
- struct timeval.
-Details : do_div() macro is used incorrectly.
-
-2007-04-23 Cluster File Systems, Inc. <info@clusterfs.com>
-
-Severity : normal
-Bugzilla : 11680
-Description: make panic on lbug configurable
-
-Severity : major
-Bugzilla : 12316
-Description: Add OFED1.2 support to o2iblnd
-Details : o2iblnd depends on OFED's modules, if out-tree OFED's modules
- are installed (other than kernel's in-tree infiniband), there
- could be some problem while insmod o2iblnd (mismatch CRC of
- ib_* symbols).
- If extra Module.symvers is supported in kernel (i.e, 2.6.17),
- this link provides solution:
- https://bugs.openfabrics.org/show_bug.cgi?id=355
- if extra Module.symvers is not supported in kernel, we will
- have to run the script in bug 12316 to update
- $LINUX/module.symvers before building o2iblnd.
- More details about this are in bug 12316.
-
-------------------------------------------------------------------------------
-
-2007-04-01 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.4.10 / 1.6.0
- * Support for networks:
- socklnd - kernels up to 2.6.16,
- qswlnd - Qsnet kernel modules 5.20 and later,
- openiblnd - IbGold 1.8.2,
- o2iblnd - OFED 1.1,
- viblnd - Voltaire ibhost 3.4.5 and later,
- ciblnd - Topspin 3.2.0,
- iiblnd - Infiniserv 3.3 + PathBits patch,
- gmlnd - GM 2.1.22 and later,
- mxlnd - MX 1.2.1 or later,
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
-
-Severity : minor
-Frequency : rare
-Description: Ptllnd didn't init kptllnd_data.kptl_idle_txs before it could be
- possibly accessed in kptllnd_shutdown. Ptllnd should init
- kptllnd_data.kptl_ptlid2str_lock before calling kptllnd_ptlid2str.
-
-Severity : normal
-Frequency : rare
-Description: gmlnd ignored some transmit errors when finalizing lnet messages.
-
-Severity : minor
-Frequency : rare
-Description: ptllnd logs a piece of incorrect debug info in kptllnd_peer_handle_hello.
-
-Severity : minor
-Frequency : rare
-Description: the_lnet.ln_finalizing was not set when the current thread is
- about to complete messages. It only affects multi-threaded
- user space LNet.
-
-Severity : normal
-Frequency : rare
-Bugzilla : 11472
-Description: Changed the default kqswlnd ntxmsg=512
-
-Severity : major
-Frequency : rare
-Bugzilla : 12458
-Description: Assertion failure in kernel ptllnd caused by posting passive
- bulk buffers before connection establishment complete.
-
-Severity : major
-Frequency : rare
-Bugzilla : 12445
-Description: A race in kernel ptllnd between deleting a peer and posting
- new communications for it could hang communications -
- manifesting as "Unexpectedly long timeout" messages.
-
-Severity : major
-Frequency : rare
-Bugzilla : 12432
-Description: Kernel ptllnd lock ordering issue could hang a node.
-
-Severity : major
-Frequency : rare
-Bugzilla : 12016
-Description: node crash on socket teardown race
-
-Severity : minor
-Frequency : 'lctl peer_list' issued on a mx net
-Bugzilla : 12237
-Description: Enable lctl's peer_list for MXLND
-
-Severity : major
-Frequency : after Ptllnd timeouts and portals congestion
-Bugzilla : 11659
-Description: Credit overflows
-Details : This was a bug in ptllnd connection establishment. The fix
- implements better peer stamps to disambiguate connection
- establishment and ensure both peers enter the credit flow
- state machine consistently.
-
-Severity : major
-Frequency : rare
-Bugzilla : 11394
-Description: kptllnd didn't propagate some network errors up to LNET
-Details : This bug was spotted while investigating 11394. The fix
- ensures network errors on sends and bulk transfers are
- propagated to LNET/lustre correctly.
-
-Severity : enhancement
-Bugzilla : 10316
-Description: Fixed console chatter in case of -ETIMEDOUT.
-
-Severity : enhancement
-Bugzilla : 11684
-Description: Added D_NETTRACE for recording network packet history
- (initially only for ptllnd). Also a separate userspace
- ptllnd facility to gather history which should really be
- covered by D_NETTRACE too, if only CDEBUG recorded history in
- userspace.
-
-Severity : major
-Frequency : rare
-Bugzilla : 11616
-Description: o2iblnd handle early RDMA_CM_EVENT_DISCONNECTED.
-Details : If the fabric is lossy, an RDMA_CM_EVENT_DISCONNECTED
- callback can occur before a connection has actually been
- established. This caused an assertion failure previously.
-
-Severity : enhancement
-Bugzilla : 11094
-Description: Multiple instances for o2iblnd
-Details : Allow multiple instances of o2iblnd to enable networking over
- multiple HCAs and routing between them.
-
-Severity : major
-Bugzilla : 11201
-Description: lnet deadlock in router_checker
-Details : turned ksnd_connd_lock, ksnd_reaper_lock, and ksock_net_t:ksnd_lock
- into BH locks to eliminate potential deadlock caused by
- ksocknal_data_ready() preempting code holding these locks.
-
-Severity : major
-Bugzilla : 11126
-Description: Millions of failed socklnd connection attempts cause a very slow FS
-Details : added a new route flag ksnr_scheduled to distinguish from
- ksnr_connecting, so that a peer connection request is only turned
- down for race concerns when an active connection to the same peer
- is under progress (instead of just being scheduled).
-
-------------------------------------------------------------------------------
-
-2007-02-09 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.4.9
- * Support for networks:
- socklnd - kernels up to 2.6.16
- qswlnd - Qsnet kernel modules 5.20 and later
- openiblnd - IbGold 1.8.2
- o2iblnd - OFED 1.1
- viblnd - Voltaire ibhost 3.4.5 and later
- ciblnd - Topspin 3.2.0
- iiblnd - Infiniserv 3.3 + PathBits patch
- gmlnd - GM 2.1.22 and later
- mxlnd - MX 1.2.1 or later
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
- * bug fixes
-
-Severity : major on XT3
-Bugzilla : none
-Description: libcfs overwrites /proc/sys/portals
-Details : libcfs created a symlink from /proc/sys/portals to
- /proc/sys/lnet for backwards compatibility. This is no
- longer required and makes the Cray portals /proc variables
- inaccessible.
-
-Severity : minor
-Bugzilla : 11312
-Description: OFED FMR API change
-Details : This changes parameter usage to reflect a change in
- ib_fmr_pool_map_phys() between OFED 1.0 and OFED 1.1. Note
- that FMR support is only used in experimental versions of the
- o2iblnd - this change does not affect standard usage at all.
-
-Severity : enhancement
-Bugzilla : 11245
-Description: new ko2iblnd module parameter: ib_mtu
-Details : the default IB MTU of 2048 performs badly on 23108 Tavor
- HCAs. You can avoid this problem by setting the MTU to 1024
- using this module parameter.
-
-Severity : enhancement
-Bugzilla : 11118/11620
-Description: ptllnd small request message buffer alignment fix
-Details : Set the PTL_MD_LOCAL_ALIGN8 option on small message receives.
- Round up small message size on sends in case this option
- is not supported. 11620 was a defect in the initial
- implementation which effectively asserted all peers had to be
- running the correct protocol version which was fixed by always
- NAK-ing such requests and handling any misalignments they
- introduce.
-
-Severity : minor
-Frequency : rarely
-Description: When kib(nal|lnd)_del_peer() is called upon a peer whose
- ibp_tx_queue is not empty, kib(nal|lnd)_destroy_peer()'s
- 'LASSERT(list_empty(&peer->ibp_tx_queue))' will fail.
-
-Severity : enhancement
-Bugzilla : 11250
-Description: Patchless ZC(zero copy) socklnd
-Details : New protocol for socklnd, socklnd can support zero copy without
- kernel patch, it's compatible with old socklnd. Checksum is
- moved from tunables to modparams.
-
-Severity : minor
-Frequency : rarely
-Description: When ksocknal_del_peer() is called upon a peer whose
- ksnp_tx_queue is not empty, ksocknal_destroy_peer()'s
- 'LASSERT(list_empty(&peer->ksnp_tx_queue))' will fail.
-
-Severity : normal
-Frequency : when ptlrpc is under heavy use and runs out of request buffer
-Bugzilla : 11318
-Description: In lnet_match_blocked_msg(), md can be used without holding a
- ref on it.
-
-Severity : minor
-Frequency : very rarely
-Bugzilla : 10727
-Description: If ksocknal_lib_setup_sock() fails, a ref on peer is lost.
- If connd connects a route which has been closed by
- ksocknal_shutdown(), ksocknal_create_routes() may create new
- routes which hold references on the peer, causing shutdown
- process to wait for peer to disappear forever.
-
-Severity : enhancement
-Bugzilla : 11234
-Description: Dump XT3 portals traces on kptllnd timeout
-Details : Set the kptllnd module parameter "ptltrace_on_timeout=1" to
- dump Cray portals debug traces to a file. The kptllnd module
- parameter "ptltrace_basename", default "/tmp/lnet-ptltrace",
- is the basename of the dump file.
-
-Severity : major
-Frequency : infrequent
-Bugzilla : 11308
-Description: kernel ptllnd fix bug in connection re-establishment
-Details : Kernel ptllnd could produce protocol errors e.g. illegal
- matchbits and/or violate the credit flow protocol when trying
- to re-establish a connection with a peer after an error or
- timeout.
-
-Severity : enhancement
-Bugzilla : 10316
-Description: Allow /proc/sys/lnet/debug to be set symbolically
-Details : Allow debug and subsystem debug values to be read/set by name
- in addition to numerically, for ease of use.
-
-Severity : normal
-Frequency : only in configurations with LNET routers
-Bugzilla : 10316
-Description: routes automatically marked down and recovered
-Details : In configurations with LNET routers if a router fails routers
- now actively try to recover routes that are down, unless they
- are marked down by an administrator.
-
-------------------------------------------------------------------------------
-
-2006-12-09 Cluster File Systems, Inc. <info@clusterfs.com>
-
-Severity : critical
-Frequency : very rarely, in configurations with LNET routers and TCP
-Bugzilla : 10889
-Description: incorrect data written to files on OSTs
-Details : In certain high-load conditions incorrect data may be written
- to files on the OST when using TCP networks.
-
-------------------------------------------------------------------------------
-
-2006-07-31 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.4.7
- - rework CDEBUG messages rate-limiting mechanism b=10375
- - add per-socket tunables for socklnd if the kernel is patched b=10327
-
-------------------------------------------------------------------------------
-
-2006-02-15 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.4.6
- - fix use of portals/lnet pid to avoid dropping RPCs b=10074
- - iiblnd wasn't mapping all memory, resulting in comms errors b=9776
- - quiet LNET startup LNI message for liblustre b=10128
- - Better console error messages if 'ip2nets' can't match an IP address
- - Fixed overflow/use-before-set bugs in linux-time.h
- - Fixed ptllnd bug that wasn't initialising rx descriptors completely
- - LNET teardown failed an assertion about the route table being empty
- - Fixed a crash in LNetEQPoll(<invalid handle>)
- - Future protocol compatibility work (b_rls146_lnetprotovrsn)
- - improve debug message for liblustre/Catamount nodes (b=10116)
-
-2005-10-10 Cluster File Systems, Inc. <info@clusterfs.com>
- * Configuration change for the XT3
- The PTLLND is now used to run Lustre over Portals on the XT3.
- The configure option(s) --with-cray-portals are no longer
- used. Rather --with-portals=<path-to-portals-includes> is
- used to enable building on the XT3. In addition to enable
- XT3 specific features the option --enable-cray-xt3 must be
- used.
-
-2005-10-10 Cluster File Systems, Inc. <info@clusterfs.com>
- * Portals has been removed, replaced by LNET.
- LNET is new networking infrastructure for Lustre, it includes a
- reorganized network configuration mode (see the user
- documentation for full details) as well as support for routing
- between different network fabrics. Lustre Networking Devices
- (LNDS) for the supported network fabrics have also been created
- for this new infrastructure.
-
-2005-08-08 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.4.4
- * bug fixes
-
-Severity : major
-Frequency : rare (large Voltaire clusters only)
-Bugzilla : 6993
-Description: the default number of reserved transmit descriptors was too low
- for some large clusters
-Details : As a workaround, the number was increased. A proper fix includes
- a run-time tunable.
-
-2005-06-02 Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.4.3
- * bug fixes
-
-Severity : major
-Frequency : occasional (large-scale events, cluster reboot, network failure)
-Bugzilla : 6411
-Description: too many error messages on console obscure actual problem and
- can slow down/panic server, or cause recovery to fail repeatedly
-Details : enable rate-limiting of console error messages, and some messages
- that were console errors now only go to the kernel log
-
-Severity : enhancement
-Bugzilla : 1693
-Description: add /proc/sys/portals/catastrophe entry which will report if
- that node has previously LBUGged
-
-2005-04-06 Cluster File Systems, Inc. <info@clusterfs.com>
- * bugs
- - update gmnal to use PTL_MTU, fix module refcounting (b=5786)
-
-2005-04-04 Cluster File Systems, Inc. <info@clusterfs.com>
- * bugs
- - handle error return code in kranal_check_fma_rx() (5915,6054)
-
-2005-02-04 Cluster File Systems, Inc. <info@clusterfs.com>
- * miscellania
- - update vibnal (Voltaire IB NAL)
- - update gmnal (Myrinet NAL), gmnalid
-
-2005-02-04 Eric Barton <eeb@bartonsoftware.com>
-
- * Landed portals:b_port_step as follows...
-
- - removed CFS_DECL_SPIN*
- just use 'spinlock_t' and initialise with spin_lock_init()
-
- - removed CFS_DECL_MUTEX*
- just use 'struct semaphore' and initialise with init_mutex()
-
- - removed CFS_DECL_RWSEM*
- just use 'struct rw_semaphore' and initialise with init_rwsem()
-
- - renamed cfs_sleep_chan -> cfs_waitq
- cfs_sleep_link -> cfs_waitlink
-
- - fixed race in linux version of arch-independent socknal
- (the ENOMEM/EAGAIN decision).
-
- - Didn't fix problems in Darwin version of arch-independent socknal
- (resetting socket callbacks, eager ack hack, ENOMEM/EAGAIN decision)
-
- - removed libcfs types from non-socknal header files (only some types
- in the header files had been changed; the .c files hadn't been
- updated at all).
+++ /dev/null
-EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include
-# lnet/utils/debug.c wants <linux/version.h> from userspace. sigh.
-HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
-LIBREADLINE := @LIBREADLINE@
-# 2.5's makefiles aren't nice to cross dir libraries in host programs
-PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
+++ /dev/null
-EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include
-HOSTCFLAGS := $(EXTRA_CFLAGS)
-# the kernel doesn't want us to build archives for host binaries :/
-PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
+++ /dev/null
-Each file in this distribution should contain a header stating the
-copyright owner(s), and the licensing terms for that module. Some
-files are not eligible for copyright protection, and contain neither.
-
-All files in this subtree are licensed under the terms and conditions
-of the GNU General Public License version 2.
-
-Reproduced below is the GPL v2, and Linus's clarifying statement from
-the Linux kernel source code:
-
-----------------------------------------
-
- NOTE! This copyright does *not* cover user programs that use kernel
- services by normal system calls - this is merely considered normal use
- of the kernel, and does *not* fall under the heading of "derived work".
- Also note that the GPL below is copyrighted by the Free Software
- Foundation, but the instance of code that it refers to (the Linux
- kernel) is copyrighted by me and others who actually wrote it.
-
- Linus Torvalds
-
-----------------------------------------
-
- GNU GENERAL PUBLIC LICENSE
- Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
- We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-\f
- GNU GENERAL PUBLIC LICENSE
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- 0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License. The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language. (Hereinafter, translation is included without limitation in
-the term "modification".) Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
- 1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
- 2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
- a) You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
-
- b) You must cause any work that you distribute or publish, that in
- whole or in part contains or is derived from the Program or any
- part thereof, to be licensed as a whole at no charge to all third
- parties under the terms of this License.
-
- c) If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display an
- announcement including an appropriate copyright notice and a
- notice that there is no warranty (or else, saying that you provide
- a warranty) and that users may redistribute the program under
- these conditions, and telling the user how to view a copy of this
- License. (Exception: if the Program itself is interactive but
- does not normally print such an announcement, your work based on
- the Program is not required to print an announcement.)
-\f
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
- 3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
- a) Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of Sections
- 1 and 2 above on a medium customarily used for software interchange; or,
-
- b) Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a medium
- customarily used for software interchange; or,
-
- c) Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with such
- an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it. For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable. However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-\f
- 4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License. Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
- 5. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Program or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
- 6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
- 7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all. For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-\f
- 8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded. In such case, this License incorporates
-the limitation as if written in the body of this License.
-
- 9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation. If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
- 10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission. For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this. Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
- NO WARRANTY
-
- 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
- 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
- END OF TERMS AND CONDITIONS
-\f
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) 19yy <name of author>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
- Gnomovision version 69, Copyright (C) 19yy name of author
- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary. Here is a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the program
- `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
- <signature of Ty Coon>, 1 April 1989
- Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs. If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General
-Public License instead of this License.
+++ /dev/null
-subdir-m += libcfs
-
-lnet-subdirs += lnet
-lnet-subdirs += klnds
-lnet-subdirs += selftest
-subdir-m += $(lnet-subdirs)
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-SUBDIRS = libcfs lnet klnds ulnds selftest doc utils include \
- autoconf
-
-sources:
- $(MAKE) sources -C libcfs
+++ /dev/null
-Makefile
-Makefile.in
+++ /dev/null
-EXTRA_DIST := lustre-lnet.m4
+++ /dev/null
-Makefile
-Makefile.in
-*.eps
-*.pdf
+++ /dev/null
-In this document I will try to draw the data structures and how they
-interrelate in the Portals 3 reference implementation. It is probably
-best shown with a drawing, so there may be an additional xfig or
-Postscript figure.
-
-
-MEMORY POOLS:
-------------
-
-First, a digression on memory allocation in the library. As mentioned
-in the NAL Writer's Guide, the library does not link against any
-standard C libraries and as such is unable to dynamically allocate
-memory on its own. It requires that the NAL implement a method
-for allocation that is appropriate for the protection domain in
-which the library lives. This is only called when a network
-interface is initialized to allocate the Portals object pools.
-
-These pools are preallocate blocks of objects that the library
-can rapidly make active and manage with a minimum of overhead.
-It is also cuts down on overhead for setting up structures
-since the NAL->malloc() callback does not need to be called
-for each object.
-
-The objects are maintained on a per-object type singly linked free
-list and contain a pointer to the next free object. This pointer
-is NULL if the object is not on the free list and is non-zero
-if it is on the list. The special sentinal value of 0xDEADBEEF
-is used to mark the end of the free list since NULL could
-indicate that the last object in the list is not free.
-
-When one of the lib_*_alloc() functions is called, the library
-returns the head of the free list and advances the head pointer
-to the next item on the list. The special case of 0xDEADBEEF is
-checked and a NULL pointer is returned if there are no more
-objects of this type available. The lib_*_free() functions
-are even simpler -- check to ensure that the object is not already
-free, set its next pointer to the current head and then set
-the head to be this newly freed object.
-
-Since C does not have templates, I did the next best thing and wrote
-the memory pool allocation code as a macro that expands based on the
-type of the argument. The mk_alloc(T) macro expands to
-write the _lib_T_alloc() and lib_T_free() functions.
-It requires that the object have a pointer of the type T named
-"next_free". There are also functions that map _lib_T_alloc()
-to lib_T_alloc() so that the library can add some extra
-functionality to the T constructor.
-
-
-
-LINKED LISTS:
-------------
-
-Many of the active Portals objects are stored in doubly linked lists
-when they are active. These are always implemented with the pointer
-to the next object and a pointer to the next pointer of the
-previous object. This avoids the "dummy head" object or
-special cases for inserting at the beginning or end of the list.
-The pointer manipulations are a little hairy at times, but
-I hope that they are understandable.
-
-The actual linked list code is implemented as macros in <lib-p30.h>,
-although the object has to know about
-
-
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-LYX2PDF = lyx --export pdf
-LYX2TXT = lyx --export text
-LYX2HTML = lyx --export html
-SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps
-
-if DOC
- DOCS = portals3.pdf
-else
- DOCS =
-endif
-
-IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps
-LYXFILES= portals3.lyx
-
-MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(GENERATED)
-GENERATED =
-EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES)
-
-all: $(DOCS)
-
-# update date and version in document
-date := $(shell date +%x)
-tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/')
-addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g'
-
-# Regenerate when the $(VERSION) or $Name: $ changes.
-.INTERMEDIATE: $(GENERATED)
-$(GENERATED) : %.lyx: %.lin Makefile
- $(addversion) $< > $@
-
-.lyx.pdf:
- @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n"
-
-.lyx.txt:
- @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n"
-.lyx.html:
- @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n"
-.fig.eps:
- -fig2dev -L eps $< > $@
-
-portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx
-
-syncweb: portals3.pdf
-# cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf
-# ( cd /usr/src/www ; make lustre ; make synclustre )
-
+++ /dev/null
-This documents the life cycle of message as it arrives and is handled by
-a basic async, packetized NAL. There are four types of messages that have
-slightly different life cycles, so they are addressed independently.
-
-
-Put request
------------
-
-1. NAL notices that there is a incoming message header on the network
-and reads an ptl_hdr_t in from the wire.
-
-2. It may store additional NAL specific data that provides context
-for this event in a void* that it will interpret in some fashion
-later.
-
-3. The NAL calls lib_parse() with a pointer to the header and its
-private data structure.
-
-4. The library decodes the header and may build a message state
-object that describes the event to be written and the ACK to be
-sent, if any. It then calls nal->recv() with the private data
-that the NAL passed in, a pointer to the message state object
-and a translated user address.
-
- The NAL will have been given a chance to pretranslate
- all user addresses when the buffers are created. This
- process is described in the NAL-HOWTO.
-
-5. The NAL should restore what ever context it required from the
-private data pointer, begin receiving the bytes and possibly store
-some extra state of its own. It should return at this point.
-
-
-
-Get request
------------
-
-1. As with a Put, the NAL notices the incoming message header and
-passes it to lib_parse().
-
-2. The library decodes the header and calls nal->recv() with a
-zero byte length, offset and destination to instruct it to clean
-up the wire after reading the header. The private data will
-be passed in as well, allowing the NAL to retrieve any state
-or context that it requires.
-
-3. The library may build a message state object to possibly
-write an event log or invalidate a memory region.
-
-4. The library will build a ptl_msg_t header that specifies the
-Portals protocol information for delivery at the remote end.
-
-5. The library calls nal->send() with the pre-built header,
-the optional message state object, the four part address
-component, a translated user pointer + offset, and some
-other things.
-
-6. The NAL is to put the header on the wire or copy it at
-this point (since it off the stack). It should store some
-amount of state about its current position in the message and
-the destination address.
-
-7. And then return to the library.
-
-
-Reply request
--------------
-
-1. Starting at "The library decodes the header..."
-
-2. The library decodes the header and calls nal->recv()
-to bring in the rest of the message. Flow continues in
-exactly the same fashion as with all other receives.
-
-
-Ack request
------------
-
-1. The library decodes the header, builds the appropriate data
-structures for the event in a message state object and calls nal->recv()
-with a zero byte length, etc.
-
-
-Packet arrival
---------------
-
-1. The NAL should notice the arrival of a packet, retrieve whatever
-state it needs from the message ID or other NAL specific header data
-and place the data bytes directly into the user address that were
-given to nal->recv().
-
- How this happens is outside the scope of the Portals library
- and soley determined by the NAL...
-
-2. If this is the last packet in a message, the NAL should retrieve
-the lib_msg_t *cookie that it was given in the call to nal->recv()
-and pass it to lib_finalize(). lib_finalize() may call nal->send()
-to send an ACK, nal->write() to record an entry in the event log,
-nal->invalidate() to unregister a region of memory or do nothing at all.
-
-3. It should then clean up any remaining NAL specific state about
-the message and go back into the main loop.
-
-
-Outgoing packets
-----------------
-
-1. When the NAL has pending output, it should put the packets on
-the wire wrapped with whatever implementation specified wrappers.
-
-2. Once it has output all the packets of a message it should
-call lib_finalize() with the message state object that was
-handed to nal->send(). This will allows the library to clean
-up its state regarding the message and write any pending event
-entries.
-
-
-
+++ /dev/null
-This document is a first attempt at describing how to write a NAL
-for the Portals 3 library. It also defines the library architecture
-and the abstraction of protection domains.
-
-
-First, an overview of the architecture:
-
- Application
-
-----|----+--------
- |
- API === NAL (User space)
- |
----------+---|-----
- |
- LIB === NAL (Library space)
- |
----------+---|-----
-
- Physical wire (NIC space)
-
-
-Application
- API
-API-side NAL
-------------
-LIB-side NAL
- LIB
-LIB-side NAL
- wire
-
-Communication is through the indicated paths via well defined
-interfaces. The API and LIB portions are written to be portable
-across platforms and do not depend on the network interface.
-
-Communcation between the application and the API code is
-defined in the Portals 3 API specification. This is the
-user-visible portion of the interface and should be the most
-stable.
-
-
-
-API-side NAL:
-------------
-
-The user space NAL needs to implement only a few functions
-that are stored in a nal_t data structure and called by the
-API-side library:
-
- int forward( nal_t *nal,
- int index,
- void *args,
- size_t arg_len,
- void *ret,
- size_t ret_len
- );
-
-Most of the data structures in the portals library are held in
-the LIB section of the code, so it is necessary to forward API
-calls across the protection domain to the library. This is
-handled by the NAL's forward method. Once the argument and return
-blocks are on the remote side the NAL should call lib_dispatch()
-to invoke the appropriate API function.
-
- int validate( nal_t *nal,
- void *base,
- size_t extent,
- void **trans_base,
- void **trans_data
- );
-
-The validate method provides a means for the NAL to prevalidate
-and possibly pretranslate user addresses into a form suitable
-for fast use by the network card or kernel module. The trans_base
-pointer will be used by the library everytime it needs to
-refer to the block of memory. The trans_data result is a
-cookie that will be handed to the NAL along with the trans_base.
-
-The library never performs calculations on the trans_base value;
-it only computes offsets that are then handed to the NAL.
-
-
- int shutdown( nal_t *nal, int interface );
-
-Brings down the network interface. The remote NAL side should
-call lib_fini() to bring down the library side of the network.
-
- void yield( nal_t *nal );
-
-This allows the user application to gracefully give up the processor
-while busy waiting. Performance critical applications may not
-want to take the time to call this function, so it should be an
-option to the PtlEQWait call. Right now it is not implemented as such.
-
-Lastly, the NAL must implement a function named PTL_IFACE_*, where
-* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR.
-This initialization function is to set up communication with the
-library-side NAL, which should call lib_init() to bring up the
-network interface.
-
-
-
-LIB-side NAL:
-------------
-
-On the library-side, the NAL has much more responsibility. It
-is responsible for calling lib_dispatch() on behalf of the user,
-it is also responsible for bringing packets off the wire and
-pushing bits out. As on the user side, the methods are stored
-in a nal_cb_t structure that is defined on a per network
-interface basis.
-
-The calls to lib_dispatch() need to be examined. The prototype:
-
- void lib_dispatch(
- nal_cb_t *nal,
- void *private,
- int index,
- void *arg_block,
- void *ret_block
- );
-
-has two complications. The private field is a NAL-specific
-value that will be passed to any callbacks produced as a result
-of this API call. Kernel module implementations may use this
-for task structures, or perhaps network card data. It is ignored
-by the library.
-
-Secondly, the arg_block and ret_block must be in the same protection
-domain as the library. The NAL's two halves must communicate the
-sizes and perform the copies. After the call, the buffer pointed
-to by ret_block will be filled in and should be copied back to
-the user space. How this is to be done is NAL specific.
-
- int lib_parse(
- nal_cb_t *nal,
- ptl_hdr_t *hdr,
- void *private
- );
-
-This is the only other entry point into the library from the NAL.
-When the NAL detects an incoming message on the wire it should read
-sizeof(ptl_hdr_t) bytes and pass a pointer to the header to
-lib_parse(). It may set private to be anything that it needs to
-tie the incoming message to callbacks that are made as a result
-of this event.
-
-The method calls are:
-
- int (*send)(
- nal_cb_t *nal,
- void *private,
- lib_msg_t *cookie,
- ptl_hdr_t *hdr,
- int nid,
- int pid,
- int gid,
- int rid,
- user_ptr trans_base,
- user_ptr trans_data,
- size_t offset,
- size_t len
- );
-
-This is a tricky function -- it must support async output
-of messages as well as properly syncronized event log writing.
-The private field is the same that was passed into lib_dispatch()
-or lib_parse() and may be used to tie this call to the event
-that initiated the entry to the library.
-
-The cookie is a pointer to a library private value that must
-be passed to lib_finalize() once the message has been completely
-sent. It should not be examined by the NAL for any meaning.
-
-The four ID fields are passed in, although some implementations
-may not use all of them.
-
-The single base pointer has been replaced with the translated
-address that the API NAL generated in the api_nal->validate()
-call. The trans_data is unchanged and the offset is in bytes.
-
-
- int (*recv)(
- nal_cb_t *nal,
- void *private,
- lib_msg_t *cookie,
- user_ptr trans_base,
- user_ptr trans_data,
- size_t offset,
- size_t mlen,
- size_t rlen
- );
-
-This callback will only be called in response to lib_parse().
-The cookie, trans_addr and trans_data are as discussed in send().
-The NAL should read mlen bytes from the wire, deposit them into
-trans_base + offset and then discard (rlen - mlen) bytes.
-Once the entire message has been received the NAL should call
-lib_finalize() with the lib_msg_t *cookie.
-
-The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0
-is used to indicate that the NAL should clean up the wire. This could
-be implemented as a blocking call, although having it return as quickly
-as possible is desirable.
-
- int (*write)(
- nal_cb_t *nal,
- void *private,
- user_ptr trans_addr,
- user_ptr trans_data,
- size_t offset,
-
- void *src_addr,
- size_t len
- );
-
-This is essentially a cross-protection domain memcpy(). The user address
-has been pretranslated by the api_nal->translate() call.
-
- void *(*malloc)(
- nal_cb_t *nal,
- size_t len
- );
-
- void (*free)(
- nal_cb_t *nal,
- void *buf
- );
-
-Since the NAL may be in a non-standard hosted environment it can
-not call malloc(). This allows the library side NAL to implement
-the system specific malloc(). In the current reference implementation
-the libary only calls nal->malloc() when the network interface is
-initialized and then calls free when it is brought down. The library
-maintains its own pool of objects for allocation so only one call to
-malloc is made per object type.
-
- void (*invalidate)(
- nal_cb_t *nal,
- user_ptr trans_base,
- user_ptr trans_data,
- size_t extent
- );
-
-User addresses are validated/translated at the user-level API NAL
-method, which is likely to push them to this level. Meanwhile,
-the library NAL will be notified when the library no longer
-needs the buffer. Overlapped buffers are not detected by the
-library, so the NAL should ref count each page involved.
-
-Unfortunately we have a few bugs when the invalidate method is
-called. It is still in progress...
-
- void (*printf)(
- nal_cb_t *nal,
- const char *fmt,
- ...
- );
-
-As with malloc(), the library does not have any way to do printf
-or printk. It is not necessary for the NAL to implement the this
-call, although it will make debugging difficult.
-
- void (*cli)(
- nal_cb_t *nal,
- unsigned long *flags
- );
-
- void (*sti)(
- nal_cb_t *nal,
- unsigned long *flags
- );
-
-These are used by the library to mark critical sections.
-
- int (*gidrid2nidpid)(
- nal_cb_t *nal,
- ptl_id_t gid,
- ptl_id_t rid,
- ptl_id_t *nid,
- ptl_id_t *pid
- );
-
-
- int (*nidpid2gidrid)(
- nal_cb_t *nal,
- ptl_id_t nid,
- ptl_id_t pid,
- ptl_id_t *gid,
- ptl_id_t *rid
- );
-
-Rolf added these. I haven't looked at how they have to work yet.
+++ /dev/null
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter
-100.00
-Single
--2
-1200 2
-6 1200 750 1650 1050
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
- 1650 1050 1650 750 1200 750 1200 1050 1650 1050
-4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001
--6
-6 1200 2325 1650 2625
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
- 1650 2625 1650 2325 1200 2325 1200 2625 1650 2625
-4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001
--6
-6 1200 1800 1650 2100
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
- 1650 2100 1650 1800 1200 1800 1200 2100 1650 2100
-4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001
--6
-6 1200 1275 1650 1575
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
- 1650 1575 1650 1275 1200 1275 1200 1575 1650 1575
-4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001
--6
-6 450 750 900 1200
-5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050
-1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 450 825 450 1050
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 900 1050 900 825
--6
-6 450 2325 900 2775
-5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 2325.000 450 2625 675 2700 900 2625
-1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 450 2400 450 2625
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 900 2625 900 2400
--6
-6 450 1800 900 2250
-5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100
-1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 450 1875 450 2100
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 900 2100 900 1875
--6
-6 450 1275 900 1725
-5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575
-1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 450 1350 450 1575
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 900 1575 900 1350
--6
-6 2250 750 3450 2625
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 2550 1200 3150 1200
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 2550 1500 3150 1500
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 2550 1800 3150 1800
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 2550 2100 3150 2100
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 2550 975 3150 975 3150 2625 2550 2625 2550 975
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 2550 2400 3150 2400
-4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001
--6
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
- 0 0 1.00 60.00 120.00
- 0 0 1.00 60.00 120.00
- 1650 2400 2550 1350
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
- 0 0 1.00 60.00 120.00
- 0 0 1.00 60.00 120.00
- 1650 1875 2550 1050
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
- 0 0 1.00 60.00 120.00
- 0 0 1.00 60.00 120.00
- 1650 1425 2550 1950
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
- 0 0 1.00 60.00 120.00
- 0 0 1.00 60.00 120.00
- 1650 900 2550 1650
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 900 900 1200 900
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 900 1425 1200 1425
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 900 1950 1200 1950
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
- 900 2475 1200 2475
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
- 0 0 1.00 60.00 120.00
- 0 0 1.00 60.00 120.00
- 1650 2025 2550 2250
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
- 0 0 1.00 60.00 120.00
- 0 0 1.00 60.00 120.00
- 1650 2550 2550 2475
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
- 1875 2850 1875 600 225 600 225 2850 1875 2850
-4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001
+++ /dev/null
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter
-100.00
-Single
--2
-1200 2
-6 525 2175 1575 2925
-6 675 2287 1425 2812
-4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001
-4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001
-4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001
--6
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
- 1575 2550 1050 2175 525 2550 1050 2925 1575 2550
--6
-6 3450 1275 4350 1725
-6 3600 1312 4200 1687
-4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001
-4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 3450 1275 4350 1275 4350 1725 3450 1725 3450 1275
--6
-6 4650 1275 5550 1725
-6 4725 1312 5475 1687
-4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001
-4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 4650 1275 5550 1275 5550 1725 4650 1725 4650 1275
--6
-6 1350 525 2250 975
-6 1350 562 2250 937
-4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001
-4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 1350 525 2250 525 2250 975 1350 975 1350 525
--6
-6 525 1125 1575 1875
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
- 1575 1500 1050 1125 525 1500 1050 1875 1575 1500
-4 1 0 100 0 0 10 0.0000 0 105 465 1049 1552 Match?\001
--6
-6 2340 1237 2940 1687
-6 2340 1237 2940 1687
-4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001
-4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001
-4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 Entries?\001
--6
--6
-6 525 3225 1575 3975
-6 675 3375 1425 3750
-4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001
-4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001
--6
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
- 525 3600 1050 3225 1575 3600 1050 3975 525 3600
--6
-6 3300 3375 4350 3825
-6 3300 3412 4350 3787
-4 1 0 50 0 0 10 0.0000 4 105 735 3825 3562 Unlink MD\001
-4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 3300 3375 4350 3375 4350 3825 3300 3825 3300 3375
--6
-6 1950 3225 3000 3975
-6 2250 3450 2700 3750
-4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001
-4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001
--6
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
- 3000 3600 2475 3225 1950 3600 2475 3975 3000 3600
--6
-6 3150 4500 4200 4950
-6 3150 4537 4200 4912
-4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001
-4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 3150 4500 4200 4500 4200 4950 3150 4950 3150 4500
--6
-6 600 4500 1500 4950
-6 675 4537 1425 4912
-4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001
-4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 600 4500 1500 4500 1500 4950 600 4950 600 4500
--6
-6 4650 4350 5700 5100
-6 4950 4537 5400 4912
-6 4950 4537 5400 4912
-4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001
-4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001
--6
--6
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
- 5700 4725 5175 4350 4650 4725 5175 5100 5700 4725
--6
-6 6000 4500 6900 4950
-6 6225 4575 6675 4875
-4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001
-4 1 0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 6000 4500 6900 4500 6900 4950 6000 4950 6000 4500
--6
-6 1800 4350 2850 5100
-6 2100 4575 2550 4875
-4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001
-4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 thresh?\001
--6
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
- 2850 4725 2325 4350 1800 4725 2325 5100 2850 4725
--6
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1050 1875 1050 2175
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1575 1500 2100 1500
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1050 450 1050 1125
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1350 750 1050 750
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1050 2925 1050 3225
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3150 1500 3450 1500
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 4350 1500 4650 1500
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
- 2100 1500 2625 1125 3150 1500 2625 1875 2100 1500
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1575 3600 1950 3600
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1050 3975 1050 4500
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3000 3600 3300 3600
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1500 4725 1800 4725
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 5700 4725 6000 4725
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 2850 4725 3150 4725
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 4200 4725 4650 4725
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 6900 4725 7950 4725
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
- 0 0 1.00 60.00 120.00
- 1575 2550 1650 2550 1800 2550 1800 2400 1800 1500
- 0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
- 0 0 1.00 60.00 120.00
- 2250 750 2475 750 2625 750 2625 900 2625 1125
- 0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
- 0 0 1.00 60.00 120.00
- 7500 4725 7500 1650 7500 1500 7350 1500 5550 1500
- 0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
- 0 0 1.00 60.00 120.00
- 2475 3225 2475 2400 2475 2250 2325 2250 1800 2250
- 0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
- 0 0 1.00 60.00 120.00
- 3825 3375 3825 2175 3825 2025 3675 2025 1800 2025
- 0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
- 0 0 1.00 60.00 120.00
- 2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 4425 4125
- 4425 4275 4425 4725
- 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
- 0 0 1.00 60.00 120.00
- 5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125
- 7275 4275 7275 4725
- 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
-4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001
-4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001
-4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001
-4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001
-4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001
-4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001
-4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001
-4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001
-4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001
-4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001
-4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001
-4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001
-4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001
-4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001
-4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001
-4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001
+++ /dev/null
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter
-100.00
-Single
--2
-1200 2
-6 2775 900 3525 1200
-4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001
-4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001
--6
-6 1350 1725 2175 2025
-4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001
-4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 Data\001
--6
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 900 525 2700 750
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 2700 825 2700 1275
-2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2
- 0 0 1.00 60.00 120.00
- 2700 1350 900 1950
-2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
- 2400 300 3600 300 3600 2250 2400 2250 2400 300
-2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
- 0 300 1200 300 1200 2250 0 2250 0 300
-4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001
-4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
-4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
+++ /dev/null
-% ---------------------------------------------------------------
-%
-% by Paolo.Ienne@di.epfl.ch
-%
-% ---------------------------------------------------------------
-%
-% no guarantee is given that the format corresponds perfectly to
-% IEEE 8.5" x 11" Proceedings, but most features should be ok.
-%
-% ---------------------------------------------------------------
-%
-% `ieee' from BibTeX standard bibliography style `abbrv'
-% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09.
-% Copyright (C) 1985, all rights reserved.
-% Copying of this file is authorized only if either
-% (1) you make absolutely no changes to your copy, including name, or
-% (2) if you do make changes, you name it something other than
-% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst.
-% This restriction helps ensure that all standard styles are identical.
-% The file btxbst.doc has the documentation for this style.
-
-ENTRY
- { address
- author
- booktitle
- chapter
- edition
- editor
- howpublished
- institution
- journal
- key
- month
- note
- number
- organization
- pages
- publisher
- school
- series
- title
- type
- volume
- year
- }
- {}
- { label }
-
-INTEGERS { output.state before.all mid.sentence after.sentence after.block }
-
-FUNCTION {init.state.consts}
-{ #0 'before.all :=
- #1 'mid.sentence :=
- #2 'after.sentence :=
- #3 'after.block :=
-}
-
-STRINGS { s t }
-
-FUNCTION {output.nonnull}
-{ 's :=
- output.state mid.sentence =
- { ", " * write$ }
- { output.state after.block =
- { add.period$ write$
- newline$
- "\newblock " write$
- }
- { output.state before.all =
- 'write$
- { add.period$ " " * write$ }
- if$
- }
- if$
- mid.sentence 'output.state :=
- }
- if$
- s
-}
-
-FUNCTION {output}
-{ duplicate$ empty$
- 'pop$
- 'output.nonnull
- if$
-}
-
-FUNCTION {output.check}
-{ 't :=
- duplicate$ empty$
- { pop$ "empty " t * " in " * cite$ * warning$ }
- 'output.nonnull
- if$
-}
-
-FUNCTION {output.bibitem}
-{ newline$
- "\bibitem{" write$
- cite$ write$
- "}" write$
- newline$
- ""
- before.all 'output.state :=
-}
-
-FUNCTION {fin.entry}
-{ add.period$
- write$
- newline$
-}
-
-FUNCTION {new.block}
-{ output.state before.all =
- 'skip$
- { after.block 'output.state := }
- if$
-}
-
-FUNCTION {new.sentence}
-{ output.state after.block =
- 'skip$
- { output.state before.all =
- 'skip$
- { after.sentence 'output.state := }
- if$
- }
- if$
-}
-
-FUNCTION {not}
-{ { #0 }
- { #1 }
- if$
-}
-
-FUNCTION {and}
-{ 'skip$
- { pop$ #0 }
- if$
-}
-
-FUNCTION {or}
-{ { pop$ #1 }
- 'skip$
- if$
-}
-
-FUNCTION {new.block.checka}
-{ empty$
- 'skip$
- 'new.block
- if$
-}
-
-FUNCTION {new.block.checkb}
-{ empty$
- swap$ empty$
- and
- 'skip$
- 'new.block
- if$
-}
-
-FUNCTION {new.sentence.checka}
-{ empty$
- 'skip$
- 'new.sentence
- if$
-}
-
-FUNCTION {new.sentence.checkb}
-{ empty$
- swap$ empty$
- and
- 'skip$
- 'new.sentence
- if$
-}
-
-FUNCTION {field.or.null}
-{ duplicate$ empty$
- { pop$ "" }
- 'skip$
- if$
-}
-
-FUNCTION {emphasize}
-{ duplicate$ empty$
- { pop$ "" }
- { "{\em " swap$ * "}" * }
- if$
-}
-
-INTEGERS { nameptr namesleft numnames }
-
-FUNCTION {format.names}
-{ 's :=
- #1 'nameptr :=
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't :=
- nameptr #1 >
- { namesleft #1 >
- { ", " * t * }
- { numnames #2 >
- { "," * }
- 'skip$
- if$
- t "others" =
- { " et~al." * }
- { " and " * t * }
- if$
- }
- if$
- }
- 't
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
-}
-
-FUNCTION {format.authors}
-{ author empty$
- { "" }
- { author format.names }
- if$
-}
-
-FUNCTION {format.editors}
-{ editor empty$
- { "" }
- { editor format.names
- editor num.names$ #1 >
- { ", editors" * }
- { ", editor" * }
- if$
- }
- if$
-}
-
-FUNCTION {format.title}
-{ title empty$
- { "" }
- { title "t" change.case$ }
- if$
-}
-
-FUNCTION {n.dashify}
-{ 't :=
- ""
- { t empty$ not }
- { t #1 #1 substring$ "-" =
- { t #1 #2 substring$ "--" = not
- { "--" *
- t #2 global.max$ substring$ 't :=
- }
- { { t #1 #1 substring$ "-" = }
- { "-" *
- t #2 global.max$ substring$ 't :=
- }
- while$
- }
- if$
- }
- { t #1 #1 substring$ *
- t #2 global.max$ substring$ 't :=
- }
- if$
- }
- while$
-}
-
-FUNCTION {format.date}
-{ year empty$
- { month empty$
- { "" }
- { "there's a month but no year in " cite$ * warning$
- month
- }
- if$
- }
- { month empty$
- 'year
- { month " " * year * }
- if$
- }
- if$
-}
-
-FUNCTION {format.btitle}
-{ title emphasize
-}
-
-FUNCTION {tie.or.space.connect}
-{ duplicate$ text.length$ #3 <
- { "~" }
- { " " }
- if$
- swap$ * *
-}
-
-FUNCTION {either.or.check}
-{ empty$
- 'pop$
- { "can't use both " swap$ * " fields in " * cite$ * warning$ }
- if$
-}
-
-FUNCTION {format.bvolume}
-{ volume empty$
- { "" }
- { "volume" volume tie.or.space.connect
- series empty$
- 'skip$
- { " of " * series emphasize * }
- if$
- "volume and number" number either.or.check
- }
- if$
-}
-
-FUNCTION {format.number.series}
-{ volume empty$
- { number empty$
- { series field.or.null }
- { output.state mid.sentence =
- { "number" }
- { "Number" }
- if$
- number tie.or.space.connect
- series empty$
- { "there's a number but no series in " cite$ * warning$ }
- { " in " * series * }
- if$
- }
- if$
- }
- { "" }
- if$
-}
-
-FUNCTION {format.edition}
-{ edition empty$
- { "" }
- { output.state mid.sentence =
- { edition "l" change.case$ " edition" * }
- { edition "t" change.case$ " edition" * }
- if$
- }
- if$
-}
-
-INTEGERS { multiresult }
-
-FUNCTION {multi.page.check}
-{ 't :=
- #0 'multiresult :=
- { multiresult not
- t empty$ not
- and
- }
- { t #1 #1 substring$
- duplicate$ "-" =
- swap$ duplicate$ "," =
- swap$ "+" =
- or or
- { #1 'multiresult := }
- { t #2 global.max$ substring$ 't := }
- if$
- }
- while$
- multiresult
-}
-
-FUNCTION {format.pages}
-{ pages empty$
- { "" }
- { pages multi.page.check
- { "pages" pages n.dashify tie.or.space.connect }
- { "page" pages tie.or.space.connect }
- if$
- }
- if$
-}
-
-FUNCTION {format.vol.num.pages}
-{ volume field.or.null
- number empty$
- 'skip$
- { "(" number * ")" * *
- volume empty$
- { "there's a number but no volume in " cite$ * warning$ }
- 'skip$
- if$
- }
- if$
- pages empty$
- 'skip$
- { duplicate$ empty$
- { pop$ format.pages }
- { ":" * pages n.dashify * }
- if$
- }
- if$
-}
-
-FUNCTION {format.chapter.pages}
-{ chapter empty$
- 'format.pages
- { type empty$
- { "chapter" }
- { type "l" change.case$ }
- if$
- chapter tie.or.space.connect
- pages empty$
- 'skip$
- { ", " * format.pages * }
- if$
- }
- if$
-}
-
-FUNCTION {format.in.ed.booktitle}
-{ booktitle empty$
- { "" }
- { editor empty$
- { "In " booktitle emphasize * }
- { "In " format.editors * ", " * booktitle emphasize * }
- if$
- }
- if$
-}
-
-FUNCTION {empty.misc.check}
-{ author empty$ title empty$ howpublished empty$
- month empty$ year empty$ note empty$
- and and and and and
- key empty$ not and
- { "all relevant fields are empty in " cite$ * warning$ }
- 'skip$
- if$
-}
-
-FUNCTION {format.thesis.type}
-{ type empty$
- 'skip$
- { pop$
- type "t" change.case$
- }
- if$
-}
-
-FUNCTION {format.tr.number}
-{ type empty$
- { "Technical Report" }
- 'type
- if$
- number empty$
- { "t" change.case$ }
- { number tie.or.space.connect }
- if$
-}
-
-FUNCTION {format.article.crossref}
-{ key empty$
- { journal empty$
- { "need key or journal for " cite$ * " to crossref " * crossref *
- warning$
- ""
- }
- { "In {\em " journal * "\/}" * }
- if$
- }
- { "In " key * }
- if$
- " \cite{" * crossref * "}" *
-}
-
-FUNCTION {format.crossref.editor}
-{ editor #1 "{vv~}{ll}" format.name$
- editor num.names$ duplicate$
- #2 >
- { pop$ " et~al." * }
- { #2 <
- 'skip$
- { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
- { " et~al." * }
- { " and " * editor #2 "{vv~}{ll}" format.name$ * }
- if$
- }
- if$
- }
- if$
-}
-
-FUNCTION {format.book.crossref}
-{ volume empty$
- { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
- "In "
- }
- { "Volume" volume tie.or.space.connect
- " of " *
- }
- if$
- editor empty$
- editor field.or.null author field.or.null =
- or
- { key empty$
- { series empty$
- { "need editor, key, or series for " cite$ * " to crossref " *
- crossref * warning$
- "" *
- }
- { "{\em " * series * "\/}" * }
- if$
- }
- { key * }
- if$
- }
- { format.crossref.editor * }
- if$
- " \cite{" * crossref * "}" *
-}
-
-FUNCTION {format.incoll.inproc.crossref}
-{ editor empty$
- editor field.or.null author field.or.null =
- or
- { key empty$
- { booktitle empty$
- { "need editor, key, or booktitle for " cite$ * " to crossref " *
- crossref * warning$
- ""
- }
- { "In {\em " booktitle * "\/}" * }
- if$
- }
- { "In " key * }
- if$
- }
- { "In " format.crossref.editor * }
- if$
- " \cite{" * crossref * "}" *
-}
-
-FUNCTION {article}
-{ output.bibitem
- format.authors "author" output.check
- new.block
- format.title "title" output.check
- new.block
- crossref missing$
- { journal emphasize "journal" output.check
- format.vol.num.pages output
- format.date "year" output.check
- }
- { format.article.crossref output.nonnull
- format.pages output
- }
- if$
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {book}
-{ output.bibitem
- author empty$
- { format.editors "author and editor" output.check }
- { format.authors output.nonnull
- crossref missing$
- { "author and editor" editor either.or.check }
- 'skip$
- if$
- }
- if$
- new.block
- format.btitle "title" output.check
- crossref missing$
- { format.bvolume output
- new.block
- format.number.series output
- new.sentence
- publisher "publisher" output.check
- address output
- }
- { new.block
- format.book.crossref output.nonnull
- }
- if$
- format.edition output
- format.date "year" output.check
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {booklet}
-{ output.bibitem
- format.authors output
- new.block
- format.title "title" output.check
- howpublished address new.block.checkb
- howpublished output
- address output
- format.date output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {inbook}
-{ output.bibitem
- author empty$
- { format.editors "author and editor" output.check }
- { format.authors output.nonnull
- crossref missing$
- { "author and editor" editor either.or.check }
- 'skip$
- if$
- }
- if$
- new.block
- format.btitle "title" output.check
- crossref missing$
- { format.bvolume output
- format.chapter.pages "chapter and pages" output.check
- new.block
- format.number.series output
- new.sentence
- publisher "publisher" output.check
- address output
- }
- { format.chapter.pages "chapter and pages" output.check
- new.block
- format.book.crossref output.nonnull
- }
- if$
- format.edition output
- format.date "year" output.check
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {incollection}
-{ output.bibitem
- format.authors "author" output.check
- new.block
- format.title "title" output.check
- new.block
- crossref missing$
- { format.in.ed.booktitle "booktitle" output.check
- format.bvolume output
- format.number.series output
- format.chapter.pages output
- new.sentence
- publisher "publisher" output.check
- address output
- format.edition output
- format.date "year" output.check
- }
- { format.incoll.inproc.crossref output.nonnull
- format.chapter.pages output
- }
- if$
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {inproceedings}
-{ output.bibitem
- format.authors "author" output.check
- new.block
- format.title "title" output.check
- new.block
- crossref missing$
- { format.in.ed.booktitle "booktitle" output.check
- format.bvolume output
- format.number.series output
- format.pages output
- address empty$
- { organization publisher new.sentence.checkb
- organization output
- publisher output
- format.date "year" output.check
- }
- { address output.nonnull
- format.date "year" output.check
- new.sentence
- organization output
- publisher output
- }
- if$
- }
- { format.incoll.inproc.crossref output.nonnull
- format.pages output
- }
- if$
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {conference} { inproceedings }
-
-FUNCTION {manual}
-{ output.bibitem
- author empty$
- { organization empty$
- 'skip$
- { organization output.nonnull
- address output
- }
- if$
- }
- { format.authors output.nonnull }
- if$
- new.block
- format.btitle "title" output.check
- author empty$
- { organization empty$
- { address new.block.checka
- address output
- }
- 'skip$
- if$
- }
- { organization address new.block.checkb
- organization output
- address output
- }
- if$
- format.edition output
- format.date output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {mastersthesis}
-{ output.bibitem
- format.authors "author" output.check
- new.block
- format.title "title" output.check
- new.block
- "Master's thesis" format.thesis.type output.nonnull
- school "school" output.check
- address output
- format.date "year" output.check
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {misc}
-{ output.bibitem
- format.authors output
- title howpublished new.block.checkb
- format.title output
- howpublished new.block.checka
- howpublished output
- format.date output
- new.block
- note output
- fin.entry
- empty.misc.check
-}
-
-FUNCTION {phdthesis}
-{ output.bibitem
- format.authors "author" output.check
- new.block
- format.btitle "title" output.check
- new.block
- "PhD thesis" format.thesis.type output.nonnull
- school "school" output.check
- address output
- format.date "year" output.check
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {proceedings}
-{ output.bibitem
- editor empty$
- { organization output }
- { format.editors output.nonnull }
- if$
- new.block
- format.btitle "title" output.check
- format.bvolume output
- format.number.series output
- address empty$
- { editor empty$
- { publisher new.sentence.checka }
- { organization publisher new.sentence.checkb
- organization output
- }
- if$
- publisher output
- format.date "year" output.check
- }
- { address output.nonnull
- format.date "year" output.check
- new.sentence
- editor empty$
- 'skip$
- { organization output }
- if$
- publisher output
- }
- if$
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {techreport}
-{ output.bibitem
- format.authors "author" output.check
- new.block
- format.title "title" output.check
- new.block
- format.tr.number output.nonnull
- institution "institution" output.check
- address output
- format.date "year" output.check
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {unpublished}
-{ output.bibitem
- format.authors "author" output.check
- new.block
- format.title "title" output.check
- new.block
- note "note" output.check
- format.date output
- fin.entry
-}
-
-FUNCTION {default.type} { misc }
-
-MACRO {jan} {"Jan."}
-
-MACRO {feb} {"Feb."}
-
-MACRO {mar} {"Mar."}
-
-MACRO {apr} {"Apr."}
-
-MACRO {may} {"May"}
-
-MACRO {jun} {"June"}
-
-MACRO {jul} {"July"}
-
-MACRO {aug} {"Aug."}
-
-MACRO {sep} {"Sept."}
-
-MACRO {oct} {"Oct."}
-
-MACRO {nov} {"Nov."}
-
-MACRO {dec} {"Dec."}
-
-MACRO {acmcs} {"ACM Comput. Surv."}
-
-MACRO {acta} {"Acta Inf."}
-
-MACRO {cacm} {"Commun. ACM"}
-
-MACRO {ibmjrd} {"IBM J. Res. Dev."}
-
-MACRO {ibmsj} {"IBM Syst.~J."}
-
-MACRO {ieeese} {"IEEE Trans. Softw. Eng."}
-
-MACRO {ieeetc} {"IEEE Trans. Comput."}
-
-MACRO {ieeetcad}
- {"IEEE Trans. Comput.-Aided Design Integrated Circuits"}
-
-MACRO {ipl} {"Inf. Process. Lett."}
-
-MACRO {jacm} {"J.~ACM"}
-
-MACRO {jcss} {"J.~Comput. Syst. Sci."}
-
-MACRO {scp} {"Sci. Comput. Programming"}
-
-MACRO {sicomp} {"SIAM J. Comput."}
-
-MACRO {tocs} {"ACM Trans. Comput. Syst."}
-
-MACRO {tods} {"ACM Trans. Database Syst."}
-
-MACRO {tog} {"ACM Trans. Gr."}
-
-MACRO {toms} {"ACM Trans. Math. Softw."}
-
-MACRO {toois} {"ACM Trans. Office Inf. Syst."}
-
-MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."}
-
-MACRO {tcs} {"Theoretical Comput. Sci."}
-
-READ
-
-FUNCTION {sortify}
-{ purify$
- "l" change.case$
-}
-
-INTEGERS { len }
-
-FUNCTION {chop.word}
-{ 's :=
- 'len :=
- s #1 len substring$ =
- { s len #1 + global.max$ substring$ }
- 's
- if$
-}
-
-FUNCTION {sort.format.names}
-{ 's :=
- #1 'nameptr :=
- ""
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { nameptr #1 >
- { " " * }
- 'skip$
- if$
- s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't :=
- nameptr numnames = t "others" = and
- { "et al" * }
- { t sortify * }
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
-}
-
-FUNCTION {sort.format.title}
-{ 't :=
- "A " #2
- "An " #3
- "The " #4 t chop.word
- chop.word
- chop.word
- sortify
- #1 global.max$ substring$
-}
-
-FUNCTION {author.sort}
-{ author empty$
- { key empty$
- { "to sort, need author or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { author sort.format.names }
- if$
-}
-
-FUNCTION {author.editor.sort}
-{ author empty$
- { editor empty$
- { key empty$
- { "to sort, need author, editor, or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { editor sort.format.names }
- if$
- }
- { author sort.format.names }
- if$
-}
-
-FUNCTION {author.organization.sort}
-{ author empty$
- { organization empty$
- { key empty$
- { "to sort, need author, organization, or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { "The " #4 organization chop.word sortify }
- if$
- }
- { author sort.format.names }
- if$
-}
-
-FUNCTION {editor.organization.sort}
-{ editor empty$
- { organization empty$
- { key empty$
- { "to sort, need editor, organization, or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { "The " #4 organization chop.word sortify }
- if$
- }
- { editor sort.format.names }
- if$
-}
-
-FUNCTION {presort}
-{ type$ "book" =
- type$ "inbook" =
- or
- 'author.editor.sort
- { type$ "proceedings" =
- 'editor.organization.sort
- { type$ "manual" =
- 'author.organization.sort
- 'author.sort
- if$
- }
- if$
- }
- if$
- " "
- *
- year field.or.null sortify
- *
- " "
- *
- title field.or.null
- sort.format.title
- *
- #1 entry.max$ substring$
- 'sort.key$ :=
-}
-
-ITERATE {presort}
-
-SORT
-
-STRINGS { longest.label }
-
-INTEGERS { number.label longest.label.width }
-
-FUNCTION {initialize.longest.label}
-{ "" 'longest.label :=
- #1 'number.label :=
- #0 'longest.label.width :=
-}
-
-FUNCTION {longest.label.pass}
-{ number.label int.to.str$ 'label :=
- number.label #1 + 'number.label :=
- label width$ longest.label.width >
- { label 'longest.label :=
- label width$ 'longest.label.width :=
- }
- 'skip$
- if$
-}
-
-EXECUTE {initialize.longest.label}
-
-ITERATE {longest.label.pass}
-
-FUNCTION {begin.bib}
-{ preamble$ empty$
- 'skip$
- { preamble$ write$ newline$ }
- if$
- "\begin{thebibliography}{" longest.label *
- "}\setlength{\itemsep}{-1ex}\small" * write$ newline$
-}
-
-EXECUTE {begin.bib}
-
-EXECUTE {init.state.consts}
-
-ITERATE {call.type$}
-
-FUNCTION {end.bib}
-{ newline$
- "\end{thebibliography}" write$ newline$
-}
-
-EXECUTE {end.bib}
-
-% end of file ieee.bst
-% ---------------------------------------------------------------
+++ /dev/null
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter
-100.00
-Single
--2
-1200 2
-6 150 1650 900 2025
-4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001
-4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001
--6
-6 150 150 900 525
-4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001
-4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001
--6
-6 2550 4125 3150 4725
-4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001
-4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001
-4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001
--6
-6 1050 1575 1950 1875
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 1050 1575 1950 1575 1950 1875 1050 1875 1050 1575
-4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001
--6
-6 5400 1575 6300 2175
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 5400 1575 6300 1575 6300 2175 5400 2175 5400 1575
-4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001
--6
-6 5400 2400 6300 3000
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 5400 2400 6300 2400 6300 3000 5400 3000 5400 2400
-4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001
--6
-6 1050 2400 1950 2700
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 1050 2400 1950 2400 1950 2700 1050 2700 1050 2400
-4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001
--6
-6 1050 825 1950 1125
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 1050 825 1950 825 1950 1125 1050 1125 1050 825
-4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001
--6
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1500 1125 1500 1575
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3225 2025 4050 3375
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
- 150 675 6600 675
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
- 150 1350 6600 1350
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 2400 4125 3300 4125 3300 4725 2400 4725 2400 4125
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3225 4500 4050 3675
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3225 1725 5400 1725
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3225 2550 5400 2550
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3225 2850 4050 3450
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1500 1800 1500 2400
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 2400 825 3300 825 3300 1275 2400 1275 2400 825
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1500 2625 1500 4125
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 1050 4125 1950 4125 1950 4425 1050 4425 1050 4125
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1500 300 1500 825
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1875 975 2400 975
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1875 1725 2400 1725
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1875 2550 2400 2550
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1875 4275 2400 4275
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 2400 1575 3300 1575 3300 2175 2400 2175 2400 1575
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 2400 2400 3300 2400 3300 3000 2400 3000 2400 2400
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 4050 3300 5250 3300 5250 3750 4050 3750 4050 3300
-4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001
-4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001
-4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001
-4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001
-4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001
-4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001
-4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001
-4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 max_offset=\001
-4 1 0 50 0 0 10 0.0000 0 150 840 2850 1875 n - short_len\001
-4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001
-4 1 0 50 0 0 10 0.0000 0 150 840 2850 2700 n - short_len\001
-4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001
-4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001
-4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001
-4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001
+++ /dev/null
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter
-100.00
-Single
--2
-1200 2
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 1350 900 1650 900 1650 1200 1350 1200 1350 900
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 1800 1350 2100 1350 2100 1650 1800 1650 1800 1350
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 2250 1800 2550 1800 2550 2100 2250 2100 2250 1800
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
- 4200 375 4200 2100
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 525 600 1125 600 1125 2100 525 2100 525 600
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 4425 1275 4875 1275 4875 1950 4425 1950 4425 1275
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 2550 1200 3150 1200 3150 1500 2550 1500 2550 1200
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3000 1425 4425 1425
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 3600 825 3750 825 3750 1125 3600 1125 3600 825
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 2025 1425 2550 1425
-2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
- 4425 750 4875 750 4875 1125 4425 1125 4425 750
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3675 975 4425 975
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2
- 0 0 1.00 60.00 120.00
- 825 1050 1350 1050
- 0.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
- 0 0 1.00 60.00 120.00
- 1500 1125 1500 1350 1500 1500 1650 1500 1800 1500
- 0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
- 0 0 1.00 60.00 120.00
- 1950 1575 1950 1800 1950 1950 2100 1950 2250 1950
- 0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
- 525 975 1125 975
- 0.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
- 525 1125 1125 1125
- 0.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7
- 0 0 1.00 60.00 120.00
- 3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975
- 3600 975
- 0.000 1.000 1.000 1.000 1.000 1.000 0.000
-4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001
-4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001
-4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001
-4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001
-4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001
-4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001
-4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001
-4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001
-4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001
+++ /dev/null
-@Article{ Cplant,
- title = { {M}assively {P}arallel {C}omputing with
- {C}ommodity {C}omponents },
- author = { Ron Brightwell and David S. Greenberg and Arthur
- B. Maccabe and Rolf Riesen },
- journal = { Parallel Computing },
- volume = { 26 },
- month = { February },
- pages = { 243-266 },
- year = { 2000 }
-}
-
-@Manual{ Portals,
- organization = { Sandia National Laboratories },
- title = { {P}uma {P}ortals },
- note = { http://www.cs.sandia.gov/puma/portals },
- year = { 1997 }
-}
-
-@Techreport{ VIA,
- title = { {V}irtual {I}nterface {A}rchitecture
- {S}pecification {V}ersion 1.0 },
- author = { {Compaq, Microsoft, and Intel} },
- institution = { Compaq, Microsoft, and Intel },
- month = { December },
- year = { 1997 }
-}
-
-@Techreport{ ST,
- title = { {I}nformation {T}echnology - {S}cheduled
- {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 },
- author = { {Task Group of Technical Committee T11} },
- institution = { Accredited Standards Committee NCITS },
- month = { July },
- year = { 1998 }
-}
-
-@Manual{ TFLOPS,
- organization = { Sandia National Laboratories },
- title = { ASCI Red },
- note = { http://www.sandia.gov/ASCI/TFLOP },
- year = { 1996 }
-}
-
-@Techreport{ GM,
- title = { The {GM} {M}essage {P}assing {S}ystem },
- author = { {Myricom, Inc.} },
- institution = { {Myricom, Inc.} },
- year = { 1997 },
-}
-
-@Article{ MPIstandard,
- title = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard },
- author = { {Message Passing Interface Forum} },
- journal = { The International Journal of Supercomputer Applications
- and High Performance Computing },
- volume = { 8 },
- year = { 1994 }
-}
-
-@Inproceedings{ PumaOS,
- author = "Lance Shuler and Chu Jong and Rolf Riesen and
- David van Dresser and Arthur B. Maccabe and
- Lee Ann Fisk and T. Mack Stallcup",
- booktitle = "Proceeding of the 1995 Intel Supercomputer
- User's Group Conference",
- title = "The {P}uma Operating System for Massively Parallel Computers",
- organization = "Intel Supercomputer User's Group",
- year = 1995
-}
-
-@InProceedings{ SUNMOS,
-author = "Arthur B. Maccabe and Kevin S. McCurley and Rolf Riesen and
- Stephen R. Wheat",
-title = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide",
-booktitle = "Proceedings of the {Intel} Supercomputer Users' Group. 1994
- Annual North America Users' Conference.",
-year = 1994,
-pages = "245--251",
-month = "June",
-location = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps"
-}
-
-@InProceedings { PumaMPI,
- title = { Design and Implementation of {MPI} on {P}uma Portals },
- author = { Ron Brightwell and Lance Shuler },
- booktitle = { Proceedings of the Second MPI Developer's Conference },
- pages = { 18-25 },
- month = { July },
- year = { 1996 }
-}
-
-@Inproceedings{ FM2,
- author = { Mario Lauria and Scott Pakin and Andrew Chien },
- title = { {E}fficient {L}ayering for {H}igh {S}peed
- {C}ommunication: {F}ast {M}essages 2.x },
- Booktitle = { Proceedings of the IEEE International Symposium
- on High Performance Distributed Computing },
- year = { 1998 }
-}
-
-@Manual { CraySHMEM,
- title = "SHMEM Technical Note for C, SG-2516 2.3",
- organization = "Cray Research, Inc.",
- month = "October",
- year = 1994
-}
-
-@Manual { MPI2,
- title = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface",
- organization = "Message Passing Interface Forum",
- note = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html",
- month = "July",
- year = 1997
-}
-
-@InProceedings { PMMPI,
- title = { {The Design and Implementation of Zero Copy MPI Using
- Commodity Hardware with a High Performance Network} },
- author = { Francis O'Carroll and Hiroshi Tezuka and Atsushi Hori
- and Yutaka Ishikawa },
- booktitle = { Proceedings of the ICS },
- year = { 1998 }
-}
+++ /dev/null
-#LyX 1.2 created this file. For more info see http://www.lyx.org/
-\lyxformat 220
-\textclass report
-\begin_preamble
-\usepackage{fullpage}
-\renewenvironment{comment}%
-{\begin{quote}\textbf{Discussion}: \slshape}%
-{\end{quote}}
-\pagestyle{myheadings}
-\end_preamble
-\language american
-\inputencoding auto
-\fontscheme pslatex
-\graphics default
-\paperfontsize 10
-\spacing single
-\papersize letterpaper
-\paperpackage a4
-\use_geometry 0
-\use_amsmath 0
-\use_natbib 0
-\use_numerical_citations 0
-\paperorientation portrait
-\secnumdepth 2
-\tocdepth 2
-\paragraph_separation indent
-\defskip medskip
-\quotes_language english
-\quotes_times 2
-\papercolumns 1
-\papersides 2
-\paperpagestyle headings
-
-\layout Title
-
-The Portals 3.2 Message Passing Interface
-\newline
- Revision 1.1
-\layout Author
-
-Ron Brightwell
-\begin_inset Foot
-collapsed true
-
-\layout Standard
-
-R.
- Brightwell and R.
- Riesen are with the Scalable Computing Systems Department, Sandia National
- Laboratories, P.O.
- Box 5800, Albuquerque, NM\SpecialChar ~
-\SpecialChar ~
-87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov.
-\end_inset
-
-, Arthur B.
- Maccabe
-\begin_inset Foot
-collapsed true
-
-\layout Standard
-
-A.
- B.
- Maccabe is with the Computer Science Department, University of New Mexico,
- Albuquerque, NM\SpecialChar ~
-\SpecialChar ~
-87131-1386, maccabe@cs.unm.edu.
-\end_inset
-
-, Rolf Riesen and Trammell Hudson
-\layout Abstract
-
-This report presents a specification for the Portals 3.2 message passing
- interface.
- Portals 3.2 is intended to allow scalable, high-performance network communicatio
-n between nodes of a parallel computing system.
- Specifically, it is designed to support a parallel computing platform composed
- of clusters of commodity workstations connected by a commodity system area
- network fabric.
- In addition, Portals 3.2 is well suited to massively parallel processing
- and embedded systems.
- Portals 3.2 represents an adaption of the data movement layer developed
- for massively parallel processing platforms, such as the 4500-node Intel
- TeraFLOPS machine.
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-clearpage
-\backslash
-pagenumbering{roman}
-\backslash
-setcounter{page}{3}
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset LatexCommand \tableofcontents{}
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-cleardoublepage
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset FloatList figure
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-cleardoublepage
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset FloatList table
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-cleardoublepage
-\end_inset
-
-
-\layout Chapter*
-
-Summary of Changes for Revision 1.1
-\layout Enumerate
-
-Updated version number to 3.2 throughout the document
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sub:PtlGetId}
-
-\end_inset
-
-: added
-\family typewriter
-PTL_SEGV
-\family default
- to error list for
-\shape italic
-PtlGetId
-\shape default
-.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
-: added
-\family typewriter
-PTL_ML_TOOLONG
-\family default
- to error list for
-\shape italic
-PtlMEAttach
-\shape default
-.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:meunlink}
-
-\end_inset
-
-: removed text referring to a list of associated memory descriptors.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:mdfree}
-
-\end_inset
-
-: added text to describe unlinking a free-floating memory descriptor.
-\layout Enumerate
-
-Table
-\begin_inset LatexCommand \ref{tab:types}
-
-\end_inset
-
-: added entry for
-\family typewriter
-ptl_seq_t
-\family default
-.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-:
-\begin_deeper
-\layout Enumerate
-
-added definition of
-\family typewriter
-max_offset
-\family default
-.
-\layout Enumerate
-
-added text to clarify
-\family typewriter
-PTL_MD_MANAGE_REMOTE
-\family default
-.
-\end_deeper
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset
-
-: modified text for
-\family typewriter
-unlink_op
-\family default
-.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset
-
-: added text to clarify multiple calls to
-\shape italic
-PtlNIInit
-\shape default
-.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset
-
-: added text to clarify
-\family typewriter
-unlink_nofit
-\family default
-.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:receiving}
-
-\end_inset
-
-: removed text indicating that an MD will reject a message if the associated
- EQ is full.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:mdfree}
-
-\end_inset
-
-: added
-\family typewriter
-PTL_MD_INUSE
-\family default
- error code and text to indicate that only MDs with no pending operations
- can be unlinked.
-\layout Enumerate
-
-Table
-\begin_inset LatexCommand \ref{tab:retcodes}
-
-\end_inset
-
-: added
-\family typewriter
-PTL_MD_INUSE
-\family default
- return code.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:event-type}
-
-\end_inset
-
-: added user id field, MD handle field, and NI specific failure field to
- the
-\family typewriter
-ptl_event_t
-\family default
- structure.
-\layout Enumerate
-
-Table
-\begin_inset LatexCommand \ref{tab:types}
-
-\end_inset
-
-: added
-\family typewriter
-ptl_ni_fail_t
-\family default
-.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:event-type}
-
-\end_inset
-
-: added
-\family typewriter
-PTL_EVENT_UNLINK
-\family default
- event type.
-\layout Enumerate
-
-Table
-\begin_inset LatexCommand \ref{tab:func}
-
-\end_inset
-
-: removed
-\shape slanted
-PtlTransId
-\shape default
-.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
-, Section
-\begin_inset LatexCommand \ref{sec:meinsert}
-
-\end_inset
-
-, Section
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-: listed allowable constants with relevant fields.
-\layout Enumerate
-
-Table
-\begin_inset LatexCommand \ref{tab:func}
-
-\end_inset
-
-: added
-\shape italic
-PtlMEAttachAny
-\shape default
- function.
-\layout Enumerate
-
-Table
-\begin_inset LatexCommand \ref{tab:retcodes}
-
-\end_inset
-
-: added
-\family typewriter
-PTL_PT_FULL
-\family default
- return code for
-\shape italic
-PtlMEAttachAny
-\shape default
-.
-\layout Enumerate
-
-Table
-\begin_inset LatexCommand \ref{tab:oconsts}
-
-\end_inset
-
-: updated to reflect new event types.
-\layout Enumerate
-
-Section
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset
-
-: added
-\family typewriter
-ptl_nid_t
-\family default
-,
-\family typewriter
-ptl_pid_t
-\family default
-, and
-\family typewriter
-ptl_uid_t
-\family default
-.
-\layout Chapter*
-
-Summary of Changes for Version 3.1
-\layout Section*
-
-Thread Issues
-\layout Standard
-
-The most significant change to the interface from version 3.0 to 3.1 involves
- the clarification of how the interface interacts with multi-threaded applicatio
-ns.
- We adopted a generic thread model in which processes define an address
- space and threads share the address space.
- Consideration of the API in the light of threads lead to several clarifications
- throughout the document:
-\layout Enumerate
-
-Glossary:
-\begin_deeper
-\layout Enumerate
-
-added a definition for
-\emph on
-thread
-\emph default
-,
-\layout Enumerate
-
-reworded the definition for
-\emph on
-process
-\emph default
-.
-
-\end_deeper
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:apiover}
-
-\end_inset
-
-: added section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:threads}
-
-\end_inset
-
- to describe the multi-threading model used by the Portals API.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ptlinit}
-
-\end_inset
-
-:
-\emph on
-PtlInit
-\emph default
- must be called at least once and may be called any number of times.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ptlfini}
-
-\end_inset
-
-:
-\emph on
-PtlFini
-\emph default
- should be called once as the process is terminating and not as each thread
- terminates.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:pid}
-
-\end_inset
-
-: Portals does not define thread ids.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset
-
-: network interfaces are associated with processes, not threads.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset
-
-:
-\emph on
-PtlNIInit
-\emph default
- must be called at least once and may be called any number of times.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:eqget}
-
-\end_inset
-
-:
-\emph on
-PtlEQGet
-\emph default
- returns
-\family typewriter
-PTL_EQ_EMPTY
-\family default
- if a thread is blocked on
-\emph on
-PtlEQWait
-\emph default
-.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:eqwait}
-
-\end_inset
-
-: waiting threads are awakened in FIFO order.
-
-\layout Standard
-
-Two functions,
-\emph on
-PtlNIBarrier
-\emph default
- and
-\emph on
-PtlEQCount
-\emph default
- were removed from the API.
-
-\emph on
-PtlNIBarrier
-\emph default
- was defined to block the calling process until all of the processes in
- the application group had invoked
-\emph on
-PtlNIBarrier
-\emph default
-.
- We now consider this functionality, along with the concept of groups (see
- the discussion under
-\begin_inset Quotes eld
-\end_inset
-
-other changes
-\begin_inset Quotes erd
-\end_inset
-
-), to be part of the runtime system, not part of the Portals API.
-
-\emph on
-PtlEQCount
-\emph default
- was defined to return the number of events in an event queue.
- Because external operations may lead to new events being added and other
- threads may remove events, the value returned by
-\emph on
-PtlEQCount
-\emph default
- would have to be a hint about the number of events in the event queue.
-\layout Section*
-
-Handling small, unexpected messages
-\layout Standard
-
-Another set of changes relates to handling small unexpected messages in
- MPI.
- In designing version 3.0, we assumed that each unexpected message would
- be placed in a unique memory descriptor.
- To avoid the need to process a long list of memory descriptors, we moved
- the memory descriptors out of the match list and hung them off of a single
- match list entry.
- In this way, large unexpected messages would only encounter a single
-\begin_inset Quotes eld
-\end_inset
-
-short message
-\begin_inset Quotes erd
-\end_inset
-
- match list entry before encountering the
-\begin_inset Quotes eld
-\end_inset
-
-long message
-\begin_inset Quotes erd
-\end_inset
-
- match list entry.
- Experience with this strategy identified resource management problems with
- this approach.
- In particular, a long sequence of very short (or zero length) messages
- could quickly exhaust the memory descriptors constructed for handling unexpecte
-d messages.
- Our new strategy involves the use of several very large memory descriptors
- for small unexpected messages.
- Consecutive unexpected messages will be written into the first of these
- memory descriptors until the memory descriptor fills up.
- When the first of the
-\begin_inset Quotes eld
-\end_inset
-
-small memory
-\begin_inset Quotes erd
-\end_inset
-
- descriptors fills up, it will be unlinked and subsequent short messages
- will be written into the next
-\begin_inset Quotes eld
-\end_inset
-
-short message
-\begin_inset Quotes erd
-\end_inset
-
- memory descriptor.
- In this case, a
-\begin_inset Quotes eld
-\end_inset
-
-short message
-\begin_inset Quotes erd
-\end_inset
-
- memory descriptor will be declared full when it does not have sufficient
- space for the largest small unexpected message.
-\layout Standard
-
-This lead to two significant changes.
- First, each match list entry now has a single memory descriptor rather
- than a list of memory descriptors.
- Second, in addition to exceeding the operation threshold, a memory descriptor
- can be unlinked when the local offset exceeds a specified value.
- These changes have lead to several changes in this document:
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{subsec:paddress}
-
-\end_inset
-
-:
-\begin_deeper
-\layout Enumerate
-
-removed references to the memory descriptor list,
-\layout Enumerate
-
-changed the portals address translation description to indicate that unlinking
- a memory descriptor implies unlinking the associated match list entry--match
- list entries can no longer be unlinked independently from the memory descriptor.
-
-\end_deeper
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
-:
-\begin_deeper
-\layout Enumerate
-
-removed unlink from argument list,
-\layout Enumerate
-
-removed description of
-\family typewriter
-ptl_unlink
-\family default
- type,
-\layout Enumerate
-
-changed wording of the error condition when the Portal table index already
- has an associated match list.
-
-\end_deeper
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:meinsert}
-
-\end_inset
-
-: removed unlink from argument list.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-: added
-\family typewriter
-max_offset
-\family default
-.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset
-
-:
-\begin_deeper
-\layout Enumerate
-
-added description of
-\family typewriter
-ptl_unlink
-\family default
- type,
-\layout Enumerate
-
-removed reference to memory descriptor lists,
-\layout Enumerate
-
-changed wording of the error condition when match list entry already has
- an associated memory descriptor,
-\layout Enumerate
-
-changed the description of the
-\family typewriter
-unlink
-\family default
- argument.
-
-\end_deeper
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset
-
-: removed
-\family typewriter
-PtlMDInsert
-\family default
- operation.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdbind}
-
-\end_inset
-
-: removed references to memory descriptor list.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdfree}
-
-\end_inset
-
-: removed reference to memory descriptor list.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:summary}
-
-\end_inset
-
-: removed references to PtlMDInsert.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:semantics}
-
-\end_inset
-
-: removed reference to memory descriptor list.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:exmpi}
-
-\end_inset
-
-: revised the MPI example to reflect the changes to the interface.
-
-\layout Standard
-
-Several changes have been made to improve the general documentation of the
- interface.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset
-
-: documented the special value
-\family typewriter
-PTL_EQ_NONE
-\family default
-.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset
-
-: documented the special value
-\family typewriter
-PTL_ID_ANY
-\family default
-.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdbind}
-
-\end_inset
-
-: documented the return value
-\family typewriter
-PTL_INV_EQ
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdupdate}
-
-\end_inset
-
-: clarified the description of the
-\emph on
-PtlMDUpdate
-\emph default
- function.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:implvals}
-
-\end_inset
-
-: introduced a new section to document the implementation defined values.
-
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:summary}
-
-\end_inset
-
-: modified Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:oconsts}
-
-\end_inset
-
- to indicate where each constant is introduced and where it is used.
-
-\layout Section*
-
-Other changes
-\layout Subsection*
-
-Implementation defined limits (Section
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset
-
-)
-\layout Standard
-
-The earlier version provided implementation defined limits for the maximum
- number of match entries, the maximum number of memory descriptors, etc.
- Rather than spanning the entire implementation, these limits are now associated
- with individual network interfaces.
-\layout Subsection*
-
-Added User Ids (Section
-\begin_inset LatexCommand \ref{sec:uid}
-
-\end_inset
-
-)
-\layout Standard
-
-Group Ids had been used to simplify access control entries.
- In particular, a process could allow access for all of the processes in
- a group.
- User Ids have been introduced to regain this functionality.
- We use user ids to fill this role.
-\layout Subsection*
-
-Removed Group Ids and Rank Ids (Section
-\begin_inset LatexCommand \ref{sec:pid}
-
-\end_inset
-
-)
-\layout Standard
-
-The earlier version of Portals had two forms for addressing processes: <node
- id, process id> and <group id, rank id>.
- A process group was defined as the collection processes created during
- application launch.
- Each process in the group was given a unique rank id in the range 0 to
-
-\begin_inset Formula $n-1$
-\end_inset
-
- where
-\begin_inset Formula $n$
-\end_inset
-
- was the number of processes in the group.
- We removed groups because they are better handled in the runtime system.
-\layout Subsection*
-
-Match lists (Section
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
-)
-\layout Standard
-
-It is no longer illegal to have an existing match entry when calling PtlMEAttach.
- A position argument was added to the list of arguments supplied to
-\emph on
-PtlMEAttach
-\emph default
- to specify whether the new match entry is prepended or appended to the
- existing list.
- If there is no existing match list, the position argument is ignored.
-\layout Subsection*
-
-Unlinking Memory Descriptors (Section
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset
-
-)
-\layout Standard
-
-Previously, a memory descriptor could be unlinked if the offset exceeded
- a threshold upon the completion of an operation.
- In this version, the unlinking is delayed until there is a matching operation
- which requires more memory than is currently available in the descriptor.
- In addition to changes in section, this lead to a revision of Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:flow}
-
-\end_inset
-
-.
-\layout Subsection*
-
-Split Phase Operations and Events (Section
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset
-
-)
-\layout Standard
-
-Previously, there were five types of events:
-\family typewriter
-PTL_EVENT_PUT
-\family default
-,
-\family typewriter
-PTL_EVENT_GET
-\family default
-,
-\family typewriter
-PTL_EVENT_REPLY
-\family default
-,
-\family typewriter
-PTL_EVENT_SENT
-\family default
-, and
-\family typewriter
-PTL_EVENT_ACK.
-
-\family default
-The first four of these reflected the completion of potentially long operations.
- We have introduced new event types to reflect the fact that long operations
- have a distinct starting point and a distinct completion point.
- Moreover, the completion may be successful or unsuccessful.
-\layout Standard
-
-In addition to providing a mechanism for reporting failure to higher levels
- of software, this split provides an opportunity for for improved ordering
- semantics.
- Previously, if one process intiated two operations (e.g., two put operations)
- on a remote process, these operations were guaranteed to complete in the
- same order that they were initiated.
- Now, we only guarantee that the initiation events are delivered in the
- same order.
- In particular, the operations do not need to complete in the order that
- they were intiated.
-\layout Subsection*
-
-Well known proces ids (Section
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset
-
-)
-\layout Standard
-
-To support the notion of
-\begin_inset Quotes eld
-\end_inset
-
-well known process ids,
-\begin_inset Quotes erd
-\end_inset
-
- we added a process id argument to the arguments for PtlNIInit.
-\layout Chapter*
-
-Glossary
-\layout Description
-
-API Application Programming Interface.
- A definition of the functions and semantics provided by library of functions.
-
-\layout Description
-
-Initiator A
-\emph on
-process
-\emph default
- that initiates a message operation.
-
-\layout Description
-
-Message An application-defined unit of data that is exchanged between
-\emph on
-processes
-\emph default
-.
-
-\layout Description
-
-Message\SpecialChar ~
-Operation Either a put operation, which writes data, or a get operation,
- which reads data.
-
-\layout Description
-
-Network A network provides point-to-point communication between
-\emph on
-nodes
-\emph default
-.
- Internally, a network may provide multiple routes between endpoints (to
- improve fault tolerance or to improve performance characteristics); however,
- multiple paths will not be exposed outside of the network.
-
-\layout Description
-
-Node A node is an endpoint in a
-\emph on
-network
-\emph default
-.
- Nodes provide processing capabilities and memory.
- A node may provide multiple processors (an SMP node) or it may act as a
-
-\emph on
-gateway
-\emph default
- between networks.
-
-\layout Description
-
-Process A context of execution.
- A process defines a virtual memory (VM) context.
- This context is not shared with other processes.
- Several threads may share the VM context defined by a process.
-
-\layout Description
-
-Target A
-\emph on
-process
-\emph default
- that is acted upon by a message operation.
-
-\layout Description
-
-Thread A context of execution that shares a VM context with other threads.
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-cleardoublepage
-\layout Standard
-
-\backslash
-setcounter{page}{1}
-\backslash
-pagenumbering{arabic}
-\end_inset
-
-
-\layout Chapter
-
-Introduction
-\begin_inset LatexCommand \label{sec:intro}
-
-\end_inset
-
-
-\layout Section
-
-Overview
-\layout Standard
-
-This document describes an application programming interface for message
- passing between nodes in a system area network.
- The goal of this interface is to improve the scalability and performance
- of network communication by defining the functions and semantics of message
- passing required for scaling a parallel computing system to ten thousand
- nodes.
- This goal is achieved by providing an interface that will allow a quality
- implementation to take advantage of the inherently scalable design of Portals.
-\layout Standard
-
-This document is divided into several sections:
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:intro}
-
-\end_inset
-
----Introduction This section describes the purpose and scope of the Portals
- API.
-
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:apiover}
-
-\end_inset
-
----An\SpecialChar ~
-Overview\SpecialChar ~
-of\SpecialChar ~
-the\SpecialChar ~
-Portals\SpecialChar ~
-3.1\SpecialChar ~
-API This section gives a brief overview of the
- Portals API.
- The goal is to introduce the key concepts and terminology used in the descripti
-on of the API.
-
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:api}
-
-\end_inset
-
----The\SpecialChar ~
-Portals\SpecialChar ~
-3.2\SpecialChar ~
-API This section describes the functions and semantics of
- the Portals application programming interface.
-
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:semantics}
-
-\end_inset
-
---The\SpecialChar ~
-Semantics\SpecialChar ~
-of\SpecialChar ~
-Message\SpecialChar ~
-Transmission This section describes the semantics
- of message transmission.
- In particular, the information transmitted in each type of message and
- the processing of incoming messages.
-
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:examples}
-
-\end_inset
-
----Examples This section presents several examples intended to illustrates
- the use of the Portals API.
-
-\layout Section
-
-Purpose
-\layout Standard
-
-Existing message passing technologies available for commodity cluster networking
- hardware do not meet the scalability goals required by the Cplant\SpecialChar ~
-
-\begin_inset LatexCommand \cite{Cplant}
-
-\end_inset
-
- project at Sandia National Laboratories.
- The goal of the Cplant project is to construct a commodity cluster that
- can scale to the order of ten thousand nodes.
- This number greatly exceeds the capacity for which existing message passing
- technologies have been designed and implemented.
-\layout Standard
-
-In addition to the scalability requirements of the network, these technologies
- must also be able to support a scalable implementation of the Message Passing
- Interface (MPI)\SpecialChar ~
-
-\begin_inset LatexCommand \cite{MPIstandard}
-
-\end_inset
-
- standard, which has become the
-\shape italic
-de facto
-\shape default
- standard for parallel scientific computing.
- While MPI does not impose any scalability limitations, existing message
- passing technologies do not provide the functionality needed to allow implement
-ations of MPI to meet the scalability requirements of Cplant.
-\layout Standard
-
-The following are properties of a network architecture that do not impose
- any inherent scalability limitations:
-\layout Itemize
-
-Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~
-
-\begin_inset LatexCommand \cite{VIA}
-
-\end_inset
-
- and TCP/IP sockets, have limitations on the number of peer connections
- that can be established.
-
-\layout Itemize
-
-Network independence - Many communication systems depend on the host processor
- to perform operations in order for messages in the network to be consumed.
- Message consumption from the network should not be dependent on host processor
- activity, such as the operating system scheduler or user-level thread scheduler.
-
-\layout Itemize
-
-User-level flow control - Many communication systems manage flow control
- internally to avoid depleting resources, which can significantly impact
- performance as the number of communicating processes increases.
-
-\layout Itemize
-
-OS Bypass - High performance network communication should not involve memory
- copies into or out of a kernel-managed protocol stack.
-
-\layout Standard
-
-The following are properties of a network architecture that do not impose
- scalability limitations for an implementation of MPI:
-\layout Itemize
-
-Receiver-managed - Sender-managed message passing implementations require
- a persistent block of memory to be available for every process, requiring
- memory resources to increase with job size and requiring user-level flow
- control mechanisms to manage these resources.
-
-\layout Itemize
-
-User-level Bypass - While OS Bypass is necessary for high-performance, it
- alone is not sufficient to support the Progress Rule of MPI asynchronous
- operations.
-
-\layout Itemize
-
-Unexpected messages - Few communication systems have support for receiving
- messages for which there is no prior notification.
- Support for these types of messages is necessary to avoid flow control
- and protocol overhead.
-
-\layout Section
-
-Background
-\layout Standard
-
-Portals was originally designed for and implemented on the nCube machine
- as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~
-
-\begin_inset LatexCommand \cite{SUNMOS}
-
-\end_inset
-
- and Puma\SpecialChar ~
-
-\begin_inset LatexCommand \cite{PumaOS}
-
-\end_inset
-
- lightweight kernel development projects.
- Portals went through two design phases, the latter of which is used on
- the 4500-node Intel TeraFLOPS machine\SpecialChar ~
-
-\begin_inset LatexCommand \cite{TFLOPS}
-
-\end_inset
-
-.
- Portals have been very successful in meeting the needs of such a large
- machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~
-
-\begin_inset LatexCommand \cite{PumaMPI}
-
-\end_inset
-
-, but also for implementing the scalable run-time environment and parallel
- I/O capabilities of the machine.
-\layout Standard
-
-The second generation Portals implementation was designed to take full advantage
- of the hardware architecture of large MPP machines.
- However, efforts to implement this same design on commodity cluster technology
- identified several limitations, due to the differences in network hardware
- as well as to shortcomings in the design of Portals.
-\layout Section
-
-Scalability
-\layout Standard
-
-The primary goal in the design of Portals is scalability.
- Portals are designed specifically for an implementation capable of supporting
- a parallel job running on tens of thousands of nodes.
- Performance is critical only in terms of scalability.
- That is, the level of message passing performance is characterized by how
- far it allows an application to scale and not by how it performs in micro-bench
-marks (e.g., a two node bandwidth or latency test).
-\layout Standard
-
-The Portals API is designed to allow for scalability, not to guarantee it.
- Portals cannot overcome the shortcomings of a poorly designed application
- program.
- Applications that have inherent scalability limitations, either through
- design or implementation, will not be transformed by Portals into scalable
- applications.
- Scalability must be addressed at all levels.
- Portals do not inhibit scalability, but do not guarantee it either.
-\layout Standard
-
-To support scalability, the Portals interface maintains a minimal amount
- of state.
- Portals provide reliable, ordered delivery of messages between pairs of
- processes.
- They are connectionless: a process is not required to explicitly establish
- a point-to-point connection with another process in order to communicate.
- Moreover, all buffers used in the transmission of messages are maintained
- in user space.
- The target process determines how to respond to incoming messages, and
- messages for which there are no buffers are discarded.
-\layout Section
-
-Communication Model
-\layout Standard
-
-Portals combine the characteristics of both one-sided and two-sided communication.
- They define a
-\begin_inset Quotes eld
-\end_inset
-
-matching put
-\begin_inset Quotes erd
-\end_inset
-
- operation and a
-\begin_inset Quotes eld
-\end_inset
-
-matching get
-\begin_inset Quotes erd
-\end_inset
-
- operation.
- The destination of a put (or send) is not an explicit address; instead,
- each message contains a set of match bits that allow the receiver to determine
- where incoming messages should be placed.
- This flexibility allows Portals to support both traditional one-sided operation
-s and two-sided send/receive operations.
-\layout Standard
-
-Portals allows the target to determine whether incoming messages are acceptable.
- A target process can choose to accept message operations from any specific
- process or can choose to ignore message operations from any specific process.
-\layout Section
-
-Zero Copy, OS Bypass and Application Bypass
-\layout Standard
-
-In traditional system architectures, network packets arrive at the network
- interface card (NIC), are passed through one or more protocol layers in
- the operating system, and eventually copied into the address space of the
- application.
- As network bandwidth began to approach memory copy rates, reduction of
- memory copies became a critical concern.
- This concern led to the development of zero-copy message passing protocols
- in which message copies are eliminated or pipelined to avoid the loss of
- bandwidth.
-\layout Standard
-
-A typical zero-copy protocol has the NIC generate an interrupt for the CPU
- when a message arrives from the network.
- The interrupt handler then controls the transfer of the incoming message
- into the address space of the appropriate application.
- The interrupt latency, the time from the initiation of an interrupt until
- the interrupt handler is running, is fairly significant.
- To avoid this cost, some modern NICs have processors that can be programmed
- to implement part of a message passing protocol.
- Given a properly designed protocol, it is possible to program the NIC to
- control the transfer of incoming messages, without needing to interrupt
- the CPU.
- Because this strategy does not need to involve the OS on every message
- transfer, it is frequently called
-\begin_inset Quotes eld
-\end_inset
-
-OS Bypass.
-\begin_inset Quotes erd
-\end_inset
-
- ST\SpecialChar ~
-
-\begin_inset LatexCommand \cite{ST}
-
-\end_inset
-
-, VIA\SpecialChar ~
-
-\begin_inset LatexCommand \cite{VIA}
-
-\end_inset
-
-, FM\SpecialChar ~
-
-\begin_inset LatexCommand \cite{FM2}
-
-\end_inset
-
-, GM\SpecialChar ~
-
-\begin_inset LatexCommand \cite{GM}
-
-\end_inset
-
-, and Portals are examples of OS Bypass protocols.
-\layout Standard
-
-Many protocols that support OS Bypass still require that the application
- actively participate in the protocol to ensure progress.
- As an example, the long message protocol of PM requires that the application
- receive and reply to a request to put or get a long message.
- This complicates the runtime environment, requiring a thread to process
- incoming requests, and significantly increases the latency required to
- initiate a long message protocol.
- The Portals message passing protocol does not require activity on the part
- of the application to ensure progress.
- We use the term
-\begin_inset Quotes eld
-\end_inset
-
-Application Bypass
-\begin_inset Quotes erd
-\end_inset
-
- to refer to this aspect of the Portals protocol.
-\layout Section
-
-Faults
-\layout Standard
-
-Given the number of components that we are dealing with and the fact that
- we are interested in supporting applications that run for very long times,
- failures are inevitable.
- The Portals API recognizes that the underlying transport may not be able
- to successfully complete an operation once it has been initiated.
- This is reflected in the fact that the Portals API reports three types
- of events: events indicating the initiation of an operation, events indicating
- the successful completion of an operation, and events indicating the unsuccessf
-ul completion of an operation.
- Every initiation event is eventually followed by a successful completion
- event or an unsuccessful completion event.
-\layout Standard
-
-Between the time an operation is started and the time that the operation
- completes (successfully or unsuccessfully), any memory associated with
- the operation should be considered volatile.
- That is, the memory may be changed in unpredictable ways while the operation
- is progressing.
- Once the operation completes, the memory associated with the operation
- will not be subject to further modification (from this operation).
- Notice that unsuccessful operations may alter memory in an essentially
- unpredictable fashion.
-\layout Chapter
-
-An Overview of the Portals API
-\begin_inset LatexCommand \label{sec:apiover}
-
-\end_inset
-
-
-\layout Standard
-
-In this section, we give a conceptual overview of the Portals API.
- The goal is to provide a context for understanding the detailed description
- of the API presented in the next section.
-\layout Section
-
-Data Movement
-\begin_inset LatexCommand \label{sec:dmsemantics}
-
-\end_inset
-
-
-\layout Standard
-
-A Portal represents an opening in the address space of a process.
- Other processes can use a Portal to read (get) or write (put) the memory
- associated with the portal.
- Every data movement operation involves two processes, the
-\series bold
-initiator
-\series default
- and the
-\series bold
-target
-\series default
-.
- The initiator is the process that initiates the data movement operation.
- The target is the process that responds to the operation by either accepting
- the data for a put operation, or replying with the data for a get operation.
-\layout Standard
-
-In this discussion, activities attributed to a process may refer to activities
- that are actually performed by the process or
-\emph on
-on behalf of the process
-\emph default
-.
- The inclusiveness of our terminology is important in the context of
-\emph on
-application bypass
-\emph default
-.
- In particular, when we note that the target sends a reply in the case of
- a get operation, it is possible that reply will be generated by another
- component in the system, bypassing the application.
-\layout Standard
-
-Figures\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:put}
-
-\end_inset
-
- and
-\begin_inset LatexCommand \ref{fig:get}
-
-\end_inset
-
- present graphical interpretations of the Portal data movement operations:
- put and get.
- In the case of a put operation, the initiator sends a put request message
- containing the data to the target.
- The target translates the Portal addressing information in the request
- using its local Portal structures.
- When the request has been processed, the target optionally sends an acknowledge
-ment message.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center
-
-\begin_inset Graphics FormatVersion 1
- filename put.eps
- display color
- size_type 0
- rotateOrigin center
- lyxsize_type 1
- lyxwidth 218pt
- lyxheight 119pt
-\end_inset
-
-
-\layout Caption
-
-Portal Put (Send)
-\begin_inset LatexCommand \label{fig:put}
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-In the case of a get operation, the initiator sends a get request to the
- target.
- As with the put operation, the target translates the Portal addressing
- information in the request using its local Portal structures.
- Once it has translated the Portal addressing information, the target sends
- a reply that includes the requested data.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center
-
-\begin_inset Graphics FormatVersion 1
- filename get.eps
- display color
- size_type 0
- rotateOrigin center
- lyxsize_type 1
- lyxwidth 218pt
- lyxheight 119pt
-\end_inset
-
-
-\layout Caption
-
-Portal Get
-\begin_inset LatexCommand \label{fig:get}
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-We should note that Portal address translations are only performed on nodes
- that respond to operations initiated by other nodes.
- Acknowledgements and replies to get operations bypass the portals address
- translation structures.
-\layout Section
-
-Portal Addressing
-\begin_inset LatexCommand \label{subsec:paddress}
-
-\end_inset
-
-
-\layout Standard
-
-One-sided data movement models (e.g., shmem\SpecialChar ~
-
-\begin_inset LatexCommand \cite{CraySHMEM}
-
-\end_inset
-
-, ST\SpecialChar ~
-
-\begin_inset LatexCommand \cite{ST}
-
-\end_inset
-
-, MPI-2\SpecialChar ~
-
-\begin_inset LatexCommand \cite{MPI2}
-
-\end_inset
-
-) typically use a triple to address memory on a remote node.
- This triple consists of a process id, memory buffer id, and offset.
- The process id identifies the target process, the memory buffer id specifies
- the region of memory to be used for the operation, and the offset specifies
- an offset within the memory buffer.
-\layout Standard
-
-In addition to the standard address components (process id, memory buffer
- id, and offset), a Portal address includes a set of match bits.
- This addressing model is appropriate for supporting one-sided operations
- as well as traditional two-sided message passing operations.
- Specifically, the Portals API provides the flexibility needed for an efficient
- implementation of MPI-1, which defines two-sided operations with one-sided
- completion semantics.
-\layout Standard
-
-Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:portals}
-
-\end_inset
-
- presents a graphical representation of the structures used by a target
- in the interpretation of a Portal address.
- The process id is used to route the message to the appropriate node and
- is not reflected in this diagram.
- The memory buffer id, called the
-\series bold
-portal id
-\series default
-, is used as an index into the Portal table.
- Each element of the Portal table identifies a match list.
- Each element of the match list specifies two bit patterns: a set of
-\begin_inset Quotes eld
-\end_inset
-
-don't care
-\begin_inset Quotes erd
-\end_inset
-
- bits, and a set of
-\begin_inset Quotes eld
-\end_inset
-
-must match
-\begin_inset Quotes erd
-\end_inset
-
- bits.
- In addition to the two sets of match bits, each match list element has
- at most one memory descriptor.
- Each memory descriptor identifies a memory region and an optional event
- queue.
- The memory region specifies the memory to be used in the operation and
- the event queue is used to record information about these operations.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center
-
-\begin_inset Graphics FormatVersion 1
- filename portals.eps
- display color
- size_type 0
- rotateOrigin center
- lyxsize_type 1
- lyxwidth 305pt
- lyxheight 106pt
-\end_inset
-
-
-\layout Caption
-
-Portal Addressing Structures
-\begin_inset LatexCommand \label{fig:portals}
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:flow}
-
-\end_inset
-
- illustrates the steps involved in translating a Portal address, starting
- from the first element in a match list.
- If the match criteria specified in the match list entry are met and the
- memory descriptor list accepts the operation
-\begin_inset Foot
-collapsed true
-
-\layout Standard
-
-Memory descriptors can reject operations because a threshold has been exceeded
- or because the memory region does not have sufficient space, see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset
-
-
-\end_inset
-
-, the operation (put or get) is performed using the memory region specified
- in the memory descriptor.
- If the memory descriptor specifies that it is to be unlinked when a threshold
- has been exceeded, the match list entry is removed from the match list
- and the resources associated with the memory descriptor and match list
- entry are reclaimed.
- Finally, if there is an event queue specified in the memory descriptor,
- the operation is logged in the event queue.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center
-
-\begin_inset Graphics FormatVersion 1
- filename flow_new.eps
- display color
- size_type 0
- rotateOrigin center
- lyxsize_type 1
- lyxwidth 447pt
- lyxheight 282pt
-\end_inset
-
-
-\layout Caption
-
-Portals Address Translation
-\begin_inset LatexCommand \label{fig:flow}
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-If the match criteria specified in the match list entry are not met, or
- there is no memory descriptor associated with the match list entry, or
- the memory descriptor associated with the match list entry rejects the
- operation, the address translation continues with the next match list entry.
- If the end of the match list has been reached, the address translation
- is aborted and the incoming request is discarded.
-\layout Section
-
-Access Control
-\layout Standard
-
-A process can control access to its portals using an access control list.
- Each entry in the access control list specifies a process id and a Portal
- table index.
- The access control list is actually an array of entries.
- Each incoming request includes an index into the access control list (i.e.,
- a
-\begin_inset Quotes eld
-\end_inset
-
-cookie
-\begin_inset Quotes erd
-\end_inset
-
- or hint).
- If the id of the process issuing the request doesn't match the id specified
- in the access control list entry or the Portal table index specified in
- the request doesn't match the Portal table index specified in the access
- control list entry, the request is rejected.
- Process identifiers and Portal table indexes may include wild card values
- to increase the flexibility of this mechanism.
-
-\layout Standard
-
-Two aspects of this design merit further discussion.
- First, the model assumes that the information in a message header, the
- sender's id in particular, is trustworthy.
- In most contexts, we assume that the entity that constructs the header
- is trustworthy; however, using cryptographic techniques, we could easily
- devise a protocol that would ensure the authenticity of the sender.
-\layout Standard
-
-Second, because the access check is performed by the receiver, it is possible
- that a malicious process will generate thousands of messages that will
- be denied by the receiver.
- This could saturate the network and/or the receiver, resulting in a
-\emph on
-denial of service
-\emph default
- attack.
- Moving the check to the sender using capabilities would remove the potential
- for this form of attack.
- However, the solution introduces the complexities of capability management
- (exchange of capabilities, revocation, protections, etc).
-\layout Section
-
-Multi-threaded Applications
-\begin_inset LatexCommand \label{sec:threads}
-
-\end_inset
-
-
-\layout Standard
-
-The Portals API supports a generic view of multi-threaded applications.
- From the perspective of the Portals API, an application program is defined
- by a set of processes.
- Each process defines a unique address space.
- The Portals API defines access to this address space from other processes
- (using portals addressing and the data movement operations).
- A process may have one or more
-\emph on
-threads
-\emph default
- executing in its address space.
-
-\layout Standard
-
-With the exception of
-\emph on
-PtlEQWait
-\emph default
- every function in the Portals API is non-blocking and atomic with respect
- to both other threads and external operations that result from data movement
- operations.
- While individual operations are atomic, sequences of these operations may
- be interleaved between different threads and with external operations.
- The Portals API does not provide any mechanisms to control this interleaving.
- It is expected that these mechanisms will be provided by the API used to
- create threads.
-\layout Chapter
-
-The Portals API
-\begin_inset LatexCommand \label{sec:api}
-
-\end_inset
-
-
-\layout Section
-
-Naming Conventions
-\begin_inset LatexCommand \label{sec:conv}
-
-\end_inset
-
-
-\layout Standard
-
-The Portals API defines two types of entities: functions and types.
- Functions always start with
-\emph on
-Ptl
-\emph default
- and use mixed upper and lower case.
- When used in the body of this report, function names appear in italic face,
- e.g.,
-\emph on
-PtlInit
-\emph default
-.
- The functions associated with an object type will have names that start
- with
-\emph on
-Ptl
-\emph default
-, followed by the two letter object type code shown in Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:objcodes}
-
-\end_inset
-
-.
- As an example, the function
-\emph on
-PtlEQAlloc
-\emph default
- allocates resources for an event queue.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Object Type Codes
-\begin_inset LatexCommand \label{tab:objcodes}
-
-\end_inset
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-medskip
-\newline
-
-\end_inset
-
-
-\layout Standard
-\align center
-
-\size small
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="5" columns="3">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\emph on
-xx
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Name
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Section
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-EQ
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Event Queue
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- MD
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Memory Descriptor
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- ME
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Match list Entry
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- NI
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Network Interface
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-Type names use lower case with underscores to separate words.
- Each type name starts with
-\family typewriter
-ptl
-\family default
-_ and ends with
-\family typewriter
-_t
-\family default
-.
- When used in the body of this report, type names appear in a fixed font,
- e.g.,
-\family typewriter
-ptl_match_bits_t
-\family default
-.
-\layout Standard
-
-Names for constants use upper case with underscores to separate words.
- Each constant name starts with
-\family typewriter
-PTL_
-\family default
-.
- When used in the body of this report, constant names appear in a fixed font,
- e.g.,
-\family typewriter
-PTL_OK
-\family default
-.
-\layout Section
-
-Base Types
-\layout Standard
-
-The Portals API defines a variety of base types.
- These types represent a simple renaming of the base types provided by the
- C programming language.
- In most cases these new type names have been introduced to improve type
- safety and to avoid issues arising from differences in representation sizes
- (e.g., 16-bit or 32-bit integers).
-\layout Subsection
-
-Sizes
-\begin_inset LatexCommand \label{sec:size-t}
-
-\end_inset
-
-
-\layout Standard
-
-The type
-\family typewriter
-ptl_size_t
-\family default
- is an unsigned 64-bit integral type used for representing sizes.
-\layout Subsection
-
-Handles
-\begin_inset LatexCommand \label{sec:handle-type}
-
-\end_inset
-
-
-\layout Standard
-
-Objects maintained by the API are accessed through handles.
- Handle types have names of the form
-\family typewriter
-ptl_handle_
-\emph on
-xx
-\emph default
-_t
-\family default
-, where
-\emph on
-xx
-\emph default
- is one of the two letter object type codes shown in Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:objcodes}
-
-\end_inset
-
-.
- For example, the type
-\family typewriter
-ptl_handle_ni_t
-\family default
- is used for network interface handles.
-\layout Standard
-
-Each type of object is given a unique handle type to enhance type checking.
- The type,
-\family typewriter
-ptl_handle_any_t
-\family default
-, can be used when a generic handle is needed.
- Every handle value can be converted into a value of type
-\family typewriter
-ptl_handle_any_t
-\family default
- without loss of information.
-\layout Standard
-
-Handles are not simple values.
- Every portals object is associated with a specific network interface and
- an identifier for this interface (along with an object identifier) is part
- of the handle for the object.
-\layout Standard
-
-The special value
-\family typewriter
-PTL_EQ_NONE
-\family default
-, of type
-\family typewriter
-ptl_handle_eq_t
-\family default
-, is used to indicate the absence of an event queue.
- See sections
-\begin_inset LatexCommand \ref{sec:mdfree}
-
-\end_inset
-
- and\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdupdate}
-
-\end_inset
-
- for uses of this value.
-\layout Subsection
-
-Indexes
-\begin_inset LatexCommand \label{sec:index-type}
-
-\end_inset
-
-
-\layout Standard
-
-The types
-\family typewriter
-ptl_pt_index_t
-\family default
- and
-\family typewriter
-ptl_ac_index_t
-\family default
- are integral types used for representing Portal table indexes and access
- control tables indexes, respectively.
- See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset
-
- for limits on values of these types.
-\layout Subsection
-
-Match Bits
-\begin_inset LatexCommand \label{sec:mb-type}
-
-\end_inset
-
-
-\layout Standard
-
-The type
-\family typewriter
-ptl_match_bits_t
-\family default
- is capable of holding unsigned 64-bit integer values.
-\layout Subsection
-
-Network Interfaces
-\begin_inset LatexCommand \label{sec:ni-type}
-
-\end_inset
-
-
-\layout Standard
-
-The type
-\family typewriter
-ptl_interface_t
-\family default
- is an integral type used for identifying different network interfaces.
- Users will need to consult the local documentation to determine appropriate
- values for the interfaces available.
- The special value
-\family typewriter
-PTL_IFACE_DEFAULT
-\family default
- identifies the default interface.
-\layout Subsection
-
-Identifiers
-\begin_inset LatexCommand \label{sec:id-type}
-
-\end_inset
-
-
-\layout Standard
-
-The type
-\family typewriter
-ptl_nid_t
-\family default
- is an integral type used for representing node ids
-\family typewriter
-, ptl_pid_t
-\family default
- is an integral type for representing process ids, and
-\family typewriter
-ptl_uid_t
-\family default
- is an integral type for representing user ids.
-\layout Standard
-
-The special values
-\family typewriter
-PTL_PID_ANY
-\family default
- matches any process identifier, PTL_NID_ANY matches any node identifier,
- and
-\family typewriter
-PTL_UID_ANY
-\family default
- matches any user identifier.
- See sections
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
- and\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset
-
- for uses of these values.
-\layout Subsection
-
-Status Registers
-\begin_inset LatexCommand \label{sec:stat-type}
-
-\end_inset
-
-
-\layout Standard
-
-Each network interface maintains an array of status registers that can be
- accessed using the
-\family typewriter
-PtlNIStatus
-\family default
- function (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:nistatus}
-
-\end_inset
-
-).
- The type
-\family typewriter
-ptl_sr_index_t
-\family default
- defines the types of indexes that can be used to access the status registers.
- The only index defined for all implementations is
-\family typewriter
-PTL_SR_DROP_COUNT
-\family default
- which identifies the status register that counts the dropped requests for
- the interface.
- Other indexes (and registers) may be defined by the implementation.
-\layout Standard
-
-The type
-\family typewriter
-ptl_sr_value_t
-\family default
- defines the types of values held in status registers.
- This is a signed integer type.
- The size is implementation dependent, but must be at least 32 bits.
-\layout Section
-
-Initialization and Cleanup
-\begin_inset LatexCommand \label{sec:init}
-
-\end_inset
-
-
-\layout Standard
-
-The Portals API includes a function,
-\emph on
-PtlInit
-\emph default
-, to initialize the library and a function,
-\emph on
-PtlFini
-\emph default
-, to cleanup after the application is done using the library.
-\layout Subsection
-
-PtlInit
-\begin_inset LatexCommand \label{sec:ptlinit}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlInit( int *max_interfaces );
-\layout Standard
-\noindent
-The
-\emph on
-PtlInit
-\emph default
- function initializes the Portals library.
- PtlInit must be called at least once by a process before any thread makes
- a Portals function call, but may be safely called more than once.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_FAIL Indicates an error during initialization.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-max_interfaces
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="5in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-max_interfaces
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold the maximum number of interfaces
- that can be initialized.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlFini
-\begin_inset LatexCommand \label{sec:ptlfini}
-
-\end_inset
-
-
-\layout LyX-Code
-
-void PtlFini( void );
-\layout Standard
-\noindent
-The
-\emph on
-PtlFini
-\emph default
- function cleans up after the Portals library is no longer needed by a process.
- After this function is called, calls to any of the functions defined by
- the Portal API or use of the structures set up by the Portals API will
- result in undefined behavior.
- This function should be called once and only once during termination by
- a process.
- Typically, this function will be called in the exit sequence of a process.
- Individual threads should not call PtlFini when they terminate.
-\layout Section
-
-Network Interfaces
-\begin_inset LatexCommand \label{sec:ni}
-
-\end_inset
-
-
-\layout Standard
-
-The Portals API supports the use of multiple network interfaces.
- However, each interface is treated as an independent entity.
- Combining interfaces (e.g.,
-\begin_inset Quotes eld
-\end_inset
-
-bonding
-\begin_inset Quotes erd
-\end_inset
-
- to create a higher bandwidth connection) must be implemented by the application
- or embedded in the underlying network.
- Interfaces are treated as independent entities to make it easier to cache
- information on individual network interface cards.
-\layout Standard
-
-Once initialized, each interface provides a Portal table, an access control
- table, and a collection of status registers.
- See Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset
-
- for a discussion of updating Portal table entries using the
-\emph on
-PtlMEAttach
-\emph default
- function.
- See Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ac}
-
-\end_inset
-
- for a discussion of the initialization and updating of entries in the access
- control table.
- See Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:nistatus}
-
-\end_inset
-
- for a discussion of the
-\emph on
-PtlNIStatus
-\emph default
- function which can be used to determine the value of a status register.
-\layout Standard
-
-Every other type of Portal object (e.g., memory descriptor, event queue, or
- match list entry) is associated with a specific network interface.
- The association to a network interface is established when the object is
- created and is encoded in the handle for the object.
-\layout Standard
-
-Each network interface is initialized and shut down independently.
- The initialization routine,
-\emph on
-PtlNIInit
-\emph default
-, returns a handle for an interface object which is used in all subsequent
- Portal operations.
- The
-\emph on
-PtlNIFini
-\emph default
- function is used to shut down an interface and release any resources that
- are associated with the interface.
- Network interface handles are associated with processes, not threads.
- All threads in a process share all of the network interface handles.
-\layout Standard
-
-The Portals API also defines the
-\emph on
-PtlNIStatus
-\emph default
- function to query the status registers for a network interface, the
-\emph on
-PtlNIDist
-\emph default
- function to determine the
-\begin_inset Quotes eld
-\end_inset
-
-distance
-\begin_inset Quotes erd
-\end_inset
-
- to another process, and the
-\emph on
-PtlNIHandle
-\emph default
- function to determine the network interface that an object is associated
- with.
-\layout Subsection
-
-PtlNIInit
-\begin_inset LatexCommand \label{sec:niinit}
-
-\end_inset
-
-
-\layout LyX-Code
-
-typedef struct {
-\newline
- int max_match_entries;
-\newline
- int max_mem_descriptors;
-\newline
- int max_event_queues;
-\newline
- ptl_ac_index_t max_atable_index;
-\newline
- ptl_pt_index_t max_ptable_index;
-\newline
-} ptl_ni_limits_t;
-\newline
-
-\newline
-int PtlNIInit( ptl_interface_t interface,
-\newline
- ptl_pid_t pid,
-\newline
- ptl_ni_limits_t* desired,
-\newline
- ptl_ni_limits_t* actual,
-\newline
- ptl_handle_ni_t* handle );
-\layout Standard
-
-Values of type
-\family typewriter
-ptl_ni_limits_t
-\family default
- include the following members:
-\layout Description
-
-max_match_entries Maximum number of match entries that can be allocated
- at any one time.
-\layout Description
-
-max_mem_descriptors Maximum number of memory descriptors that can be allocated
- at any one time.
-\layout Description
-
-max_event_queues Maximum number of event queues that can be allocated at
- any one time.
-\layout Description
-
-max_atable_index Largest access control table index for this interface;
- valid indexes range from zero to
-\family typewriter
-max_atable_index
-\family default
-, inclusive.
-\layout Description
-
-max_ptable_index Largest Portal table index for this interface; valid indexes
- range from zero to
-\family typewriter
-max_ptable_index
-\family default
-, inclusive.
-\layout Standard
-\noindent
-The
-\emph on
-PtlNIInit
-\emph default
- function is used to initialize the Portals API for a network interface.
- This function must be called at least once by each process before any other
- operations that apply to the interface by any process or thread.
- For subsequent calls to
-\shape italic
-PtlNIInit
-\shape default
- from within the same process (either by different threads or the same thread),
- the desired limits will be ignored and the call will return the existing
- NI handle.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INIT_DUP Indicates a duplicate initialization of
-\family typewriter
-interface
-\family default
-.
-
-\layout Description
-
-PTL_INIT_INV Indicates that
-\family typewriter
-interface
-\family default
- is not a valid network interface.
-
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to initialize the
- interface.
-
-\layout Description
-
-PTL_INV_PROC Indicates that
-\family typewriter
-pid
-\family default
- is not a valid process id.
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-actual
-\family default
- or
-\family typewriter
- handle
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="5" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Identifies the network interface to be initialized.
- (See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ni-type}
-
-\end_inset
-
- for a discussion of values used to identify network interfaces.)
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-pid
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Identifies the desired process id (for well known process ids).
- The value
-\family typewriter
-PTL_PID_ANY
-\family default
- may be used to have the process id assigned by the underlying library.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-desired
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-If non-NULL, points to a structure that holds the desired limits.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-actual
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, the location pointed to by actual will hold the actual
- limits.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold a handle for the interface.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Comment
-
-The use of desired is implementation dependent.
- In particular, an implementation may choose to ignore this argument.
-\layout Subsection
-
-PtlNIFini
-\begin_inset LatexCommand \label{sec:nifini}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlNIFini( ptl_handle_ni_t interface );
-\layout Standard
-\noindent
-The
-\emph on
-PtlNIFini
-\emph default
- function is used to release the resources allocated for a network interface.
- Once the
-\emph on
-PtlNIFini
-\emph default
- operation has been started, the results of pending API operations (e.g.,
- operations initiated by another thread) for this interface are undefined.
- Similarly, the effects of incoming operations (puts and gets) or return
- values (acknowledgements and replies) for this interface are undefined.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-interface
-\family default
- is not a valid network interface handle.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-A handle for the interface to shut down.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlNIStatus
-\begin_inset LatexCommand \label{sec:nistatus}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlNIStatus( ptl_handle_ni_t interface,
-\newline
- ptl_sr_index_t status_register,
-\newline
- ptl_sr_value_t* status );
-\layout Standard
-\noindent
-The
-\emph on
-PtlNIStatus
-\emph default
- function returns the value of a status register for the specified interface.
- (See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:stat-type}
-
-\end_inset
-
- for more information on status register indexes and status register values.)
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-interface
-\family default
- is not a valid network interface handle.
-
-\layout Description
-
-PTL_INV_SR_INDX Indicates that
-\family typewriter
-status_register
-\family default
- is not a valid status register.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-status
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="3" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the interface to use.
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-status_register
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-An index for the status register to read.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-status
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold the current value of the status
- register.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Comment
-
-The only status register that must be defined is a drop count register (
-\family typewriter
-PTL_SR_DROP_COUNT
-\family default
-).
- Implementations may define additional status registers.
- Identifiers for the indexes associated with these registers should start
- with the prefix
-\family typewriter
-PTL_SR_
-\family default
-.
-\layout Subsection
-
-PtlNIDist
-\layout LyX-Code
-
-int PtlNIDist( ptl_handle_ni_t interface,
-\newline
- ptl_process_id_t process,
-\newline
- unsigned long* distance );
-\layout Standard
-\noindent
-The
-\emph on
-PtlNIDist
-\emph default
- function returns the distance to another process using the specified interface.
- Distances are only defined relative to an interface.
- Distance comparisons between different interfaces on the same process may
- be meaningless.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-interface
-\family default
- is not a valid network interface handle.
-
-\layout Description
-
-PTL_INV_PROC Indicates that
-\family typewriter
-process
-\family default
- is not a valid process identifier.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-distance
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="3" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the interface to use.
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-process
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-An identifier for the process whose distance is being requested.
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-distance
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold the distance to the remote
- process.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Comment
-
-This function should return a static measure of distance.
- Examples include minimum latency, the inverse of available bandwidth, or
- the number of switches between the two endpoints.
-\layout Subsection
-
-PtlNIHandle
-\layout LyX-Code
-
-int PtlNIHandle( ptl_handle_any_t handle,
-\newline
- ptl_handle_ni_t* interface );
-\layout Standard
-\noindent
-The
-\emph on
-PtlNIHandle
-\emph default
- function returns a handle for the network interface with which the object
- identified by
-\family typewriter
-handle
-\family default
- is associated.
- If the object identified by
-\family typewriter
-handle
-\family default
- is a network interface, this function returns the same value it is passed.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_HANDLE Indicates that
-\family typewriter
-handle
-\family default
- is not a valid handle.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-interface
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the object.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold a handle for the network interface
- associated with
-\family typewriter
-handle
-\family default
-.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Comment
-
-Every handle should encode the network interface and the object id relative
- to this handle.
- Both are presumably encoded using integer values.
-\layout Section
-
-User Identification
-\begin_inset LatexCommand \label{sec:uid}
-
-\end_inset
-
-
-\layout Standard
-
-Every process runs on behalf of a user.
-
-\layout Subsection
-
-PtlGetUid
-\layout LyX-Code
-
-int PtlGetUid( ptl_handle_ni_t ni_handle,
-\newline
- ptl_uid_t* uid );
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-ni_handle
-\family default
- is not a valid network interface handle.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-uid
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="5in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ni_handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A network interface handle.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-uid
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold the user id for the calling
- process.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Comment
-
-Note that user identifiers are dependent on the network interface(s).
- In particular, if a node has multiple interfaces, a process may have multiple
- user identifiers.
-\layout Section
-
-Process Identification
-\begin_inset LatexCommand \label{sec:pid}
-
-\end_inset
-
-
-\layout Standard
-
-Processes that use the Portals API can be identified using a node id and
- process id.
- Every node accessible through a network interface has a unique node identifier
- and every process running on a node has a unique process identifier.
- As such, any process in the computing system can be identified by its node
- id and process id.
-
-\layout Standard
-
-The Portals API defines a type,
-\family typewriter
-ptl_process_id_t
-\family default
- for representing process ids and a function,
-\emph on
-PtlGetId
-\emph default
-, which can be used to obtain the id of the current process.
-\layout Comment
-
-The Portals API does not include thread identifiers.
- Messages are delivered to processes (address spaces) not threads (contexts
- of execution).
-\layout Subsection
-
-The Process Id Type
-\begin_inset LatexCommand \label{sec:pid-type}
-
-\end_inset
-
-
-\layout LyX-Code
-
-typedef struct {
-\newline
- ptl_nid_t nid; /* node id */
-\newline
- ptl_pid_t pid; /* process id */
-\newline
-} ptl_process_id_t;
-\layout Standard
-\noindent
-The
-\family typewriter
-ptl_process_id_t
-\family default
- type uses two identifiers to represent a process id: a node id and a process
- id.
-
-\layout Subsection
-
-PtlGetId
-\begin_inset LatexCommand \label{sub:PtlGetId}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlGetId( ptl_handle_ni_t ni_handle,
-\newline
- ptl_process_id_t* id );
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-ni_handle
-\family default
- is not a valid network interface handle.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-id
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="5in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ni_handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A network interface handle.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-id
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold the id for the calling process.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Comment
-
-Note that process identifiers are dependent on the network interface(s).
- In particular, if a node has multiple interfaces, it may have multiple
- node identifiers.
-\layout Section
-
-Match List Entries and Match Lists
-\begin_inset LatexCommand \label{sec:me}
-
-\end_inset
-
-
-\layout Standard
-
-A match list is a chain of match list entries.
- Each match list entry includes a memory descriptor and a set of match criteria.
- The match criteria can be used to reject incoming requests based on process
- id or the match bits provided in the request.
- A match list is created using the
-\emph on
-PtlMEAttach
-\emph default
- or
-\shape italic
-PtlMEAttachAny
-\shape default
- functions, which create a match list consisting of a single match list
- entry, attach the match list to the specified Portal index, and return
- a handle for the match list entry.
- Match entries can be dynamically inserted and removed from a match list
- using the
-\emph on
-PtlMEInsert
-\emph default
- and
-\emph on
-PtlMEUnlink
-\emph default
- functions.
-\layout Subsection
-
-PtlMEAttach
-\begin_inset LatexCommand \label{sec:meattach}
-
-\end_inset
-
-
-\layout LyX-Code
-
-typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t;
-\newline
-
-\layout LyX-Code
-
-typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t;
-\newline
-
-\layout LyX-Code
-
-int PtlMEAttach( ptl_handle_ni_t interface,
-\newline
- ptl_pt_index_t index,
-\newline
- ptl_process_id_t matchid,
-\newline
- ptl_match_bits_t match_bits,
-\newline
- ptl_match_bits_t ignorebits,
-\newline
- ptl_unlink_t unlink,
-\newline
- ptl_ins_pos_t position,
-\newline
- ptl_handle_me_t* handle );
-\layout Standard
-\noindent
-Values of the type
-\family typewriter
-ptl_ins_pos_t
-\family default
- are used to control where a new item is inserted.
- The value
-\family typewriter
-PTL_INS_BEFORE
-\family default
- is used to insert the new item before the current item or before the head
- of the list.
- The value
-\family typewriter
-PTL_INS_AFTER
-\family default
- is used to insert the new item after the current item or after the last
- item in the list.
-
-\layout Standard
-
-The
-\emph on
-PtlMEAttach
-\emph default
- function creates a match list consisting of a single entry and attaches
- this list to the Portal table for
-\family typewriter
-interface
-\family default
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-interface
-\family default
- is not a valid network interface handle.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_PTINDEX Indicates that
-\family typewriter
-index
-\family default
- is not a valid Portal table index.
-
-\layout Description
-
-PTL_INV_PROC Indicates that
-\family typewriter
-matchid
-\family default
- is not a valid process identifier.
-
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- match list entry.
-
-\layout Description
-
-PTL_ML_TOOLONG Indicates that the resulting match list is too long.
- The maximum length for a match list is defined by the interface.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="7" columns="3">
-<features>
-<column alignment="left" valignment="top" width="0.8in">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.75in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the interface to use.
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-index
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The Portal table index where the match list should be attached.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-matchid
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Specifies the match criteria for the process id of the requestor.
- The constants
-\family typewriter
-PTL_PID_ANY
-\family default
- and
-\family typewriter
-PTL_NID_ANY
-\family default
- can be used to wildcard either of the ids in the
-\family typewriter
-ptl_process_id_t
-\family default
- structure.
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-match_bits, ignorebits
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Specify the match criteria to apply to the match bits in the incoming request.
- The
-\family typewriter
-ignorebits
-\family default
- are used to mask out insignificant bits in the incoming match bits.
- The resulting bits are then compared to the match list entry's match
- bits to determine if the incoming request meets the match criteria.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-unlink
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Indicates the match list entry should be unlinked when the last memory descripto
-r associated with this match list entry is unlinked.
- (Note, the check for unlinking a match entry only occurs when a memory
- descriptor is unlinked.)
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-position
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Indicates whether the new match entry should be prepended or appended to
- the existing match list.
- If there is no existing list, this argument is ignored and the new match
- entry becomes the only entry in the list.
- Allowed constants:
-\family typewriter
-PTL_INS_BEFORE
-\family default
-,
-\family typewriter
-PTL_INS_AFTER
-\family default
-.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold a handle for the newly created
- match list entry.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlMEAttachAny
-\begin_inset LatexCommand \label{sec:attachany}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlMEAttachAny( ptl_handle_ni_t interface,
-\newline
- ptl_pt_index_t *index,
-\newline
- ptl_process_id_t matchid,
-\newline
- ptl_match_bits_t match_bits,
-\newline
- ptl_match_bits_t ignorebits,
-\newline
- ptl_unlink_t unlink,
-\newline
- ptl_handle_me_t* handle );
-\layout Standard
-
-The
-\emph on
-PtlMEAttachAny
-\emph default
- function creates a match list consisting of a single entry and attaches
- this list to an unused Portal table entry for
-\family typewriter
-interface
-\family default
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-interface
-\family default
- is not a valid network interface handle.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_PROC Indicates that
-\family typewriter
-matchid
-\family default
- is not a valid process identifier.
-
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- match list entry.
-
-\layout Description
-
-PTL_PT_FULL Indicates that there are no free entries in the Portal table.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="4" columns="3">
-<features>
-<column alignment="left" valignment="top" width="0.8in">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.75in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the interface to use.
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-index
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold the Portal index where the
- match list has been attached.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-matchid, match_bits, ignorebits, unlink
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-See the discussion for
-\shape italic
-PtlMEAttach
-\shape default
-.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold a handle for the newly created
- match list entry.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlMEInsert
-\begin_inset LatexCommand \label{sec:meinsert}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlMEInsert( ptl_handle_me_t current,
-\newline
- ptl_process_id_t matchid,
-\newline
- ptl_match_bits_t match_bits,
-\newline
- ptl_match_bits_t ignorebits,
-\newline
- ptl_ins_pos_t position,
-\newline
- ptl_handle_me_t* handle );
-\layout Standard
-
-The
-\emph on
-PtlMEInsert
-\emph default
- function creates a new match list entry and inserts this entry into the
- match list containing
-\family typewriter
-current
-\family default
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_PROC Indicates that
-\family typewriter
-matchid
-\family default
- is not a valid process identifier.
-
-\layout Description
-
-PTL_INV_ME Indicates that
-\family typewriter
-current
-\family default
- is not a valid match entry handle.
-
-\layout Description
-
-PTL_ML_TOOLONG Indicates that the resulting match list is too long.
- The maximum length for a match list is defined by the interface.
-
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- match entry.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="4" columns="3">
-<features>
-<column alignment="left" valignment="top" width="0.8in">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-current
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for a match entry.
- The new match entry will be inserted immediately before or immediately
- after this match entry.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-matchid
-\family default
-,
-\family typewriter
-match_bits
-\family default
-,
-\family typewriter
-ignorebits
-\family default
-,
-\family typewriter
-unlink
-\family default
-
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-See the discussion for
-\emph on
-PtlMEAttach
-\emph default
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-position
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Indicates whether the new match entry should be inserted before or after
- the
-\family typewriter
-current
-\family default
- entry.
- Allowed constants:
-\family typewriter
-PTL_INS_BEFORE
-\family default
-,
-\family typewriter
-PTL_INS_AFTER
-\family default
-.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\family typewriter
-handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-See the discussion for
-\emph on
-PtlMEAttach
-\emph default
-.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlMEUnlink
-\begin_inset LatexCommand \label{sec:meunlink}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlMEUnlink( ptl_handle_me_t entry );
-\layout Standard
-\noindent
-The
-\emph on
-PtlMEUnlink
-\emph default
- function can be used to unlink a match entry from a match list.
- This operation also releases any resources associated with the match entry
- (including the associated memory descriptor).
- It is an error to use the match entry handle after calling
-\emph on
-PtlMEUnlink
-\emph default
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_ME Indicates that
-\family typewriter
-entry
-\family default
- is not a valid match entry handle.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-entry
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-A handle for the match entry to be unlinked.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Section
-
-Memory Descriptors
-\begin_inset LatexCommand \label{sec:md}
-
-\end_inset
-
-
-\layout Standard
-
-A memory descriptor contains information about a region of an application
- process' memory and an event queue where information about the operations
- performed on the memory descriptor are recorded.
- The Portals API provides two operations to create memory descriptors:
-\emph on
-PtlMDAttach
-\emph default
-, and
-\emph on
-PtlMDBind
-\emph default
-; an operation to update a memory descriptor,
-\emph on
-PtlMDUpdate
-\emph default
-; and an operation to unlink and release the resources associated with a
- memory descriptor,
-\emph on
-PtlMDUnlink
-\emph default
-.
-\layout Subsection
-
-The Memory Descriptor Type
-\begin_inset LatexCommand \label{sec:md-type}
-
-\end_inset
-
-
-\layout LyX-Code
-
-typedef struct {
-\newline
- void* start;
-\newline
- ptl_size_t length;
-\newline
- int threshold;
-\newline
- unsigned int max_offset;
-\newline
- unsigned int options;
-\newline
- void* user_ptr;
-\newline
- ptl_handle_eq_t eventq;
-\newline
-} ptl_md_t;
-\layout Standard
-\noindent
-The
-\family typewriter
-ptl_md_t
-\family default
- type defines the application view of a memory descriptor.
- Values of this type are used to initialize and update the memory descriptors.
-\layout Subsubsection
-
-Members
-\layout Description
-
-start,\SpecialChar ~
-length Specify the memory region associated with the memory descriptor.
- The
-\family typewriter
-start
-\family default
- member specifies the starting address for the memory region and the
-\family typewriter
-length
-\family default
- member specifies the length of the region.
- The
-\family typewriter
-start member
-\family default
- can be NULL provided that the
-\family typewriter
-length
-\family default
- member is zero.
- (Zero length buffers are useful to record events.) There are no alignment
- restrictions on the starting address or the length of the region; although,
- unaligned messages may be slower (i.e., lower bandwidth and/or longer latency)
- on some implementations.
-
-\layout Description
-
-threshold Specifies the maximum number of operations that can be performed
- on the memory descriptor.
- An operation is any action that could possibly generate an event (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
- for the different types of events).
- In the usual case, the threshold value is decremented for each operation
- on the memory descriptor.
- When the threshold value is zero, the memory descriptor is
-\emph on
-inactive
-\emph default
-, and does not respond to operations.
- A memory descriptor can have an initial threshold value of zero to allow
- for manipulation of an inactive memory descriptor by the local process.
- A threshold value of
-\family typewriter
-PTL_MD_THRESH_INF
-\family default
- indicates that there is no bound on the number of operations that may be
- applied to a memory descriptor.
- Note that local operations (e.g.,
-\emph on
-PtlMDUpdate
-\emph default
-) are not applied to the threshold count.
-
-\layout Description
-
-max_offset Specifies the maximum local offset of a memory descriptor.
- When the local offset of a memory descriptor exceeds this maximum, the
- memory descriptor becomes
-\shape italic
-inactive
-\shape default
- and does not respond to further operations.
-\layout Description
-
-options Specifies the behavior of the memory descriptor.
- There are five options that can be selected: enable put operations (yes
- or no), enable get operations (yes or no), offset management (local or
- remote), message truncation (yes or no), and acknowledgement (yes or no).
- Values for this argument can be constructed using a bitwise or of the following
- values:
-\begin_deeper
-\begin_deeper
-\layout Description
-
-PTL_MD_OP_PUT Specifies that the memory descriptor will respond to
-\emph on
-put
-\emph default
- operations.
- By default, memory descriptors reject
-\emph on
-put
-\emph default
- operations.
-
-\layout Description
-
-PTL_MD_OP_GET Specifies that the memory descriptor will respond to
-\emph on
-get
-\emph default
- operations.
- By default, memory descriptors reject
-\emph on
-get
-\emph default
- operations.
-
-\layout Description
-
-PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory
- region is provided by the incoming request.
- By default, the offset is maintained locally.
- When the offset is maintained locally, the offset is incremented by the
- length of the request so that the next operation (put and/or get) will
- access the next part of the memory region.
-\layout Description
-
-PTL_MD_TRUNCATE Specifies that the length provided in the incoming request
- can be reduced to match the memory available in the region.
- (The memory available in a memory region is determined by subtracting the
- offset from the length of the memory region.) By default, if the length
- in the incoming operation is greater than the amount of memory available,
- the operation is rejected.
-
-\layout Description
-
-PTL_MD_ACK_DISABLE Specifies that an acknowledgement should
-\emph on
-not
-\emph default
- be sent for incoming
-\emph on
-put
-\emph default
- operations, even if requested.
- By default, acknowledgements are sent for
-\emph on
-put
-\emph default
- operations that request an acknowledgement.
- Acknowledgements are never sent for
-\emph on
-get
-\emph default
- operations.
- The value sent in the reply serves as an implicit acknowledgement.
-
-\end_deeper
-\layout Standard
-
-
-\series bold
-Note
-\series default
-: It is not considered an error to have a memory descriptor that does not
- respond to either
-\emph on
-put
-\emph default
- or
-\emph on
-get
-\emph default
- operations: Every memory descriptor responds to
-\emph on
-reply
-\emph default
- operations.
- Nor is it considered an error to have a memory descriptor that responds
- to both
-\emph on
-put
-\emph default
- and
-\emph on
-get
-\emph default
- operations.
-
-\end_deeper
-\layout Description
-
-user_ptr A user-specified value that is associated with the memory descriptor.
- The value does not need to be a pointer, but must fit in the space used
- by a pointer.
- This value (along with other values) is recorded in events associated with
- operations on this memory descriptor.
-\begin_inset Foot
-collapsed true
-
-\layout Standard
-
-Tying the memory descriptor to a user-defined value can be useful when multiple
- memory descriptors share the same event queue or when the memory descriptor
- needs to be associated with a data structure maintained by the application.
- For example, an MPI implementation can set the
-\family typewriter
-user_ptr
-\family default
- argument to the value of an MPI Request.
- This direct association allows for processing of memory descriptors by
- the MPI implementation without a table lookup or a search for the appropriate
- MPI Request.
-\end_inset
-
-
-\layout Description
-
-eventq A handle for the event queue used to log the operations performed
- on the memory region.
- If this argument is
-\family typewriter
-PTL_EQ_NONE
-\family default
-, operations performed on this memory descriptor are not logged.
-
-\layout Subsection
-
-PtlMDAttach
-\begin_inset LatexCommand \label{sec:mdattach}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlMDAttach( ptl_handle_me_t match,
-\newline
- ptl_md_t mem_desc,
-\newline
- ptl_unlink_t unlink_op,
-\newline
- ptl_unlink_t unlink_nofit,
-\newline
- ptl_handle_md_t* handle );
-\layout Standard
-\noindent
-Values of the type
-\family typewriter
-ptl_unlink_t
-\family default
- are used to control whether an item is unlinked from a list.
- The value
-\family typewriter
-PTL_UNLINK
-\family default
- enables unlinking.
- The value
-\family typewriter
-PTL_RETAIN
-\family default
- disables unlinking.
-\layout Standard
-
-The
-\emph on
-PtlMDAttach
-\emph default
- operation is used to create a memory descriptor and attach it to a match
- list entry.
- An error code is returned if this match list entry already has an associated
- memory descriptor.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INUSE Indicates that
-\family typewriter
-match
-\family default
- already has a memory descriptor attached.
-
-\layout Description
-
-PTL_INV_ME Indicates that
-\family typewriter
-match
-\family default
- is not a valid match entry handle.
-
-\layout Description
-
-PTL_ILL_MD Indicates that
-\family typewriter
-mem_desc
-\family default
- is not a legal memory descriptor.
- This may happen because the memory region defined in
-\family typewriter
-mem_desc
-\family default
- is invalid or because the network interface associated with the
-\family typewriter
-eventq
-\family default
- in
-\family typewriter
-mem_desc
-\family default
- is not the same as the network interface associated with
-\family typewriter
-match
-\family default
-.
-
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- memory descriptor.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="5" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-match
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the match entry that the memory descriptor will be associated
- with.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Provides initial values for the application visible parts of a memory descriptor.
- Other than its use for initialization, there is no linkage between this
- structure and the memory descriptor maintained by the API.
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-unlink_op
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A flag to indicate whether the memory descriptor is unlinked when it becomes
- inactive, either because the operation threshold drops to zero or because
- the maximum offset has been exceeded.
- (Note, the check for unlinking a memory descriptor only occurs after
- the completion of a successful operation.
- If the threshold is set to zero during initialization or using
-\emph on
-PtlMDUpdate
-\emph default
-, the memory descriptor is
-\series bold
-not
-\series default
- unlinked.)
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-unlink_nofit
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A flag to indicate whether the memory descriptor is unlinked when the space
- remaining in the memory descriptor is not sufficient for a matching operation.
- If an incoming message arrives at a memory descriptor that does
- not have sufficient space and the
-\series bold
-PTL_MD_TRUNCATE
-\series default
- operation is not specified, the memory descriptor will be unlinked.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold a handle for the newly created
- memory descriptor.
- The
-\family typewriter
-handle
-\family default
- argument can be NULL, in which case the handle will not be returned.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlMDBind
-\begin_inset LatexCommand \label{sec:mdbind}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlMDBind( ptl_handle_ni_t interface,
-\newline
- ptl_md_t mem_desc,
-\newline
- ptl_handle_md_t* handle );
-\layout Standard
-\noindent
-The
-\emph on
-PtlMDBind
-\emph default
- operation is used to create a
-\begin_inset Quotes eld
-\end_inset
-
-free floating
-\begin_inset Quotes erd
-\end_inset
-
- memory descriptor, i.e., a memory descriptor that is not associated with
- a match list entry.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-interface
-\family default
- is not a valid network interface handle.
-
-\layout Description
-
-PTL_ILL_MD Indicates that
-\family typewriter
-mem_desc
-\family default
- is not a legal memory descriptor.
- This may happen because the memory region defined in
-\family typewriter
-mem_desc
-\family default
- is invalid or because the network interface associated with the
-\family typewriter
-eventq
-\family default
- in
-\family typewriter
-mem_desc
-\family default
- is not the same as the network interface,
-\family typewriter
-interface
-\family default
-.
-
-\layout Description
-
-PTL_INV_EQ Indicates that the event queue associated with
-\family typewriter
-mem_desc
-\family default
- is not valid.
-
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- memory descriptor.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-handle
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="3" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the network interface with which the memory descriptor will
- be associated.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Provides initial values for the application visible parts of a memory descriptor.
- Other than its use for initialization, there is no linkage between this
- structure and the memory descriptor maintained by the API.
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold a handle for the newly created
- memory descriptor.
- The
-\family typewriter
-handle
-\family default
- argument must be a valid address and cannot be NULL.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlMDUnlink
-\begin_inset LatexCommand \label{sec:mdfree}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlMDUnlink( ptl_handle_md_t mem_desc );
-\layout Standard
-\noindent
-The
-\emph on
-PtlMDUnlink
-\emph default
- function unlinks the memory descriptor from any match list entry it may
- be linked to and releases the resources associated with a memory descriptor.
- (This function does not free the memory region associated with the memory
- descriptor.) This function also releases the resources associated with a
- floating memory descriptor.
- Only memory descriptors with no pending operations may be unlinked.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_MD Indicates that
-\family typewriter
-mem_desc
-\family default
- is not a valid memory descriptor handle.
-\layout Description
-
-PTL_MD_INUSE Indicates that
-\family typewriter
-mem_desc
-\family default
- has pending operations and cannot be unlinked.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the memory descriptor to be released.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlMDUpdate
-\begin_inset LatexCommand \label{sec:mdupdate}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlMDUpdate( ptl_handle_md_t mem_desc,
-\newline
- ptl_md_t* old_md,
-\newline
- ptl_md_t* new_md,
-\newline
- ptl_handle_eq_t testq );
-\layout Standard
-\noindent
-The
-\emph on
-PtlMDUpdate
-\emph default
- function provides a conditional, atomic update operation for memory descriptors.
- The memory descriptor identified by
-\family typewriter
-mem_desc
-\family default
- is only updated if the event queue identified by
-\family typewriter
-testq
-\family default
- is empty.
- The intent is to only enable updates to the memory descriptor when no new
- messages have arrived since the last time the queue was checked.
- See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:exmpi}
-
-\end_inset
-
- for an example of how this function can be used.
-\layout Standard
-
-If
-\family typewriter
-new
-\family default
- is not NULL the memory descriptor identified by handle will be updated
- to reflect the values in the structure pointed to by
-\family typewriter
-new
-\family default
- if
-\family typewriter
-testq
-\family default
- has the value
-\family typewriter
-PTL_EQ_NONE
-\family default
- or if the event queue identified by
-\family typewriter
-testq
-\family default
- is empty.
- If
-\family typewriter
-old
-\family default
- is not NULL, the current value of the memory descriptor identified by
-\family typewriter
-mem_desc
-\family default
- is recorded in the location identified by
-\family typewriter
-old
-\family default
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_NOUPDATE Indicates that the update was not performed because
-\family typewriter
-testq
-\family default
- was not empty.
-
-\layout Description
-
-PTL_INV_MD Indicates that
-\family typewriter
-mem_desc
-\family default
- is not a valid memory descriptor handle.
-
-\layout Description
-
-PTL_ILL_MD Indicates that the value pointed to by
-\family typewriter
-new
-\family default
- is not a legal memory descriptor (e.g., the memory region specified by the
- memory descriptor may be invalid).
-
-\layout Description
-
-PTL_INV_EQ Indicates that
-\family typewriter
-testq
-\family default
- is not a valid event queue handle.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-new
-\family default
- or
-\family typewriter
-old
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="4" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the memory descriptor to update.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-old_md
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-If
-\family typewriter
-old_md
-\family default
- is not the value
-\family typewriter
-NULL
-\family default
-, the current value of the memory descriptor will be stored in the location
- identified by
-\family typewriter
-old
-\family default
-_md.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-new_md
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-If
-\family typewriter
-new_md
-\family default
- is not the value
-\family typewriter
-NULL
-\family default
-, this argument provides the new values for the memory descriptor, if the
- update is performed.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-testq
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for an event queue used to predicate the update.
- If
-\family typewriter
-testq
-\family default
- is equal to
-\family typewriter
-PTL_EQ_NONE
-\family default
-, the update is performed unconditionally.
- Otherwise, the update is performed if and only if
-\family typewriter
-testq
-\family default
- is empty.
- If the update is not performed, the function returns the value
-\family typewriter
-PTL_NOUPDATE
-\family default
-.
- (Note, the
-\family typewriter
-testq
-\family default
- argument does not need to be the same as the event queue associated with
- the memory descriptor.)
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Standard
-
-The conditional update can be used to ensure that the memory descriptor
- has not changed between the time it was examined and the time it is updated.
- In particular, it is needed to support an MPI implementation where the
- activity of searching an unexpected message queue and posting a receive
- must be atomic.
-\layout Section
-
-Events and Event Queues
-\begin_inset LatexCommand \label{sec:eq}
-
-\end_inset
-
-
-\layout Standard
-
-Event queues are used to log operations performed on memory descriptors.
- They can also be used to hold acknowledgements for completed
-\emph on
-put
-\emph default
- operations and to note when the data specified in a
-\emph on
-put
-\emph default
- operation has been sent (i.e., when it is safe to reuse the buffer that holds
- this data).
- Multiple memory descriptors can share a single event queue.
-\layout Standard
-
-In addition to the
-\family typewriter
-ptl_handle_eq_t
-\family default
- type, the Portals API defines two types associated with events: The
-\family typewriter
-
-\newline
-ptl_event_kind_t
-\family default
- type defines the kinds of events that can be stored in an event queue.
- The
-\family typewriter
-ptl_event_t
-\family default
- type defines a structure that holds the information associated with an
- event.
-\layout Standard
-
-The Portals API also provides four functions for dealing with event queues:
- The
-\emph on
-PtlEQAlloc
-\emph default
- function is used to allocate the API resources needed for an event queue,
- the
-\emph on
-PtlEQFree
-\emph default
- function is used to release these resources, the
-\emph on
-PtlEQGet
-\emph default
- function can be used to get the next event from an event queue, and the
-
-\emph on
-PtlEQWait
-\emph default
- function can be used to block a process (or thread) until an event queue
- has at least one event.
-\layout Subsection
-
-Kinds of Events
-\begin_inset LatexCommand \label{sec:ek-type}
-
-\end_inset
-
-
-\layout LyX-Code
-
-typedef enum {
-\newline
- PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL,
-\newline
- PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL,
-\newline
- PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL,
-\newline
- PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL,
-\newline
- PTL_EVENT_ACK,
-\newline
- PTL_EVENT_UNLINK
-\newline
-} ptl_event_kind_t;
-\layout Standard
-\noindent
-The Portals API defines fourteen types of events that can be logged in an
- event queue:
-\layout Description
-
-PTL_EVENT_GET_START A remote
-\emph on
-get
-\emph default
- operation has been started on the memory descriptor.
- The memory region associated with this descriptor should not be altered
- until the corresponding END or FAIL event is logged.
-\layout Description
-
-PTL_EVENT_GET_END A previously initiated
-\emph on
-get
-\emph default
- operation completed successfully.
- This event is logged after the reply has been sent by the local node.
- As such, the process could free the memory descriptor once it sees this
- event.
-
-\layout Description
-
-PTL_EVENT_GET_FAIL A previously initiated
-\emph on
-get
-\emph default
- operation completed unsuccessfully.
- This event is logged after the reply has been sent by the local node.
- As such, the process could free the memory descriptor once it sees this
- event.
-
-\layout Description
-
-PTL_EVENT_PUT_START A remote
-\emph on
-put
-\emph default
- operation has been started on the memory descriptor.
- The memory region associated with this descriptor should be considered
- volatile until the corresponding END or FAIL event is logged.
-\layout Description
-
-PTL_EVENT_PUT_END A previously initiated
-\emph on
-put
-\emph default
- operation completed successfully.
- The underlying layers will not alter the memory (on behalf of this operation)
- once this event has been logged.
-
-\layout Description
-
-PTL_EVENT_PUT_FAIL A previously initiated
-\emph on
-put
-\emph default
- operation completed unsuccessfully.
- The underlying layers will not alter the memory (on behalf of this operation)
- once this event has been logged.
-
-\layout Description
-
-PTL_EVENT_REPLY_START A
-\emph on
-reply
-\emph default
- operation has been started on the memory descriptor.
-
-\layout Description
-
-PTL_EVENT_REPLY_END A previously initiated
-\emph on
-reply
-\emph default
- operation has completed successfully.
- This event is logged after the data (if any) from the reply has been written
- into the memory descriptor.
-
-\layout Description
-
-PTL_EVENT_REPLY_FAIL A previously initiated
-\emph on
-reply
-\emph default
- operation has completed unsuccessfully.
- This event is logged after the data (if any) from the reply has been written
- into the memory descriptor.
-
-\layout Description
-
-PTL_EVENT_ACK An
-\emph on
-acknowledgement
-\emph default
- was received.
- This event is logged when the acknowledgement is received.
-\layout Description
-
-PTL_EVENT_SEND_START An outgoing
-\emph on
-send
-\emph default
- operation has been started.
- The memory region associated with this descriptor should not be altered
- until the corresponding END or FAIL event is logged.
-\layout Description
-
-PTL_EVENT_SEND_END A previously initiated
-\emph on
-send
-\emph default
- operation has completed successfully.
- This event is logged after the entire buffer has been sent and it is safe
- for the application to reuse the buffer.
-
-\layout Description
-
-PTL_EVENT_SEND_FAIL A previously initiated
-\emph on
-send
-\emph default
- operation has completed unsuccessfully.
- The process can safely manipulate the memory or free the memory descriptor
- once it sees this event.
-\layout Description
-
-PTL_EVENT_UNLINK A memory descriptor associated with this event queue has
- been automatically unlinked.
- This event is not generated when a memory descriptor is explicitly unlinked
- by calling
-\shape italic
-PtlMDUnlink
-\shape default
-.
- This event does not decrement the threshold count.
-\layout Subsection
-
-Event Ordering
-\layout Standard
-
-The Portals API guarantees that when a process initiates two operations
- on a remote process, the operations will be initiated on the remote process
- in the same order that they were initiated on the original process.
- As an example, if process A initiates two
-\emph on
-put
-\emph default
- operations,
-\emph on
-x
-\emph default
- and
-\emph on
-y
-\emph default
-, on process B, the Portals API guarantees that process A will receive the
-
-\family typewriter
-PTL_EVENT_SEND_START
-\family default
- events for
-\emph on
-x
-\emph default
- and
-\emph on
-y
-\emph default
- in the same order that process B receives the
-\family typewriter
-PTL_EVENT_PUT_START
-\family default
- events for
-\emph on
-x
-\emph default
- and
-\emph on
-y
-\emph default
-.
- Notice that the API does not guarantee that the start events will be delivered
- in the same order that process A initiated the
-\emph on
-x
-\emph default
- and
-\emph on
-y
-\emph default
- operations.
- If process A needs to ensure the ordering of these operations, it should
- include code to wait for the initiation of
-\emph on
-x
-\emph default
- before it initiates
-\emph on
-y
-\emph default
-.
-\layout Subsection
-
-Failure Notification
-\layout Standard
-
-Operations may fail to complete successfully; however, unless the node itself
- fails, every operation that is started will eventually complete.
- While an operation is in progress, the memory associated with the operation
- should not be viewed (in the case of a put or a reply) or altered (in the
- case of a send or get).
- Operation completion, whether successful or unsuccessful, is final.
- That is, when an operation completes, the memory associated with the operation
- will no longer be read or altered by the operation.
- A network interface can use the
-\family typewriter
-ptl_ni_fail_t
-\family default
- to define more specific information regarding the failure of the operation
- and record this information in the
-\family typewriter
-ni_fail_type
-\family default
- field of the event.
-\layout Subsection
-
-The Event Type
-\begin_inset LatexCommand \label{sec:event-type}
-
-\end_inset
-
-
-\layout LyX-Code
-
-typedef struct {
-\newline
- ptl_event_kind_t type;
-\newline
- ptl_process_id_t initiator;
-\newline
- ptl_uid_t uid;
-\layout LyX-Code
-
- ptl_pt_index_t portal;
-\newline
- ptl_match_bits_t match_bits;
-\newline
- ptl_size_t rlength;
-\newline
- ptl_size_t mlength;
-\newline
- ptl_size_t offset;
-\newline
- ptl_handle_md_t md_handle;
-\newline
- ptl_md_t mem_desc;
-\newline
- ptl_hdr_data_t hdr_data;
-\newline
- ptl_seq_t link;
-\newline
- ptl_ni_fail_t ni_fail_type;
-\newline
- volatile ptl_seq_t sequence;
-\newline
-} ptl_event_t;
-\layout Standard
-\noindent
-An event structure includes the following members:
-\layout Description
-
-type Indicates the type of the event.
-
-\layout Description
-
-initiator The id of the initiator.
-
-\layout Description
-
-portal The Portal table index specified in the request.
-
-\layout Description
-
-match_bits A copy of the match bits specified in the request.
- See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset
-
- for more information on match bits.
-
-\layout Description
-
-rlength The length (in bytes) specified in the request.
-
-\layout Description
-
-mlength The length (in bytes) of the data that was manipulated by the operation.
- For truncated operations, the manipulated length will be the number of
- bytes specified by the memory descriptor (possibly with an offset).
- For all other operations, the manipulated length will be the length of
- the requested operation.
-
-\layout Description
-
-offset Is the displacement (in bytes) into the memory region that the operation
- used.
- The offset can be determined by the operation (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:datamovement}
-
-\end_inset
-
-) for a remote managed memory descriptor, or by the local memory descriptor
- (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset
-
-).
-
-\layout Description
-
-md_handle Is the handle to the memory descriptor associated with the event.
-\layout Description
-
-mem_desc Is the state of the memory descriptor immediately after the event
- has been processed.
-
-\layout Description
-
-hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-).
-
-\layout Description
-
-link The
-\emph on
-link
-\emph default
- member is used to link
-\family typewriter
-START
-\family default
- events with the
-\family typewriter
-END
-\family default
- or
-\family typewriter
-FAIL
-\family default
- event that signifies completion of the operation.
- The
-\emph on
-link
-\emph default
- member will be the same for the two events associated with an operation.
- The link member is also used to link an
-\family typewriter
-UNLINK
-\family default
- event with the event that caused the memory descriptor to be unlinked.
-\layout Description
-
-sequence The sequence number for this event.
- Sequence numbers are unique to each event.
-\layout Comment
-
-The
-\emph on
-sequence
-\emph default
- member is the last member and is volatile to support SMP implementations.
- When an event structure is filled in, the
-\emph on
-sequence
-\emph default
- member should be written after all other members have been updated.
- Moreover, a memory barrier should be inserted between the updating of other
- members and the updating of the
-\emph on
-sequence
-\emph default
- member.
-\layout Subsection
-
-PtlEQAlloc
-\begin_inset LatexCommand \label{sec:eqalloc}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlEQAlloc( ptl_handle_ni_t interface,
-\newline
- ptl_size_t count,
-\newline
- ptl_handle_eq_t* handle );
-\layout Standard
-\noindent
-The
-\emph on
-PtlEQAlloc
-\emph default
- function is used to build an event queue.
-
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-interface
-\family default
- is not a valid network interface handle.
-
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- event queue.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-handle
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="3" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the interface with which the event queue will be associated.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-count
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The number of events that can be stored in the event queue.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-handle
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold a handle for the newly created
- event queue.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlEQFree
-\begin_inset LatexCommand \label{sec:eqfree}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlEQFree( ptl_handle_eq_t eventq );
-\layout Standard
-\noindent
-The
-\emph on
-PtlEQFree
-\emph default
- function releases the resources associated with an event queue.
- It is up to the user to ensure that no memory descriptors are associated
- with the event queue once it is freed.
-
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_EQ Indicates that
-\family typewriter
-eventq
-\family default
- is not a valid event queue handle.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-eventq
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-A handle for the event queue to be released.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlEQGet
-\begin_inset LatexCommand \label{sec:eqget}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlEQGet( ptl_handle_eq_t eventq,
-\newline
- ptl_event_t* event );
-\layout Standard
-\noindent
-The
-\emph on
-PtlEQGet
-\emph default
- function is a nonblocking function that can be used to get the next event
- in an event queue.
- The event is removed from the queue.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
- least one event between this event and the last event obtained (using
-\emph on
-PtlEQGet
-\emph default
- or
-\emph on
-PtlEQWait
-\emph default
-) from this event queue has been dropped due to limited space in the event
- queue.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_EQ_EMPTY Indicates that
-\family typewriter
-eventq
-\family default
- is empty or another thread is waiting on
-\emph on
-PtlEQWait
-\emph default
-.
-
-\layout Description
-
-PTL_INV_EQ Indicates that
-\family typewriter
-eventq
-\family default
- is not a valid event queue handle.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-event
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.5in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-eventq
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the event queue.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-event
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold the values associated with
- the next event in the event queue.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlEQWait
-\begin_inset LatexCommand \label{sec:eqwait}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlEQWait( ptl_handle_eq_t eventq,
-\newline
- ptl_event_t* event );
-\layout Standard
-\noindent
-The
-\emph on
-PtlEQWait
-\emph default
- function can be used to block the calling process (thread) until there
- is an event in an event queue.
- This function also returns the next event in the event queue and removes
- this event from the queue.
- This is the only blocking operation in the Portals 3.2 API.
- In the event that multiple threads are waiting on the same event queue,
- PtlEQWait is guaranteed to wake exactly one thread, but the order in which
- they are awakened is not specified.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
- least one event between this event and the last event obtained (using
-\emph on
-PtlEQGet
-\emph default
- or
-\emph on
-PtlEQWait
-\emph default
-) from this event queue has been dropped due to limited space in the event
- queue.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_EQ Indicates that
-\family typewriter
-eventq
-\family default
- is not a valid event queue handle.
-
-\layout Description
-
-PTL_SEGV Indicates that
-\family typewriter
-event
-\family default
- is not a legal address.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-\noindent
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-eventq
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the event queue to wait on.
- The calling process (thread) will be blocked until
-\family typewriter
-eventq
-\family default
- is not empty.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-event
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-output
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-On successful return, this location will hold the values associated with
- the next event in the event queue.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Section
-
-The Access Control Table
-\begin_inset LatexCommand \label{sec:ac}
-
-\end_inset
-
-
-\layout Standard
-
-Processes can use the access control table to control which processes are
- allowed to perform operations on Portal table entries.
- Each communication interface has a Portal table and an access control table.
- The access control table for the default interface contains an entry at
- index zero that allows all processes with the same user id to communicate.
- Entries in the access control table can be manipulated using the
-\emph on
-PtlACEntry
-\emph default
- function.
-\layout Subsection
-
-PtlACEntry
-\begin_inset LatexCommand \label{sec:acentry}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlACEntry( ptl_handle_ni_t interface,
-\newline
- ptl_ac_index_t index,
-\newline
- ptl_process_id_t matchid,
-\newline
- ptl_uid_t user_id,
-\newline
- ptl_pt_index_t portal );
-\layout Standard
-\noindent
-The
-\emph on
-PtlACEntry
-\emph default
- function can be used to update an entry in the access control table for
- an interface.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_NI Indicates that
-\family typewriter
-interface
-\family default
- is not a valid network interface handle.
-
-\layout Description
-
-PTL_AC_INV_INDEX Indicates that
-\family typewriter
-index
-\family default
- is not a valid access control table index.
-
-\layout Description
-
-PTL_INV_PROC Indicates that
-\family typewriter
-matchid
-\family default
- is not a valid process identifier.
-
-\layout Description
-
-PTL_PT_INV_INDEX Indicates that
-\family typewriter
-portal
-\family default
- is not a valid Portal table index.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="5" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-interface
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Identifies the interface to use.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-index
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The index of the entry in the access control table to update.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-matchid
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Identifies the process(es) that are allowed to perform operations.
- The constants
-\family typewriter
-PTL_PID_ANY
-\family default
- and
-\family typewriter
-PTL_NID_ANY
-\family default
- can be used to wildcard either of the ids in the
-\family typewriter
-ptl_process_id_t
-\family default
- structure.
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-user_id
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Identifies the user that is allowed to perform operations.
- The value
-\family typewriter
-PTL_UID_ANY
-\family default
- can be used to wildcard the user.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-portal
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Identifies the Portal index(es) that can be used.
- The value
-\family typewriter
-PTL_PT_INDEX_ANY
-\family default
- can be used to wildcard the Portal index.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Section
-
-Data Movement Operations
-\begin_inset LatexCommand \label{sec:datamovement}
-
-\end_inset
-
-
-\layout Standard
-
-The Portals API provides two data movement operations:
-\emph on
-PtlPut
-\emph default
- and
-\emph on
-PtlGet
-\emph default
-.
-\layout Subsection
-
-PtlPut
-\begin_inset LatexCommand \label{sec:put}
-
-\end_inset
-
-
-\layout LyX-Code
-
-typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t;
-\newline
-
-\newline
-int PtlPut( ptl_handle_md_t mem_desc,
-\newline
- ptl_ack_req_t ack_req,
-\newline
- ptl_process_id_t target,
-\newline
- ptl_pt_index_t portal,
-\newline
- ptl_ac_index_t cookie,
-\newline
- ptl_match_bits_t match_bits,
-\newline
- ptl_size_t offset,
-\newline
- ptl_hdr_data_t hdr_data );
-\layout Standard
-\noindent
-Values of the type
-\family typewriter
-ptl_ack_req_t
-\family default
- are used to control whether an acknowledgement should be sent when the
- operation completes (i.e., when the data has been written to a memory descriptor
- of the
-\family typewriter
-target
-\family default
- process).
- The value
-\family typewriter
-PTL_ACK_REQ
-\family default
- requests an acknowledgement, the value
-\family typewriter
-PTL_NOACK_REQ
-\family default
- requests that no acknowledgement should be generated.
-\layout Standard
-
-The
-\emph on
-PtlPut
-\emph default
- function initiates an asynchronous put operation.
- There are several events associated with a put operation: initiation of
- the send on the local node (
-\family typewriter
-PTL_EVENT_SEND_START
-\family default
-), completion of the send on the local node (
-\family typewriter
-PTL_EVENT_SEND_END
-\family default
- or
-\family typewriter
-PTL_EVENT_SEND_FAIL
-\family default
-), and, when the send completes successfully, the receipt of an acknowledgement
- (
-\family typewriter
-PTL_EVENT_ACK
-\family default
-) indicating that the operation was accepted by the target.
- These events will be logged in the event queue associated with the memory
- descriptor (
-\family typewriter
-mem_desc
-\family default
-) used in the put operation.
- Using a memory descriptor that does not have an associated event queue
- results in these events being discarded.
- In this case, the application must have another mechanism (e.g., a higher
- level protocol) for determining when it is safe to modify the memory region
- associated with the memory descriptor.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_MD Indicates that
-\family typewriter
-mem_desc
-\family default
- is not a valid memory descriptor.
-
-\layout Description
-
-PTL_INV_PROC Indicates that
-\family typewriter
-target
-\family default
- is not a valid process id.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="8" columns="3">
-<features>
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the memory descriptor that describes the memory to be sent.
- If the memory descriptor has an event queue associated with it, it will
- be used to record events when the message has been sent (PTL_EVENT_SEND_START,
- PTL_EVENT_SEND_END).
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ack_req
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-Controls whether an acknowledgement event is requested.
- Acknowledgements are only sent when they are requested by the initiating
- process
-\series bold
-and
-\series default
- the memory descriptor has an event queue
-\series bold
-and
-\series default
- the target memory descriptor enables them.
- Allowed constants:
-\family typewriter
-PTL_ACK_REQ
-\family default
-,
-\family typewriter
-PTL_NOACK_REQ
-\family default
-.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-target
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A process id for the target process.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-portal
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The index in the remote Portal table.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-cookie
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The index into the access control table of the target process.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-match_bits
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The match bits to use for message selection at the target process.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-offset
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The offset into the target memory descriptor (only used when the target
- memory descriptor has the
-\family typewriter
-PTL_MD_MANAGE_REMOTE
-\family default
- option set).
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-hdr_data
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-64 bits of user data that can be included in message header.
- This data is written to an event queue entry at the target if an event
- queue is present on the matching memory descriptor.
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Subsection
-
-PtlGet
-\begin_inset LatexCommand \label{sec:get}
-
-\end_inset
-
-
-\layout LyX-Code
-
-int PtlGet( ptl_handle_md_t mem_desc,
-\newline
- ptl_process_id_t target,
-\newline
- ptl_pt_index_t portal,
-\newline
- ptl_ac_index_t cookie,
-\newline
- ptl_match_bits_t match_bits,
-\newline
- ptl_size_t offset );
-\layout Standard
-\noindent
-The
-\emph on
-PtlGet
-\emph default
- function initiates a remote read operation.
- There are two event pairs associated with a get operation: when the data
- is sent from the remote node, a
-\family typewriter
-PTL_EVENT_GET{START|END}
-\family default
- event pair is registered on the remote node; and when the data is returned
- from the remote node a
-\family typewriter
-PTL_EVENT_REPLY{START|END}
-\family default
- event pair is registered on the local node.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-
-\layout Description
-
-PTL_INV_MD Indicates that
-\family typewriter
-mem_desc
-\family default
- is not a valid memory descriptor.
-
-\layout Description
-
-PTL_INV_PROC Indicates that
-\family typewriter
-target
-\family default
- is not a valid process id.
-
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="6" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A handle for the memory descriptor that describes the memory into which
- the requested data will be received.
- The memory descriptor can have an event queue associated with it to record
- events, such as when the message receive has started (
-\family typewriter
-PTL_EVENT_REPLY
-\family default
-_
-\family typewriter
-START
-\family default
-).
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-target
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-A process id for the target process.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-portal
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The index in the remote Portal table.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-cookie
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The index into the access control table of the target process.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-match_bits
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The match bits to use for message selection at the target process.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-offset
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-input
-\end_inset
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-The offset into the target memory descriptor (only used when the target
- memory descriptor has the
-\family typewriter
-PTL_MD_MANAGE_REMOTE
-\family default
- option set).
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\layout Section
-
-Summary
-\layout Standard
-
-
-\begin_inset LatexCommand \label{sec:summary}
-
-\end_inset
-
- We conclude this section by summarizing the names introduced by the Portals
- 3.2 API.
- We start by summarizing the names of the types introduced by the API.
- This is followed by a summary of the functions introduced by the API.
- Next, we summarize the function return codes.
- Finally, we conclude with a summary of the other constant values introduced
- by the API.
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:types}
-
-\end_inset
-
- presents a summary of the types defined by the Portals API.
- The first column in this table gives the type name, the second column gives
- a brief description of the type, the third column identifies the section
- where the type is defined, and the fourth column lists the functions that
- have arguments of this type.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Types Defined by the Portals 3.2 API
-\begin_inset LatexCommand \label{tab:types}
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-medskip
-\end_inset
-
-
-\layout Standard
-\noindent
-
-\size small
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="25" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="2in">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="2.2in">
-<row bottomline="true">
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
- Name
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
- Meaning
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
- Sect
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
- Functions
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ac_index_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-indexes for an access control table
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:index-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlACEntry, PtlPut, PtlGet
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ack_req_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-acknowledgement request types
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlPut
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-kinds of events
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlGet
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-information about events
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:event-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlEQGet
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-plt_seq_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-event sequence number
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:event-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlEQGet, PtlEQWait
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_handle_any_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-handles for any object
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIHandle
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_handle_eq_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-handles for event queues
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_handle_md_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-handles for memory descriptors
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMDAlloc, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert,
- PtlPut, PtlGet
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_handle_me_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-handles for match entries
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_handle_ni_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-handles for network interfaces
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut,
- PtlGet
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_nid_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-node identifiers
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlGetId, PtlACEntry
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_pid_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-process identifier
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlGetId, PtlACEntry
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_uid_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-user identifier
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlGetUid, PtlACEntry
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ins_pos_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-insertion position (before or after)
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMEAttach, PtlMEAttachAny, PtlMEInsert
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_interface_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-identifiers for network interfaces
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ni-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIInit
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_match_bits_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-match (and ignore) bits
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:mb-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_md_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-memory descriptors
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMDAttach, PtlMDUpdate
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ni_fail_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-network interface-specific failures
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlEQGet, PtlEQWait
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_process_id_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-process identifiers
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:pid-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, PtlACEntry, PtlPut, PtlGet
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_pt_index_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-indexes for Portal tables
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:index-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMEAttach, PtlMEAttachAny, PtlACEntry
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_size_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-sizes
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:size-t}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlEQAlloc, PtlPut, PtlGet
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_sr_index_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-indexes for status registers
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:stat-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIStatus
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_sr_value_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-values in status registers
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:stat-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIStatus
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_unlink_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-unlink options
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMDAttach
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:func}
-
-\end_inset
-
- presents a summary of the functions defined by the Portals API.
- The first column in this table gives the name for the function, the second
- column gives a brief description of the operation implemented by the function,
- and the third column identifies the section where the function is defined.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Functions Defined by the Portals 3.2 API
-\begin_inset LatexCommand \label{tab:func}
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-medskip
-\end_inset
-
-
-\layout Standard
-\align center
-
-\size small
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="24" columns="3">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-Name
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Operation
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Section
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlACEntry
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- update an entry in an access control table
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ac}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlEQAlloc
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- create an event queue
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlEQGet
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- get the next event from an event queue
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlEQFree
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- release the resources for an event queue
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlEQWait
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- wait for a new event in an event queue
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlFini
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- shutdown the Portals API
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:init}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlGet
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- perform a get operation
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:datamovement}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlGetId
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- get the id for the current process
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:pid}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlInit
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- initialize the Portals API
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:init}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMDAttach
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- create a memory descriptor and attach it to a match entry
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMDBind
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- create a free-floating memory descriptor
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:mdbind}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMDUnlink
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- remove a memory descriptor from a list and release its resources
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMDUpdate
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- update a memory descriptor
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMEAttach
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-create a match entry and attach it to a Portal table
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlMEAttachAny
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-create a match entry and attach it to a free Portal table entry
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:attachany}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMEInsert
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- create a match entry and insert it in a list
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMEUnlink
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- remove a match entry from a list and release its resources
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIDist
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- get the distance to another process
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIFini
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- shutdown a network interface
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIHandle
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- get the network interface handle for an object
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIInit
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- initialize a network interface
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIStatus
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- read a network interface status register
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlPut
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- perform a put operation
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:datamovement}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:retcodes}
-
-\end_inset
-
- summarizes the return codes used by functions defined by the Portals API.
- All of these constants are integer values.
- The first column of this table gives the symbolic name for the constant,
- the second column gives a brief description of the value, and the third
- column identifies the functions that can return this value.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Function Return Codes for the Portals 3.2 API
-\begin_inset LatexCommand \label{tab:retcodes}
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-medskip
-\end_inset
-
-
-\layout Standard
-\align center
-
-\size small
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="27" columns="3">
-<features>
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="2.6in">
-<row bottomline="true">
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Name
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Meaning
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Functions
-\series default
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_AC_INV_INDEX
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid access control table index
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlACEntry
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EQ_DROPPED
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-at least one event has been dropped
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlEQGet, PtlWait
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EQ_EMPTY
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-no events available in an event queue
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlEQGet
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_FAIL
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-error during initialization or cleanup
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlInit, PtlFini
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_ILL_MD
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-illegal memory descriptor values
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMDAttach, PtlMDBind, PtlMDUpdate
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INIT_DUP
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-duplicate initialization of an interface
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIInit
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INIT_INV
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initialization of an invalid interface
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIInit
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INUSE
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-the ME already has an MD
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMDAttach
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_ASIZE
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid access control table size
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIInit
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_EQ
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid event queue handle
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMDUpdate, PtlEQFree, PtlEQGet
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_HANDLE
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid handle
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIHandle
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_MD
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid memory descriptor handle
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMDUnlink, PtlMDUpdate
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_ME
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid match entry handle
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlMDAttach
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_NI
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid network interface handle
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_PROC
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid process identifier
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_PTINDEX
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid Portal table index
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlMEAttach
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_REG
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid status register
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlNIStatus
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INV_SR_INDX
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid status register index
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlNIStatus
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_ML_TOOLONG
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match list too long
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlMEAttach, PtlMEInsert
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_MD_INUSE
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-MD has pending operations
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlMDUnlink
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_NOINIT
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-uninitialized API
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\emph on
-all
-\emph default
-, except PtlInit
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_NOSPACE
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-insufficient memory
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_NOUPDATE
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- no update was performed
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
- PtlMDUpdate
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_PT_FULL
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-Portal table is full
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlMEAttachAny
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_OK
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- success
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-
-\emph on
-all
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_SEGV
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-addressing violation
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent
-PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate,
- PtlEQAlloc, PtlEQGet, PtlEQWait
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:oconsts}
-
-\end_inset
-
- summarizes the remaining constant values introduced by the Portals API.
- The first column in this table presents the symbolic name for the constant,
- the second column gives a brief description of the value, the third column
- identifies the type for the value, and the fourth column identifies the
- sections in which the value is mentioned.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Other Constants Defined by the Portals 3.2 API
-\begin_inset LatexCommand \label{tab:oconsts}
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-medskip
-\end_inset
-
-
-\layout Standard
-\align center
-
-\size small
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="36" columns="5">
-<features>
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Name
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Meaning
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Base type
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Intr.
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Ref.
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_ACK_REQ
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-request an acknowledgement
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ack_req_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EQ_NONE
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a NULL event queue handle
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_handle_eq_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset
-
-,
-\begin_inset LatexCommand \ref{sec:mdupdate}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_GET_START
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-get event start
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_GET_END
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-get event end
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_GET_FAIL
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-get event fail
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_PUT_START
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-put event start
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_PUT_END
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-put event end
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_PUT_FAIL
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-put event fail
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_REPLY_START
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-reply event start
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_REPLY_END
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-reply event end
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_REPLY_FAIL
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-reply event fail
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_ACK_START
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-acknowledgement event start
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_ACK_END
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-acknowledgement event end
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_ACK_FAIL
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-acknowledgement event fail
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_SEND_START
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-send event start
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_SEND_END
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-send event end
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_SEND_FAIL
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-send event fail
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_EVENT_UNLINK
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-unlink event
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_event_kind_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_PID_ANY
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-wildcard for process id fields
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_pid_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
-,
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_NID_ANY
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-wildcard for node id fields
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_nid_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
-,
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_UID_ANY
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-wildcard for user id
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_uid_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset
-
-,
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_IFACE_DEFAULT
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-default interface
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_interface_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ni-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INS_AFTER
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-insert after
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ins_pos_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:meinsert}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_INS_BEFORE
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-insert before
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ins_pos_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:meinsert}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_MD_ACK_DISABLE
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to disable acknowledgements
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_MD_MANAGE_REMOTE
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to enable the use of remote offsets
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-,
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_MD_OP_GET
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to enable get operations
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_MD_OP_PUT
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to enable put operations
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_MD_THRESH_INF
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-infinite threshold for a memory descriptor
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_MD_TRUNCATE
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to enable truncation of a request
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_NOACK_REQ
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-request no acknowledgement
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ack_req_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_PT_INDEX_ANY
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-wildcard for Portal indexes
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_pt_index_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_RETAIN
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-disable unlinking
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_unlink_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_SR_DROP_COUNT
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-index for the dropped count register
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_sr_index_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:stat-type}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:nistatus}
-
-\end_inset
-
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-PTL_UNLINK
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-enable unlinking
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_unlink_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset
-
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Chapter
-
-The Semantics of Message Transmission
-\begin_inset LatexCommand \label{sec:semantics}
-
-\end_inset
-
-
-\layout Standard
-
-The portals API uses four types of messages: put requests, acknowledgements,
- get requests, and replies.
- In this section, we describe the information passed on the wire for each
- type of message.
- We also describe how this information is used to process incoming messages.
-\layout Section
-
-Sending Messages
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:put-wire}
-
-\end_inset
-
- summarizes the information that is transmitted for a put request.
- The first column provides a descriptive name for the information, the second
- column provides the type for this information, the third column identifies
- the source of the information, and the fourth column provides additional
- notes.
- Most information that is transmitted is obtained directly from the
-\emph on
-PtlPut
-\emph default
- operation.
- Notice that the handle for the memory descriptor used in the
-\emph on
-PtlPut
-\emph default
- operation is transmitted even though this value cannot be interpreted by
- the target.
- A value of anything other than
-\family typewriter
-PTL_MD_NONE
-\family default
- is interpreted as a request for an acknowledgement.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Information Passed in a Put Request
-\begin_inset LatexCommand \label{tab:put-wire}
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-medskip
-\end_inset
-
-
-\layout Standard
-\align center
-
-\size small
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="12" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Information
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Type
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-\emph on
-PtlPut
-\emph default
- arg
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Notes
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-operation
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-indicates a put request
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initiator
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_process_id_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-local information
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-user
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_uid_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-local information
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-target
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_process_id_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-target
-\family default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-portal index
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_pt_index_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-portal
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-cookie
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ac_index_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-cookie
-\family default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match bits
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_match_bits_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-match_bits
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-offset
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_size_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-offset
-\family default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-memory desc
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_handle_md_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\family default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-no ack if
-\family typewriter
-PTL_MD_NONE
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-length
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_size_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-length
-\family default
- member
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-data
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family roman
-\emph on
-bytes
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-start
-\family default
- and
-\family typewriter
-length
-\family default
- members
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:ack-wire}
-
-\end_inset
-
- summarizes the information transmitted in an acknowledgement.
- Most of the information is simply echoed from the put request.
- Notice that the initiator and target are obtained directly from the put
- request, but are swapped in generating the acknowledgement.
- The only new piece of information in the acknowledgement is the manipulated
- length, which is determined as the put request is satisfied.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Information Passed in an Acknowledgement
-\begin_inset LatexCommand \label{tab:ack-wire}
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-medskip
-\end_inset
-
-
-\layout Standard
-\align center
-
-\size small
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="10" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Information
-\series default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Type
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Put Information
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Notes
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-operation
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- indicates an acknowledgement
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- initiator
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_process_id_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- target
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- target
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_process_id_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- initiator
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- portal index
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_pt_index_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- portal index
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- match bits
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_match_bits_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- match bits
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- offset
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_size_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- offset
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- memory desc
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
- ptl_handle_md_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- memory desc
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- requested length
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
- ptl_size_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- length
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- manipulated length
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
- ptl_size_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- obtained from the operation
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:get-wire}
-
-\end_inset
-
- summarizes the information that is transmitted for a get request.
- Like the information transmitted in a put request, most of the information
- transmitted in a get request is obtained directly from the
-\emph on
-PtlGet
-\emph default
- operation.
- Unlike put requests, get requests do not include the event queue handle.
- In this case, the reply is generated whenever the operation succeeds and
- the memory descriptor must not be unlinked until the reply is received.
- As such, there is no advantage to explicitly sending the event queue handle.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Information Passed in a Get Request
-\begin_inset LatexCommand \label{tab:get-wire}
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-medskip
-\end_inset
-
-
-\layout Standard
-\align center
-
-\size small
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="11" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Information
-\series default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Type
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-\emph on
-PtlGet
-\emph default
- argument
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Notes
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-operation
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-indicates a get operation
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initiator
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_process_id_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-local information
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-user
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_uid_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-local information
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-target
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_process_id_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-target
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-portal index
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_pt_index_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-portal
-\family default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-cookie
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_ac_index_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-cookie
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match bits
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_match_bits_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-match_bits
-\family default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-offset
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_size_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-offset
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-memory desc
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_handle_md_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\family default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-length
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_size_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-mem_desc
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-length
-\family default
- member
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:reply-wire}
-
-\end_inset
-
- summarizes the information transmitted in a reply.
- Like an acknowledgement, most of the information is simply echoed from
- the get request.
- The initiator and target are obtained directly from the get request, but
- are swapped in generating the reply.
- The only new pieces of information in the reply are the manipulated length
- and the data, which are determined as the get request is satisfied.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Information Passed in a Reply
-\begin_inset LatexCommand \label{tab:reply-wire}
-
-\end_inset
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash
-medskip
-\end_inset
-
-
-\layout Standard
-\align center
-
-\size small
-
-\begin_inset Tabular
-<lyxtabular version="3" rows="11" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Information
-\series default
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Type
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Get Information
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold
-Notes
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-operation
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-int
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-indicates a reply
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initiator
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_process_id_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-target
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-target
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_process_id_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initiator
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-portal index
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_pt_index_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-portal index
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match bits
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_match_bits_t
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match bits
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-offset
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_size_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-offset
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-memory desc
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_handle_md_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-memory desc
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-requested length
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_size_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-length
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-manipulated length
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter
-ptl_size_t
-\family default
-
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-obtained from the operation
-\end_inset
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-data
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\emph on
-bytes
-\end_inset
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-obtained from the operation
-\end_inset
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Section
-
-Receiving Messages
-\begin_inset LatexCommand \label{sec:receiving}
-
-\end_inset
-
-
-\layout Standard
-
-When an incoming message arrives on a network interface, the communication
- system first checks that the target process identified in the request is
- a valid process that has initialized the network interface (i.e., that the
- target process has a valid Portal table).
- If this test fails, the communication system discards the message and increment
-s the dropped message count for the interface.
- The remainder of the processing depends on the type of the incoming message.
- Put and get messages are subject to access control checks and translation
- (searching a match list), while acknowledgement and reply messages bypass
- the access control checks and the translation step.
-\layout Standard
-
-Acknowledgement messages include a handle for the memory descriptor used
- in the original
-\emph on
-PtlPut
-\emph default
- operation.
- This memory descriptor will identify the event queue where the event should
- be recorded.
- Upon receipt of an acknowledgement, the runtime system only needs to confirm
- that the memory descriptor and event queue still exist and that there is
- space for another event.
- Should the any of these conditions fail, the message is simply discarded
- and the dropped message count for the interface is incremented.
- Otherwise, the system builds an acknowledgement event from the information
- in the acknowledgement message and adds it to the event queue.
-\layout Standard
-
-Reception of reply messages is also relatively straightforward.
- Each reply message includes a handle for a memory descriptor.
- If this descriptor exists, it is used to receive the message.
- A reply message will be dropped if the memory descriptor identified in
- the request doesn't exist.
- In either of this case, the dropped message count for the interface is
- incremented.
- These are the only reasons for dropping reply messages.
- Every memory descriptor accepts and truncates incoming reply messages,
- eliminating the other potential reasons for rejecting a reply message.
-\layout Standard
-
-The critical step in processing an incoming put or get request involves
- mapping the request to a memory descriptor.
- This step starts by using the Portal index in the incoming request to identify
- a list of match entries.
- This list of match entries is searched in order until a match entry is
- found whose match criteria matches the match bits in the incoming request
- and whose memory descriptor accepts the request.
-\layout Standard
-
-Because acknowledge and reply messages are generated in response to requests
- made by the process receiving these messages, the checks performed by the
- runtime system for acknowledgements and replies are minimal.
- In contrast, put and get messages are generated by remote processes and
- the checks performed for these messages are more extensive.
- Incoming put or get messages may be rejected because:
-\layout Itemize
-
-the Portal index supplied in the request is not valid;
-\layout Itemize
-
-the cookie supplied in the request is not a valid access control entry;
-
-\layout Itemize
-
-the access control entry identified by the cookie does not match the identifier
- of the requesting process;
-\layout Itemize
-
-the access control entry identified by the access control entry does not
- match the Portal index supplied in the request; or
-\layout Itemize
-
-the match bits supplied in the request do not match any of the match entries
- with a memory descriptor that accepts the request.
-
-\layout Standard
-
-In all cases, if the message is rejected, the incoming message is discarded
- and the dropped message count for the interface is incremented.
-\layout Standard
-
-A memory descriptor may reject an incoming request for any of the following
- reasons:
-\layout Itemize
-
-the
-\family typewriter
-PTL_MD_PUT
-\family default
- or
-\family typewriter
-PTL_MD_GET
-\family default
- option has not been enabled and the operation is put or get, respectively;
-
-\layout Itemize
-
-the length specified in the request is too long for the memory descriptor
- and the
-\family typewriter
-PTL_MD_TRUNCATE
-\family default
- option has not been enabled.
-\layout Chapter
-
-Examples
-\begin_inset LatexCommand \label{sec:examples}
-
-\end_inset
-
-
-\layout Comment
-
-The examples presented in this chapter have not been updated to reflect
- the current API.
-\layout Standard
-
-In this section we present several example to illustrate expected usage
- patterns for the Portals 3.2 API.
- The first example describes how to implement parallel servers using the
- features of the Portals 3.2 API.
- This example covers the access control list and the use of remote managed
- offsets.
- The second example presents an approach to dealing with dropped requests.
- This example covers aspects of match lists and memory descriptors.
- The final example covers message reception in MPI.
- This example illustrates more sophisticated uses of matching and a procedure
- to update a memory descriptor.
-\layout Section
-
-Parallel File Servers
-\begin_inset LatexCommand \label{sec:expfs}
-
-\end_inset
-
-
-\layout Standard
-
-Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:file}
-
-\end_inset
-
- illustrates the logical structure of a parallel file server.
- In this case, the parallel server consists of four servers that stripe
- application data across four disks.
- We would like to present applications with the illusion that the file server
- is a single entity.
- We will assume that all of the processes that constitute the parallel server
- have the same user id.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center
-
-\begin_inset Graphics FormatVersion 1
- filename file.eps
- display color
- size_type 0
- rotateOrigin center
- lyxsize_type 1
- lyxwidth 196pt
- lyxheight 147pt
-\end_inset
-
-
-\layout Caption
-
-Parallel File Server
-\begin_inset LatexCommand \label{fig:file}
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-When an application establishes a connection to the parallel file server,
- it will allocate a Portal and access control list entry for communicating
- with the server.
- The access control list entry will include the Portal and match any process
- in the parallel file server's, so all of the file server processes will
- have access to the portal.
- The Portal information and access control entry will be sent to the file
- server at this time.
- If the application and server need to have multiple, concurrent I/O operations,
- they can use additional portals or match entries to keep the operations
- from interfering with one another.
-\layout Standard
-
-When an application initiates an I/O operation, it first builds a memory
- descriptor that describes the memory region involved in the operation.
- This memory descriptor will enable the appropriate operation (put for read
- operations and get for write operations) and enable the use of remote offsets
- (this lets the servers decide where their data should be placed in the
- memory region).
- After creating the memory descriptor and linking it into the appropriate
- Portal entry, the application sends a read or write request (using
-\emph on
-PtlPut
-\emph default
-) to one of the file server processes.
- The file server processes can then use put or get operations with the appropria
-te offsets to fill or retrieve the contents of the application's buffer.
- To know when the operation has completed, the application can add an event
- queue to the memory descriptor and add up the lengths of the remote operations
- until the sum is the size of the requested I/O operation.
-\layout Section
-
-Dealing with Dropped Requests
-\begin_inset LatexCommand \label{sec:exdrop}
-
-\end_inset
-
-
-\layout Standard
-
-If a process does not anticipate unexpected requests, they will be discarded.
- Applications using the Portals API can query the dropped count for the
- interface to determine the number of requests that have been dropped (see
- Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:nistatus}
-
-\end_inset
-
-).
- While this approach minimizes resource consumption, it does not provide
- information that might be critical in debugging the implementation of a
- higher level protocol.
-\layout Standard
-
-To keep track of more information about dropped requests, we use a memory
- descriptor that truncates each incoming request to zero bytes and logs
- the
-\begin_inset Quotes eld
-\end_inset
-
-dropped
-\begin_inset Quotes erd
-\end_inset
-
- operations in an event queue.
- Note that the operations are not dropped in the Portals sense, because
- the operation succeeds.
-\layout Standard
-
-The following code fragment illustrates an implementation of this approach.
- In this case, we assume that a thread is launched to execute the function
-
-\family typewriter
-watch_drop
-\family default
-.
- This code starts by building an event queue to log truncated operations
- and a memory descriptor to truncate the incoming requests.
- This example only captures
-\begin_inset Quotes eld
-\end_inset
-
-dropped
-\begin_inset Quotes erd
-\end_inset
-
- requests for a single portal.
- In a more realistic situation, the memory descriptor would be appended
- to the match list for every portal.
- We also assume that the thread is capable of keeping up with the
-\begin_inset Quotes eld
-\end_inset
-
-dropped
-\begin_inset Quotes erd
-\end_inset
-
- requests.
- If this is not the case, we could use a finite threshold on the memory
- descriptor to capture the first few dropped requests.
-\layout LyX-Code
-
-
-\size small
-#include <stdio.h>
-\newline
-#include <stdlib.h>
-\newline
-#include <portals.h>
-\newline
-
-\newline
-#define DROP_SIZE 32 /* number of dropped requests to track */
-\newline
-
-\newline
-int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) {
-\newline
- ptl_handle_eq_t drop_events;
-\newline
- ptl_event_t event;
-\newline
- ptl_handle_md_t drop_em;
-\newline
- ptl_md_t drop_desc;
-\newline
- ptl_process_id_t any_proc;
-\newline
- ptl_handle_me_t match_any;
-\newline
-
-\newline
- /* create the event queue */
-\newline
- if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) {
-\newline
- fprintf( stderr, "Couldn't create the event queue
-\backslash
-n" );
-\newline
- exit( 1 );
-\newline
- }
-\newline
-
-\newline
- /* build a match entry */
-\newline
- any_proc.nid = PTL_ID_ANY;
-\newline
- any_proc.pid = PTL_ID_ANY;
-\newline
- PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN,
-\newline
- &match_any );
-\newline
-
-\newline
- /* create the memory descriptor */
-\newline
- drop_desc.start = NULL;
-\newline
- drop_desc.length = 0;
-\newline
- drop_desc.threshold = PTL_MD_THRESH_INF;
-\newline
- drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE;
-\newline
- drop_desc.user_ptr = NULL;
-\newline
- drop_desc.eventq = drop_events;
-\newline
- if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) {
-\newline
- fprintf( stderr, "Couldn't create the memory descriptor
-\backslash
-n" );
-\newline
- exit( 1 );
-\newline
- }
-\newline
-
-\newline
- /* watch for "dropped" requests */
-\newline
- while( 1 ) {
-\newline
- if( PtlEQWait( drop_events, &event ) != PTL_OK ) break;
-\newline
- fprintf( stderr, "Dropped request from gid = event.initiator.gid,
- event.initiator.rid );
-\newline
- }
-\newline
-}
-\layout Section
-
-Message Transmission in MPI
-\begin_inset LatexCommand \label{sec:exmpi}
-
-\end_inset
-
-
-\layout Standard
-
-We conclude this section with a fairly extensive example that describes
- an approach to implementing message transmission for MPI.
- Like many MPI implementations, we distinguish two message transmission
- protocols: a short message protocol and a long message protocol.
- We use the constant
-\family typewriter
-MPI_LONG_LENGTH
-\family default
- to determine the size of a long message.
-\layout Standard
-
-For small messages, the sender simply sends the message and presumes that
- the message will be received (i.e., the receiver has allocated a memory region
- to receive the message body).
- For large messages, the sender also sends the message, but does not presume
- that the message body will be saved.
- Instead, the sender builds a memory descriptor for the message and enables
- get operations on this descriptor.
- If the target does not save the body of the message, it will record an
- event for the put operation.
- When the process later issues a matching MPI receive, it will perform a
- get operation to retrieve the body of the message.
-\layout Standard
-
-To facilitate receive side matching based on the protocol, we use the most
- significant bit in the match bits to indicate the protocol: 1 for long
- messages and 0 for short messages.
-\layout Standard
-
-The following code presents a function that implements the send side of
- the protocol.
- The global variable
-\family typewriter
-EndGet
-\family default
- is the last match entry attached to the Portal index used for posting long
- messages.
- This entry does not match any incoming requests (i.e., the memory descriptor
- rejects all get operations) and is built during initialization of the MPI
- library.
- The other global variable,
-\family typewriter
-MPI_NI
-\family default
-, is a handle for the network interface used by the MPI implementation.
-\layout LyX-Code
-
-
-\size small
-extern ptl_handle_me_t EndGet;
-\newline
-extern ptl_handle_ni_t MPI_NI;
-\newline
-
-\newline
-void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq,
-\newline
- ptl_process_id target, ptl_match_bits_t match )
-\newline
-{
-\newline
- ptl_handle_md_t send_handle;
-\newline
- ptl_md_t mem_desc;
-\newline
- ptl_ack_req_t want_ack;
-\newline
-
-\newline
- mem_desc.start = buf;
-\newline
- mem_desc.length = len;
-\newline
- mem_desc.threshold = 1;
-\newline
- mem_desc.options = PTL_MD_GET_OP;
-\newline
- mem_desc.user_ptr = data;
-\newline
- mem_desc.eventq = eventq;
-\newline
-
-\newline
- if( len >= MPI_LONG_LENGTH ) {
-\newline
- ptl_handle_me_t me_handle;
-\newline
-
-\newline
- /* add a match entry to the end of the get list */
-\newline
- PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet,
- &me_handle );
-\newline
- PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL );
-\newline
-
-\newline
- /* we want an ack for long messages */
-\newline
- want_ack = PTL_ACK_REQ;
-\newline
-
-\newline
- /* set the protocol bit to indicate that this is a long message
- */
-\newline
- match |= 1<<63;
-\newline
- } else {
-\newline
- /* we don't want an ack for short messages */
-\newline
- want_ack = PTL_ACK_REQ;
-\newline
-
-\newline
- /* set the protocol bit to indicate that this is a short message
- */
-\newline
- match &= ~(1<<63);
-\newline
- }
-\newline
-
-\newline
- /* create a memory descriptor and send it */
-\newline
- PtlMDBind( MPI_NI, mem_desc, &send_handle );
-\newline
- PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match,
- 0 );
-\newline
-}
-\layout Standard
-
-The
-\emph on
-MPISend
-\emph default
- function returns as soon as the message has been scheduled for transmission.
- The event queue argument,
-\family typewriter
-eventq
-\family default
-, can be used to determine the disposition of the message.
- Assuming that
-\family typewriter
-eventq
-\family default
- is not
-\family typewriter
-PTL_EQ_NONE
-\family default
-, a
-\family typewriter
-PTL_EVENT_SENT
-\family default
- event will be recorded for each message as the message is transmitted.
- For small messages, this is the only event that will be recorded in
-\family typewriter
-eventq
-\family default
-.
- In contrast, long messages include an explicit request for an acknowledgement.
- If the
-\family typewriter
-target
-\family default
- process has posted a matching receive, the acknowledgement will be sent
- as the message is received.
- If a matching receive has not been posted, the message will be discarded
- and no acknowledgement will be sent.
- When the
-\family typewriter
-target
-\family default
- process later issues a matching receive, the receive will be translated
- into a get operation and a
-\family typewriter
-PTL_EVENT_GET
-\family default
- event will be recorded in
-\family typewriter
-eventq
-\family default
-.
-\layout Standard
-
-Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:mpi}
-
-\end_inset
-
- illustrates the organization of the match list used for receiving MPI messages.
- The initial entries (not shown in this figure) would be used to match the
- MPI receives that have been preposted by the application.
- The preposted receives are followed by a match entry,
-\emph on
-RcvMark
-\emph default
-, that marks the boundary between preposted receives and the memory descriptors
- used for
-\begin_inset Quotes eld
-\end_inset
-
-unexpected
-\begin_inset Quotes erd
-\end_inset
-
- messages.
- The
-\emph on
-RcvMark
-\emph default
- entry is followed by a small collection of match entries that match unexpected
-
-\begin_inset Quotes eld
-\end_inset
-
-short
-\begin_inset Quotes erd
-\end_inset
-
- messages, i.e., messages that have a 0 in the most significant bit of their
- match bits.
- The memory descriptors associated with these match entries will append
- the incoming message to the associated memory descriptor and record an
- event in an event queue for unexpected messages.
- The unexpected short message matching entries are followed by a match entry
- that will match messages that were not matched by the preceding match entries,
- i.e., the unexpected long messages.
- The memory descriptor associated with this match entry truncates the message
- body and records an event in the event queue for unexpected messages.
- Note that of the memory descriptors used for unexpected messages share
- a common event queue.
- This makes it possible to process the unexpected messages in the order
- in which they arrived, regardless of.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center
-
-\begin_inset Graphics FormatVersion 1
- filename mpi.eps
- display color
- size_type 0
- rotateOrigin center
- lyxsize_type 1
- lyxwidth 389pt
- lyxheight 284pt
-\end_inset
-
-
-\layout Caption
-
-Message Reception in MPI
-\begin_inset LatexCommand \label{fig:mpi}
-
-\end_inset
-
-
-\end_inset
-
-
-\layout Standard
-
-When the local MPI process posts an MPI receive, we must first search the
- events unexpected message queue to see if a matching message has already
- arrived.
- If no matching message is found, a match entry for the receive is inserted
- before the
-\emph on
-RcvMark
-\emph default
- entry--after the match entries for all of the previously posted receives
- and before the match entries for the unexpected messages.
- This ensures that preposted receives are matched in the order that they
- were posted (a requirement of MPI).
-
-\layout Standard
-
-While this strategy respects the temporal semantics of MPI, it introduces
- a race condition: a matching message might arrive after the events in the
- unexpected message queue have been searched, but before the match entry
- for the receive has been inserted in the match list.
-
-\layout Standard
-
-To avoid this race condition we start by setting the
-\family typewriter
-threshold
-\family default
- of the memory descriptor to 0, making the descriptor inactive.
- We then insert the match entry into the match list and proceed to search
- the events in the unexpected message queue.
- A matching message that arrives as we are searching the unexpected message
- queue will not be accepted by the memory descriptor and, if not matched
- by an earlier match list element, will add an event to the unexpected message
- queue.
- After searching the events in the unexpected message queue, we update the
- memory descriptor, setting the threshold to 1 to activate the memory descriptor.
- This update is predicated by the condition that the unexpected message
- queue is empty.
- We repeat the process of searching the unexpected message queue until the
- update succeeds.
-\layout Standard
-
-The following code fragment illustrates this approach.
- Because events must be removed from the unexpected message queue to be
- examined, this code fragment assumes the existence of a user managed event
- list,
-\family typewriter
-Rcvd
-\family default
-, for the events that have already been removed from the unexpected message
- queue.
- In an effort to keep the example focused on the basic protocol, we have
- omitted the code that would be needed to manage the memory descriptors
- used for unexpected short messages.
- In particular, we simply leave messages in these descriptors until they
- are received by the application.
- In a robust implementation, we would introduce code to ensure that short
- unexpected messages are removed from these memory descriptors so that they
- can be re-used.
-\layout LyX-Code
-
-
-\size small
-extern ptl_handle_eq_t UnexpQueue;
-\newline
-extern ptl_handle_me_t RcvMark;
-\newline
-extern ptl_handle_me_t ShortMatch;
-\newline
-
-\newline
-typedef struct event_list_tag {
-\newline
- ptl_event_t event;
-\newline
- struct event_list_tag* next;
-\newline
-} event_list;
-\newline
-
-\newline
-extern event_list Rcvd;
-\newline
-
-\newline
-void AppendRcvd( ptl_event_t event )
-\newline
-{
-\newline
- /* append an event onto the Rcvd list */
-\newline
-}
-\newline
-
-\newline
-int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi
-ts_t match,
-\newline
- ptl_match_bits_t ignore, ptl_event_t *event )
-\newline
-{
-\newline
- /* Search the Rcvd event queue, looking for a message that matches the
- requested message.
-\newline
- * If one is found, remove the event from the Rcvd list and return it.
- */
-\newline
-}
-\newline
-
-\newline
-typedef enum { RECEIVED, POSTED } receive_state;
-\newline
-
-\newline
-receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event,
- ptl_md_t md_buf )
-\newline
-{
-\newline
- ptl_md_t md_buf;
-\newline
- ptl_handle_me_t me_handle;
-\newline
-
-\newline
- if( event.rlength >= MPI_LONG_LENGTH ) {
-\newline
- PtlMDBind( MPI_NI, md_buf, &md_handle );
-\newline
- PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX,
- md_handle );
-\newline
- return POSTED;
-\newline
- } else {
-\newline
- /* copy the message */
-\newline
- if( event.mlength < *length ) *length = event.mlength;
-\newline
- memcpy( buf, (char*)event.md_desc.start+event.offset, *length );
-\newline
- return RECEIVED;
-\newline
- }
-\newline
-}
-\newline
-
-\newline
-receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle
-_eq_t eventq,
-\newline
- ptl_process_id_t sender, ptl_match_bits_t match,
- ptl_match_bits_t ignore )
-\newline
-{
-\newline
- ptl_md_t md_buf;
-\newline
- ptl_handle_md_t md_handle;
-\newline
- ptl_handle_me_t me_handle;
-\newline
- ptl_event_t event;
-\newline
-
-\newline
- /* build a memory descriptor for the receive */
-\newline
- md_buf.start = buf;
-\newline
- md_buf.length = *len;
-\newline
- md_buf.threshold = 0; /* temporarily disabled */
-\newline
- md_buf.options = PTL_MD_PUT_OP;
-\newline
- md_buf.user_ptr = MPI_data;
-\newline
- md_buf.eventq = eventq;
-\newline
-
-\newline
- /* see if we have already received the message */
-\newline
- if( SearchRcvd(buf, len, sender, match, ignore, &event) )
-\newline
- return CopyMsg( buf, len, event, md_buf );
-\newline
-
-\newline
- /* create the match entry and attach the memory descriptor */
-\newline
- PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark,
- &me_handle);
-\newline
- PtlMDAttach( me_handle, md_buf, PTL_UNLINK, &md_handle );
-\newline
-
-\newline
- md_buf.threshold = 1;
-\newline
- do
-\newline
- if( PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) {
-\newline
- if( MPIMatch(event, match, ignore, sender) ) {
-\newline
- return CopyMsg( buf, len, (char*)event.md_desc.start+event.offset,
- md_buf );
-\newline
- } else {
-\newline
- AppendRcvd( event );
-\newline
- }
-\newline
- }
-\newline
- while( PtlMDUpdate(md_handle, NULL, &md_buf, unexp_queue) == PTL_NOUPDATE
- );
-\newline
- return POSTED;
-\newline
-}
-\layout Chapter*
-
-Acknowledgments
-\layout Standard
-
-Several people have contributed to the philosophy, design, and implementation
- of the Portals message passing architecture as it has evolved.
- We acknowledge the following people for their contributions: Al Audette,
- Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike
- Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke,
- Dave van Dresser, Lee Ward, and Stephen Wheat.
-
-\layout Standard
-
-
-\begin_inset LatexCommand \BibTeX[ieee]{portals3}
-
-\end_inset
-
-
-\the_end
+++ /dev/null
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter
-100.00
-Single
--2
-1200 2
-6 1350 900 2175 1200
-4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001
-4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001
--6
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 2700 1275 2700 1725
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 900 525 2700 1200
-2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
- 0 300 1200 300 1200 2250 0 2250 0 300
-2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
- 2400 300 3600 300 3600 2250 2400 2250 2400 300
-2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2
- 0 0 1.00 60.00 120.00
- 2699 1788 899 1938
-4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001
-4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001
-4 1 0 100 0 0 10 0.0000 0 135 1170 1800 2175 Acknowledgement\001
-4 0 0 100 0 0 10 0.0000 0 105 405 2850 1500 Portal\001
-4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
-4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
+++ /dev/null
-config.h
-stamp-h
-stamp-h1
-stamp-h.in
-Makefile
-Makefile.in
+++ /dev/null
-SUBDIRS = libcfs lnet
-
-EXTRA_DIST = cygwin-ioctl.h
+++ /dev/null
-/*
- * linux/ioctl.h for Linux by H.H. Bergman.
- */
-
-#ifndef _ASMI386_IOCTL_H
-#define _ASMI386_IOCTL_H
-
-/* ioctl command encoding: 32 bits total, command in lower 16 bits,
- * size of the parameter structure in the lower 14 bits of the
- * upper 16 bits.
- * Encoding the size of the parameter structure in the ioctl request
- * is useful for catching programs compiled with old versions
- * and to avoid overwriting user space outside the user buffer area.
- * The highest 2 bits are reserved for indicating the ``access mode''.
- * NOTE: This limits the max parameter size to 16kB -1 !
- */
-
-/*
- * The following is for compatibility across the various Linux
- * platforms. The i386 ioctl numbering scheme doesn't really enforce
- * a type field. De facto, however, the top 8 bits of the lower 16
- * bits are indeed used as a type field, so we might just as well make
- * this explicit here. Please be sure to use the decoding macros
- * below from now on.
- */
-#undef _IO
-#undef _IOR
-#undef _IOW
-#undef _IOC
-#undef IOC_IN
-#undef IOC_OUT
-
-#define _IOC_NRBITS 8
-#define _IOC_TYPEBITS 8
-#define _IOC_SIZEBITS 14
-#define _IOC_DIRBITS 2
-
-#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
-#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
-#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
-#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)
-
-#define _IOC_NRSHIFT 0
-#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
-#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
-#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS)
-
-/*
- * Direction bits.
- */
-#define _IOC_NONE 0U
-#define _IOC_WRITE 1U
-#define _IOC_READ 2U
-
-#define _IOC(dir,type,nr,size) \
- (((dir) << _IOC_DIRSHIFT) | \
- ((type) << _IOC_TYPESHIFT) | \
- ((nr) << _IOC_NRSHIFT) | \
- ((size) << _IOC_SIZESHIFT))
-
-/* used to create numbers */
-#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
-#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))
-#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
-#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
-
-/* used to decode ioctl numbers.. */
-#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
-#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
-#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
-#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
-
-/* ...and for the drivers/sound files... */
-
-#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT)
-#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT)
-#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
-#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT)
-#define IOCSIZE_SHIFT (_IOC_SIZESHIFT)
-
-#endif /* _ASMI386_IOCTL_H */
+++ /dev/null
-Makefile.in
-Makefile
+++ /dev/null
-SUBDIRS := linux
-if DARWIN
-SUBDIRS += darwin
-endif
-DIST_SUBDIRS := $(SUBDIRS)
-
-EXTRA_DIST := curproc.h kp30.h libcfs.h list.h lltrace.h \
- portals_utils.h types.h user-lock.h user-prim.h user-time.h \
- user-tcpip.h user-bitops.h bitmap.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2007 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-#ifndef _LIBCFS_BITMAP_H_
-#define _LIBCFS_BITMAP_H_
-
-
-typedef struct {
- int size;
- unsigned long data[0];
-} bitmap_t;
-
-#define CFS_BITMAP_SIZE(nbits) \
- (((nbits/BITS_PER_LONG)+1)*sizeof(long)+sizeof(bitmap_t))
-
-static inline
-bitmap_t *ALLOCATE_BITMAP(int size)
-{
- bitmap_t *ptr;
-
- OBD_ALLOC(ptr, CFS_BITMAP_SIZE(size));
- if (ptr == NULL)
- RETURN(ptr);
-
- ptr->size = size;
-
- RETURN (ptr);
-}
-
-#define FREE_BITMAP(ptr) OBD_FREE(ptr, CFS_BITMAP_SIZE(ptr->size))
-
-static inline
-void cfs_bitmap_set(bitmap_t *bitmap, int nbit)
-{
- set_bit(nbit, bitmap->data);
-}
-
-static inline
-void cfs_bitmap_clear(bitmap_t *bitmap, int nbit)
-{
- clear_bit(nbit, bitmap->data);
-}
-
-static inline
-int cfs_bitmap_check(bitmap_t *bitmap, int nbit)
-{
- return test_bit(nbit, bitmap->data);
-}
-
-/* return 0 is bitmap has none set bits */
-static inline
-int cfs_bitmap_check_empty(bitmap_t *bitmap)
-{
- return find_first_bit(bitmap->data, bitmap->size) == bitmap->size;
-}
-
-#define cfs_foreach_bit(bitmap, pos) \
- for((pos)=find_first_bit((bitmap)->data, bitmap->size); \
- (pos) < (bitmap)->size; \
- (pos) = find_next_bit((bitmap)->data, (bitmap)->size, (pos)))
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Lustre curproc API declaration
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation. Lustre is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
- * Public License for more details. You should have received a copy of the GNU
- * General Public License along with Lustre; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#ifndef __LIBCFS_CURPROC_H__
-#define __LIBCFS_CURPROC_H__
-
-#ifdef __KERNEL__
-/*
- * Portable API to access common characteristics of "current" UNIX process.
- *
- * Implemented in portals/include/libcfs/<os>/
- */
-uid_t cfs_curproc_uid(void);
-gid_t cfs_curproc_gid(void);
-uid_t cfs_curproc_fsuid(void);
-gid_t cfs_curproc_fsgid(void);
-pid_t cfs_curproc_pid(void);
-int cfs_curproc_groups_nr(void);
-int cfs_curproc_is_in_groups(gid_t group);
-void cfs_curproc_groups_dump(gid_t *array, int size);
-mode_t cfs_curproc_umask(void);
-char *cfs_curproc_comm(void);
-
-
-/*
- * Plus, platform-specific constant
- *
- * CFS_CURPROC_COMM_MAX,
- *
- * and opaque scalar type
- *
- * cfs_kernel_cap_t
- */
-cfs_kernel_cap_t cfs_curproc_cap_get(void);
-void cfs_curproc_cap_set(cfs_kernel_cap_t cap);
-#endif
-
-/* __LIBCFS_CURPROC_H__ */
-#endif
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-Makefile.in
-Makefile
+++ /dev/null
-EXTRA_DIST := darwin-mem.h darwin-types.h libcfs.h portals_utils.h \
- darwin-fs.h darwin-prim.h darwin-utils.h lltrace.h \
- darwin-lock.h darwin-sync.h darwin-tcpip.h kp30.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Implementation of standard file system interfaces for XNU kernel.
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-#ifndef __LIBCFS_DARWIN_FS_H__
-#define __LIBCFS_DARWIN_FS_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-
-#include <sys/types.h>
-#include <sys/systm.h>
-
-#include <sys/kernel.h>
-#include <sys/file.h>
-#include <sys/time.h>
-#include <sys/filedesc.h>
-#include <sys/mount.h>
-#include <sys/stat.h>
-#include <sys/sysctl.h>
-#include <sys/ubc.h>
-#include <sys/mbuf.h>
-#include <sys/namei.h>
-#include <sys/fcntl.h>
-#include <sys/lockf.h>
-#include <stdarg.h>
-
-#include <mach/mach_types.h>
-#include <mach/time_value.h>
-#include <kern/clock.h>
-#include <sys/param.h>
-#include <IOKit/system.h>
-
-#include <libcfs/darwin/darwin-types.h>
-#include <libcfs/darwin/darwin-lock.h>
-#include <libcfs/darwin/darwin-mem.h>
-#include <libcfs/list.h>
-
-/*
- * File operating APIs in kernel
- */
-#ifdef __DARWIN8__
-/*
- * Kernel file descriptor
- */
-typedef struct cfs_kern_file {
- int f_flags;
- vnode_t f_vp;
- vfs_context_t f_ctxt;
-} cfs_file_t;
-
-#else
-
-typedef struct file cfs_file_t;
-
-#endif
-
-int kern_file_size(cfs_file_t *fp, off_t *size);
-#define cfs_filp_size(fp) \
- ({ \
- off_t __size; \
- kern_file_size((fp), &__size); \
- __size; \
- })
-#define cfs_filp_poff(fp) (NULL)
-
-cfs_file_t *kern_file_open(const char *name, int flags, int mode, int *err);
-int kern_file_close(cfs_file_t *fp);
-int kern_file_read(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
-int kern_file_write(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
-int kern_file_sync(cfs_file_t *fp);
-
-#define cfs_filp_open(n, f, m, e) kern_file_open(n, f, m, e)
-#define cfs_filp_close(f) kern_file_close(f)
-#define cfs_filp_read(f, b, n, p) kern_file_read(f, b, n, p)
-#define cfs_filp_write(f, b, n, p) kern_file_write(f, b, n, p)
-#define cfs_filp_fsync(f) kern_file_sync(f)
-
-int ref_file(cfs_file_t *fp);
-int rele_file(cfs_file_t *fp);
-int file_count(cfs_file_t *fp);
-#define cfs_get_file(f) ref_file(f)
-#define cfs_put_file(f) rele_file(f)
-#define cfs_file_count(f) file_count(f)
-
-#define CFS_INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
-#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t)
-
-typedef struct flock cfs_flock_t;
-#define cfs_flock_type(fl) ((fl)->l_type)
-#define cfs_flock_set_type(fl, type) do { (fl)->l_type = (type); } while(0)
-#define cfs_flock_pid(fl) ((fl)->l_pid)
-#define cfs_flock_set_pid(fl, pid) do { (fl)->l_pid = (pid); } while(0)
-#define cfs_flock_start(fl) ((fl)->l_start)
-#define cfs_flock_set_start(fl, start) do { (fl)->l_start = (start); } while(0)
-
-static inline loff_t cfs_flock_end(cfs_flock_t *fl)
-{
- return (fl->l_len == 0 ? CFS_OFFSET_MAX: (fl->l_start + fl->l_len));
-}
-
-static inline void cfs_flock_set_end(cfs_flock_t *fl, loff_t end)
-{
- if (end == CFS_OFFSET_MAX)
- fl->l_len = 0;
- else
- fl->l_len = end - fl->l_start;
-}
-
-#define ATTR_MODE 0x0001
-#define ATTR_UID 0x0002
-#define ATTR_GID 0x0004
-#define ATTR_SIZE 0x0008
-#define ATTR_ATIME 0x0010
-#define ATTR_MTIME 0x0020
-#define ATTR_CTIME 0x0040
-#define ATTR_ATIME_SET 0x0080
-#define ATTR_MTIME_SET 0x0100
-#define ATTR_FORCE 0x0200 /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG 0x0400
-#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
-#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
-#define ATTR_CTIME_SET 0x2000
-#define ATTR_BLOCKS 0x4000
-#define ATTR_KILL_SUID 0
-#define ATTR_KILL_SGID 0
-
-#define in_group_p(x) (0)
-
-struct posix_acl_entry {
- short e_tag;
- unsigned short e_perm;
- unsigned int e_id;
-};
-
-struct posix_acl {
- atomic_t a_refcount;
- unsigned int a_count;
- struct posix_acl_entry a_entries[0];
-};
-
-struct posix_acl *posix_acl_alloc(int count, int flags);
-static inline struct posix_acl *posix_acl_from_xattr(const void *value,
- size_t size)
-{
- return posix_acl_alloc(0, 0);
-}
-static inline void posix_acl_release(struct posix_acl *acl) {};
-static inline int posix_acl_valid(const struct posix_acl *acl) { return 0; }
-static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl)
-{
- return acl;
-}
-
-#else /* !__KERNEL__ */
-
-typedef struct file cfs_file_t;
-
-#endif /* END __KERNEL__ */
-
-typedef struct {
- void *d;
-} cfs_dentry_t;
-
-#ifndef O_SYNC
-#define O_SYNC 0
-#endif
-#ifndef O_DIRECTORY
-#define O_DIRECTORY 0
-#endif
-#ifndef O_LARGEFILE
-#define O_LARGEFILE 0
-#endif
-
-#endif
+++ /dev/null
-#ifndef __LIBCFS_DARWIN_CFS_LOCK_H__
-#define __LIBCFS_DARWIN_CFS_LOCK_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-#include <mach/sync_policy.h>
-#include <mach/task.h>
-#include <mach/semaphore.h>
-#include <kern/assert.h>
-#include <kern/thread.h>
-
-#include <libcfs/darwin/darwin-types.h>
-#include <libcfs/darwin/darwin-sync.h>
-
-/*
- * spin_lock (use Linux kernel's primitives)
- *
- * - spin_lock_init(x)
- * - spin_lock(x)
- * - spin_unlock(x)
- * - spin_trylock(x)
- *
- * - spin_lock_irqsave(x, f)
- * - spin_unlock_irqrestore(x, f)
- */
-struct spin_lock {
- struct kspin spin;
-};
-
-typedef struct spin_lock spinlock_t;
-
-static inline void spin_lock_init(spinlock_t *lock)
-{
- kspin_init(&lock->spin);
-}
-
-static inline void spin_lock(spinlock_t *lock)
-{
- kspin_lock(&lock->spin);
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
- kspin_unlock(&lock->spin);
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
- return kspin_trylock(&lock->spin);
-}
-
-static inline void spin_lock_done(spinlock_t *lock)
-{
- kspin_done(&lock->spin);
-}
-
-#error "does this lock out timer callbacks?"
-#define spin_lock_bh(x) spin_lock(x)
-#define spin_unlock_bh(x) spin_unlock(x)
-#define spin_lock_bh_init(x) spin_lock_init(x)
-
-extern boolean_t ml_set_interrupts_enabled(boolean_t enable);
-#define __disable_irq() ml_set_interrupts_enabled(FALSE)
-#define __enable_irq(x) (void) ml_set_interrupts_enabled(x)
-
-#define spin_lock_irqsave(s, f) do{ \
- f = __disable_irq(); \
- spin_lock(s); }while(0)
-
-#define spin_unlock_irqrestore(s, f) do{ \
- spin_unlock(s); \
- __enable_irq(f);}while(0)
-
-/*
- * Semaphore
- *
- * - sema_init(x, v)
- * - __down(x)
- * - __up(x)
- */
-struct semaphore {
- struct ksem sem;
-};
-
-static inline void sema_init(struct semaphore *s, int val)
-{
- ksem_init(&s->sem, val);
-}
-
-static inline void __down(struct semaphore *s)
-{
- ksem_down(&s->sem, 1);
-}
-
-static inline void __up(struct semaphore *s)
-{
- ksem_up(&s->sem, 1);
-}
-
-/*
- * Mutex:
- *
- * - init_mutex(x)
- * - init_mutex_locked(x)
- * - mutex_up(x)
- * - mutex_down(x)
- */
-
-#define mutex_up(s) __up(s)
-#define mutex_down(s) __down(s)
-
-#define init_mutex(x) sema_init(x, 1)
-#define init_mutex_locked(x) sema_init(x, 0)
-
-/*
- * Completion:
- *
- * - init_completion(c)
- * - complete(c)
- * - wait_for_completion(c)
- */
-struct completion {
- /*
- * Emulate completion by semaphore for now.
- *
- * XXX nikita: this is not safe if completion is used to synchronize
- * exit from kernel daemon thread and kext unloading. In this case
- * some core function (a la complete_and_exit()) is needed.
- */
- struct ksem sem;
-};
-
-static inline void init_completion(struct completion *c)
-{
- ksem_init(&c->sem, 0);
-}
-
-static inline void complete(struct completion *c)
-{
- ksem_up(&c->sem, 1);
-}
-
-static inline void wait_for_completion(struct completion *c)
-{
- ksem_down(&c->sem, 1);
-}
-
-/*
- * rw_semaphore:
- *
- * - DECLARE_RWSEM(x)
- * - init_rwsem(x)
- * - down_read(x)
- * - up_read(x)
- * - down_write(x)
- * - up_write(x)
- */
-struct rw_semaphore {
- struct krw_sem s;
-};
-
-static inline void init_rwsem(struct rw_semaphore *s)
-{
- krw_sem_init(&s->s);
-}
-
-static inline void fini_rwsem(struct rw_semaphore *s)
-{
- krw_sem_done(&s->s);
-}
-
-static inline void down_read(struct rw_semaphore *s)
-{
- krw_sem_down_r(&s->s);
-}
-
-static inline int down_read_trylock(struct rw_semaphore *s)
-{
- int ret = krw_sem_down_r_try(&s->s);
- return ret == 0;
-}
-
-static inline void down_write(struct rw_semaphore *s)
-{
- krw_sem_down_w(&s->s);
-}
-
-static inline int down_write_trylock(struct rw_semaphore *s)
-{
- int ret = krw_sem_down_w_try(&s->s);
- return ret == 0;
-}
-
-static inline void up_read(struct rw_semaphore *s)
-{
- krw_sem_up_r(&s->s);
-}
-
-static inline void up_write(struct rw_semaphore *s)
-{
- krw_sem_up_w(&s->s);
-}
-
-/*
- * read-write lock : Need to be investigated more!!
- *
- * - DECLARE_RWLOCK(l)
- * - rwlock_init(x)
- * - read_lock(x)
- * - read_unlock(x)
- * - write_lock(x)
- * - write_unlock(x)
- */
-typedef struct krw_spin rwlock_t;
-
-#define rwlock_init(pl) krw_spin_init(pl)
-
-#define read_lock(l) krw_spin_down_r(l)
-#define read_unlock(l) krw_spin_up_r(l)
-#define write_lock(l) krw_spin_down_w(l)
-#define write_unlock(l) krw_spin_up_w(l)
-
-#define write_lock_irqsave(l, f) do{ \
- f = __disable_irq(); \
- write_lock(l); }while(0)
-
-#define write_unlock_irqrestore(l, f) do{ \
- write_unlock(l); \
- __enable_irq(f);}while(0)
-
-#define read_lock_irqsave(l, f) do{ \
- f = __disable_irq(); \
- read_lock(l); }while(0)
-
-#define read_unlock_irqrestore(l, f) do{ \
- read_unlock(l); \
- __enable_irq(f);}while(0)
-/*
- * Funnel:
- *
- * Safe funnel in/out
- */
-#ifdef __DARWIN8__
-
-#define CFS_DECL_FUNNEL_DATA
-#define CFS_DECL_CONE_DATA DECLARE_FUNNEL_DATA
-#define CFS_DECL_NET_DATA DECLARE_FUNNEL_DATA
-#define CFS_CONE_IN do {} while(0)
-#define CFS_CONE_EX do {} while(0)
-
-#define CFS_NET_IN do {} while(0)
-#define CFS_NET_EX do {} while(0)
-
-#else
-
-#define CFS_DECL_FUNNEL_DATA \
- boolean_t __funnel_state = FALSE; \
- funnel_t *__funnel
-#define CFS_DECL_CONE_DATA CFS_DECL_FUNNEL_DATA
-#define CFS_DECL_NET_DATA CFS_DECL_FUNNEL_DATA
-
-void lustre_cone_in(boolean_t *state, funnel_t **cone);
-void lustre_cone_ex(boolean_t state, funnel_t *cone);
-
-#define CFS_CONE_IN lustre_cone_in(&__funnel_state, &__funnel)
-#define CFS_CONE_EX lustre_cone_ex(__funnel_state, __funnel)
-
-void lustre_net_in(boolean_t *state, funnel_t **cone);
-void lustre_net_ex(boolean_t state, funnel_t *cone);
-
-#define CFS_NET_IN lustre_net_in(&__funnel_state, &__funnel)
-#define CFS_NET_EX lustre_net_ex(__funnel_state, __funnel)
-
-#endif
-
-#else
-#include <libcfs/user-lock.h>
-#endif /* __KERNEL__ */
-
-/* __XNU_CFS_LOCK_H */
-#endif
+++ /dev/null
-#ifndef __LIBCFS_DARWIN_CFS_MEM_H__
-#define __LIBCFS_DARWIN_CFS_MEM_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-
-#include <sys/types.h>
-#include <sys/systm.h>
-
-#include <sys/vm.h>
-#include <sys/kernel.h>
-#include <sys/ubc.h>
-#include <sys/uio.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/lockf.h>
-
-#include <mach/mach_types.h>
-#include <mach/vm_types.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <mach/machine/vm_param.h>
-#include <kern/thread_call.h>
-#include <sys/param.h>
-#include <sys/vm.h>
-
-#include <libcfs/darwin/darwin-types.h>
-#include <libcfs/darwin/darwin-sync.h>
-#include <libcfs/darwin/darwin-lock.h>
-#include <libcfs/list.h>
-
-/*
- * Basic xnu_page struct, should be binary compatibility with
- * all page types in xnu (we have only xnu_raw_page, xll_page now)
- */
-
-/* Variable sized pages are not supported */
-
-#ifdef PAGE_SHIFT
-#define CFS_PAGE_SHIFT PAGE_SHIFT
-#else
-#define CFS_PAGE_SHIFT 12
-#endif
-
-#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT)
-
-#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE - 1))
-
-enum {
- XNU_PAGE_RAW,
- XNU_PAGE_XLL,
- XNU_PAGE_NTYPES
-};
-
-typedef __u32 page_off_t;
-
-/*
- * For XNU we have our own page cache built on top of underlying BSD/MACH
- * infrastructure. In particular, we have two disjoint types of pages:
- *
- * - "raw" pages (XNU_PAGE_RAW): these are just buffers mapped into KVM,
- * based on UPLs, and
- *
- * - "xll" pages (XNU_PAGE_XLL): these are used by file system to cache
- * file data, owned by file system objects, hashed, lrued, etc.
- *
- * cfs_page_t has to cover both of them, because core Lustre code is based on
- * the Linux assumption that page is _both_ memory buffer and file system
- * caching entity.
- *
- * To achieve this, all types of pages supported on XNU has to start from
- * common header that contains only "page type". Common cfs_page_t operations
- * dispatch through operation vector based on page type.
- *
- */
-typedef struct xnu_page {
- int type;
-} cfs_page_t;
-
-struct xnu_page_ops {
- void *(*page_map) (cfs_page_t *);
- void (*page_unmap) (cfs_page_t *);
- void *(*page_address) (cfs_page_t *);
-};
-
-void xnu_page_ops_register(int type, struct xnu_page_ops *ops);
-void xnu_page_ops_unregister(int type);
-
-/*
- * raw page, no cache object, just like buffer
- */
-struct xnu_raw_page {
- struct xnu_page header;
- void *virtual;
- atomic_t count;
- struct list_head link;
-};
-
-/*
- * Public interface to lustre
- *
- * - cfs_alloc_page(f)
- * - cfs_free_page(p)
- * - cfs_kmap(p)
- * - cfs_kunmap(p)
- * - cfs_page_address(p)
- */
-
-/*
- * Of all functions above only cfs_kmap(), cfs_kunmap(), and
- * cfs_page_address() can be called on file system pages. The rest is for raw
- * pages only.
- */
-
-cfs_page_t *cfs_alloc_page(u_int32_t flags);
-void cfs_free_page(cfs_page_t *page);
-void cfs_get_page(cfs_page_t *page);
-int cfs_put_page_testzero(cfs_page_t *page);
-int cfs_page_count(cfs_page_t *page);
-#define cfs_page_index(pg) (0)
-
-void *cfs_page_address(cfs_page_t *pg);
-void *cfs_kmap(cfs_page_t *pg);
-void cfs_kunmap(cfs_page_t *pg);
-
-/*
- * Memory allocator
- */
-
-void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
-void cfs_free(void *addr);
-
-void *cfs_alloc_large(size_t nr_bytes);
-void cfs_free_large(void *addr);
-
-extern int get_preemption_level(void);
-
-#define CFS_ALLOC_ATOMIC_TRY \
- (get_preemption_level() != 0 ? CFS_ALLOC_ATOMIC : 0)
-
-/*
- * Slab:
- *
- * No slab in OSX, use zone allocator to simulate slab
- */
-#define SLAB_HWCACHE_ALIGN 0
-
-#ifdef __DARWIN8__
-/*
- * In Darwin8, we cannot use zalloc_noblock(not exported by kernel),
- * also, direct using of zone allocator is not recommended.
- */
-#define CFS_INDIVIDUAL_ZONE (0)
-
-#if !CFS_INDIVIDUAL_ZONE
-#include <libkern/OSMalloc.h>
-typedef OSMallocTag mem_cache_t;
-#else
-typedef void* zone_t;
-typedef zone_t mem_cache_t;
-#endif
-
-#else /* !__DARWIN8__ */
-
-#define CFS_INDIVIDUAL_ZONE (1)
-
-typedef zone_t mem_cache_t;
-
-#endif /* !__DARWIN8__ */
-
-#define MC_NAME_MAX_LEN 64
-
-typedef struct cfs_mem_cache {
- int mc_size;
- mem_cache_t mc_cache;
- struct list_head mc_link;
- char mc_name [MC_NAME_MAX_LEN];
-} cfs_mem_cache_t;
-
-#define KMEM_CACHE_MAX_COUNT 64
-#define KMEM_MAX_ZONE 8192
-
-cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long);
-int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
-void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
-void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
-
-/*
- * Misc
- */
-/* XXX Liang: num_physpages... fix me */
-#define num_physpages (64 * 1024)
-
-#define CFS_DECL_MMSPACE
-#define CFS_MMSPACE_OPEN do {} while(0)
-#define CFS_MMSPACE_CLOSE do {} while(0)
-
-#define copy_from_user(kaddr, uaddr, size) copyin(CAST_USER_ADDR_T(uaddr), (caddr_t)kaddr, size)
-#define copy_to_user(uaddr, kaddr, size) copyout((caddr_t)kaddr, CAST_USER_ADDR_T(uaddr), size)
-
-#if 0
-static inline int strncpy_from_user(char *kaddr, char *uaddr, int size)
-{
- size_t count;
- return copyinstr((const user_addr_t)uaddr, (void *)kaddr, size, &count);
-}
-#endif
-
-#if defined (__ppc__)
-#define mb() __asm__ __volatile__ ("sync" : : : "memory")
-#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define wmb() __asm__ __volatile__ ("eieio" : : : "memory")
-#elif defined (__i386__)
-#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
-#define rmb() mb()
-#define wmb() __asm__ __volatile__ ("": : :"memory")
-#else
-#error architecture not supported
-#endif
-
-#else /* !__KERNEL__ */
-
-#define CFS_CACHE_SHIFT 12
-#define PAGE_CACHE_SIZE (1 << CFS_CACHE_SHIFT)
-#include <libcfs/user-prim.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __XNU_CFS_MEM_H__ */
+++ /dev/null
-#ifndef __LIBCFS_DARWIN_CFS_PRIM_H__
-#define __LIBCFS_DARWIN_CFS_PRIM_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-#include <sys/types.h>
-#include <sys/systm.h>
-
-#ifndef __DARWIN8__
-# ifndef __APPLE_API_PRIVATE
-# define __APPLE_API_PRIVATE
-# include <sys/user.h>
-# undef __APPLE_API_PRIVATE
-# else
-# include <sys/user.h>
-# endif
-# include <mach/mach_traps.h>
-# include <mach/thread_switch.h>
-# include <machine/cpu_number.h>
-#endif /* !__DARWIN8__ */
-
-#include <sys/kernel.h>
-
-#include <mach/thread_act.h>
-#include <mach/mach_types.h>
-#include <mach/time_value.h>
-#include <kern/sched_prim.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <mach/machine/vm_param.h>
-#include <machine/machine_routines.h>
-#include <kern/clock.h>
-#include <kern/thread_call.h>
-#include <sys/param.h>
-#include <sys/vm.h>
-
-#include <libcfs/darwin/darwin-types.h>
-#include <libcfs/darwin/darwin-utils.h>
-#include <libcfs/darwin/darwin-lock.h>
-
-/*
- * Symbol functions for libcfs
- *
- * OSX has no facility for use to register symbol.
- * So we have to implement it.
- */
-#define CFS_SYMBOL_LEN 64
-
-struct cfs_symbol {
- char name[CFS_SYMBOL_LEN];
- void *value;
- int ref;
- struct list_head sym_list;
-};
-
-extern kern_return_t cfs_symbol_register(const char *, const void *);
-extern kern_return_t cfs_symbol_unregister(const char *);
-extern void * cfs_symbol_get(const char *);
-extern kern_return_t cfs_symbol_put(const char *);
-
-/*
- * sysctl typedef
- *
- * User can register/unregister a list of sysctl_oids
- * sysctl_oid is data struct of osx's sysctl-entry
- */
-#define CONFIG_SYSCTL 1
-
-typedef struct sysctl_oid * cfs_sysctl_table_t;
-typedef cfs_sysctl_table_t cfs_sysctl_table_header_t;
-cfs_sysctl_table_header_t *cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg);
-void cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table);
-
-/*
- * Proc file system APIs, no /proc fs support in OSX
- */
-typedef struct cfs_proc_dir_entry {
- void *data;
-} cfs_proc_dir_entry_t;
-
-cfs_proc_dir_entry_t * cfs_create_proc_entry(char *name, int mod,
- cfs_proc_dir_entry_t *parent);
-void cfs_free_proc_entry(cfs_proc_dir_entry_t *de);
-void cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry);
-
-typedef int (cfs_read_proc_t)(char *page, char **start, off_t off,
- int count, int *eof, void *data);
-typedef int (cfs_write_proc_t)(struct file *file, const char *buffer,
- unsigned long count, void *data);
-
-/*
- * cfs pseudo device
- *
- * cfs_psdev_t
- * cfs_psdev_register:
- * cfs_psdev_deregister:
- */
-typedef struct {
- int index;
- void *handle;
- const char *name;
- struct cdevsw *devsw;
- void *private;
-} cfs_psdev_t;
-
-extern kern_return_t cfs_psdev_register(cfs_psdev_t *);
-extern kern_return_t cfs_psdev_deregister(cfs_psdev_t *);
-
-/*
- * Task struct and ...
- *
- * Using BSD current_proc in Darwin
- */
-extern boolean_t assert_wait_possible(void);
-extern void *get_bsdtask_info(task_t);
-
-#ifdef __DARWIN8__
-
-typedef struct {} cfs_task_t;
-#define cfs_current() ((cfs_task_t *)current_thread())
-#else /* !__DARWIN8__ */
-
-typedef struct uthread cfs_task_t;
-
-#define current_uthread() ((struct uthread *)get_bsdthread_info(current_act()))
-#define cfs_current() current_uthread()
-
-#endif /* !__DARWIN8__ */
-
-#define cfs_task_lock(t) do {;} while (0)
-#define cfs_task_unlock(t) do {;} while (0)
-
-#define set_current_state(s) do {;} while (0)
-
-#define CFS_DECL_JOURNAL_DATA
-#define CFS_PUSH_JOURNAL do {;} while(0)
-#define CFS_POP_JOURNAL do {;} while(0)
-
-#define THREAD_NAME(comm, fmt, a...)
-/*
- * Kernel thread:
- *
- * OSX kernel thread can not be created with args,
- * so we have to implement new APIs to create thread with args
- */
-
-typedef int (*cfs_thread_t)(void *);
-
-extern task_t kernel_task;
-
-/*
- * cloning flags, no use in OSX, just copy them from Linux
- */
-#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
-#define CLONE_VM 0x00000100 /* set if VM shared between processes */
-#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
-#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
-#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
-#define CLONE_PID 0x00001000 /* set if pid shared */
-#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
-#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
-#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
-#define CLONE_THREAD 0x00010000 /* Same thread group? */
-#define CLONE_NEWNS 0x00020000 /* New namespace group? */
-
-#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD)
-
-extern int cfs_kernel_thread(cfs_thread_t func, void *arg, int flag);
-
-
-/*
- * Wait Queue implementation
- *
- * Like wait_queue in Linux
- */
-typedef struct cfs_waitq {
- struct ksleep_chan wq_ksleep_chan;
-} cfs_waitq_t;
-
-typedef struct cfs_waitlink {
- struct cfs_waitq *wl_waitq;
- struct ksleep_link wl_ksleep_link;
-} cfs_waitlink_t;
-
-typedef int cfs_task_state_t;
-
-#define CFS_TASK_INTERRUPTIBLE THREAD_ABORTSAFE
-#define CFS_TASK_UNINT THREAD_UNINT
-
-void cfs_waitq_init(struct cfs_waitq *waitq);
-void cfs_waitlink_init(struct cfs_waitlink *link);
-
-void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
-void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
- struct cfs_waitlink *link);
-void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
-void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
-int cfs_waitq_active(struct cfs_waitq *waitq);
-
-void cfs_waitq_signal(struct cfs_waitq *waitq);
-void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
-void cfs_waitq_broadcast(struct cfs_waitq *waitq);
-
-void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state);
-cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
- cfs_task_state_t state,
- cfs_duration_t timeout);
-
-/*
- * Thread schedule APIs.
- */
-#define MAX_SCHEDULE_TIMEOUT ((long)(~0UL>>12))
-extern void thread_set_timer_deadline(__u64 deadline);
-extern void thread_cancel_timer(void);
-
-static inline int cfs_schedule_timeout(int state, int64_t timeout)
-{
- int result;
-
-#ifdef __DARWIN8__
- result = assert_wait((event_t)current_thread(), state);
-#else
- result = assert_wait((event_t)current_uthread(), state);
-#endif
- if (timeout > 0) {
- __u64 expire;
- nanoseconds_to_absolutetime(timeout, &expire);
- clock_absolutetime_interval_to_deadline(expire, &expire);
- thread_set_timer_deadline(expire);
- }
- if (result == THREAD_WAITING)
- result = thread_block(THREAD_CONTINUE_NULL);
- if (timeout > 0)
- thread_cancel_timer();
- if (result == THREAD_TIMED_OUT)
- result = 0;
- else
- result = 1;
- return result;
-}
-
-#define cfs_schedule() cfs_schedule_timeout(CFS_TASK_UNINT, CFS_TICK)
-#define cfs_pause(tick) cfs_schedule_timeout(CFS_TASK_UNINT, tick)
-
-#define __wait_event(wq, condition) \
-do { \
- struct cfs_waitlink __wait; \
- \
- cfs_waitlink_init(&__wait); \
- for (;;) { \
- cfs_waitq_add(&wq, &__wait); \
- if (condition) \
- break; \
- cfs_waitq_wait(&__wait, CFS_TASK_UNINT); \
- cfs_waitq_del(&wq, &__wait); \
- } \
- cfs_waitq_del(&wq, &__wait); \
-} while (0)
-
-#define wait_event(wq, condition) \
-do { \
- if (condition) \
- break; \
- __wait_event(wq, condition); \
-} while (0)
-
-#define __wait_event_interruptible(wq, condition, ex, ret) \
-do { \
- struct cfs_waitlink __wait; \
- \
- cfs_waitlink_init(&__wait); \
- for (;;) { \
- if (ex == 0) \
- cfs_waitq_add(&wq, &__wait); \
- else \
- cfs_waitq_add_exclusive(&wq, &__wait); \
- if (condition) \
- break; \
- if (!cfs_signal_pending()) { \
- cfs_waitq_wait(&__wait, \
- CFS_TASK_INTERRUPTIBLE); \
- cfs_waitq_del(&wq, &__wait); \
- continue; \
- } \
- ret = -ERESTARTSYS; \
- break; \
- } \
- cfs_waitq_del(&wq, &__wait); \
-} while (0)
-
-#define wait_event_interruptible(wq, condition) \
-({ \
- int __ret = 0; \
- if (!condition) \
- __wait_event_interruptible(wq, condition, \
- 0, __ret); \
- __ret; \
-})
-
-#define wait_event_interruptible_exclusive(wq, condition) \
-({ \
- int __ret = 0; \
- if (!condition) \
- __wait_event_interruptible(wq, condition, \
- 1, __ret); \
- __ret; \
-})
-
-#ifndef __DARWIN8__
-extern void wakeup_one __P((void * chan));
-#endif
-/* only used in tests */
-#define wake_up_process(p) \
- do { \
- wakeup_one((caddr_t)p); \
- } while (0)
-
-/* used in couple of places */
-static inline void sleep_on(cfs_waitq_t *waitq)
-{
- cfs_waitlink_t link;
-
- cfs_waitlink_init(&link);
- cfs_waitq_add(waitq, &link);
- cfs_waitq_wait(&link, CFS_TASK_UNINT);
- cfs_waitq_del(waitq, &link);
-}
-
-/*
- * Signal
- */
-typedef sigset_t cfs_sigset_t;
-
-#define SIGNAL_MASK_ASSERT()
-/*
- * Timer
- */
-typedef struct cfs_timer {
- struct ktimer t;
-} cfs_timer_t;
-
-#define cfs_init_timer(t) do {} while(0)
-void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg);
-void cfs_timer_done(struct cfs_timer *t);
-void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline);
-void cfs_timer_disarm(struct cfs_timer *t);
-int cfs_timer_is_armed(struct cfs_timer *t);
-
-cfs_time_t cfs_timer_deadline(struct cfs_timer *t);
-
-/*
- * Ioctl
- * We don't need to copy out everything in osx
- */
-#define cfs_ioctl_data_out(a, d, l) \
- ({ \
- int __size; \
- int __rc = 0; \
- assert((l) >= sizeof(*d)); \
- __size = (l) - sizeof(*d); \
- if (__size > 0) \
- __rc = copy_to_user((void *)a + __size, \
- (void *)d + __size, \
- __size); \
- __rc; \
- })
-
-/*
- * CPU
- */
-/* Run in PowerG5 who is PPC64 */
-#define SMP_CACHE_BYTES 128
-#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#define NR_CPUS 2
-
-/*
- * XXX Liang: patch xnu and export current_processor()?
- *
- * #define smp_processor_id() current_processor()
- */
-#define smp_processor_id() 0
-/* XXX smp_call_function is not supported in xnu */
-#define smp_call_function(f, a, n, w) do {} while(0)
-int cfs_online_cpus(void);
-#define smp_num_cpus cfs_online_cpus()
-
-/*
- * Misc
- */
-extern int is_suser(void);
-
-#ifndef likely
-#define likely(exp) (exp)
-#endif
-#ifndef unlikely
-#define unlikely(exp) (exp)
-#endif
-
-#define lock_kernel() do {} while(0)
-#define unlock_kernel() do {} while(0)
-
-#define CAP_SYS_BOOT 0
-#define CAP_SYS_ADMIN 1
-#define capable(a) ((a) == CAP_SYS_BOOT ? is_suser(): is_suser1())
-
-#define USERMODEHELPER(path, argv, envp) (0)
-
-#define cfs_module(name, version, init, fini) \
-extern kern_return_t _start(kmod_info_t *ki, void *data); \
-extern kern_return_t _stop(kmod_info_t *ki, void *data); \
-__private_extern__ kern_return_t name##_start(kmod_info_t *ki, void *data); \
-__private_extern__ kern_return_t name##_stop(kmod_info_t *ki, void *data); \
- \
-kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1, \
- { "com.clusterfs.lustre." #name }, { version }, \
- -1, 0, 0, 0, 0, name##_start, name##_stop }; \
- \
-__private_extern__ kmod_start_func_t *_realmain = name##_start; \
-__private_extern__ kmod_stop_func_t *_antimain = name##_stop; \
-__private_extern__ int _kext_apple_cc = __APPLE_CC__ ; \
- \
-kern_return_t name##_start(kmod_info_t *ki, void *d) \
-{ \
- return init(); \
-} \
- \
-kern_return_t name##_stop(kmod_info_t *ki, void *d) \
-{ \
- fini(); \
- return KERN_SUCCESS; \
-} \
- \
-/* \
- * to allow semicolon after cfs_module(...) \
- */ \
-struct __dummy_ ## name ## _struct {}
-
-#define inter_module_get(n) cfs_symbol_get(n)
-#define inter_module_put(n) cfs_symbol_put(n)
-
-static inline int request_module(char *name)
-{
- return (-EINVAL);
-}
-
-#ifndef __exit
-#define __exit
-#endif
-#ifndef __init
-#define __init
-#endif
-
-#define EXPORT_SYMBOL(s)
-#define MODULE_AUTHOR(s)
-#define MODULE_DESCRIPTION(s)
-#define MODULE_LICENSE(s)
-#define MODULE_PARM(a, b)
-#define MODULE_PARM_DESC(a, b)
-
-#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c)
-#define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0)
-
-#define NR_IRQS 512
-#define in_interrupt() ml_at_interrupt_context()
-
-#define KERN_EMERG "<0>" /* system is unusable */
-#define KERN_ALERT "<1>" /* action must be taken immediately */
-#define KERN_CRIT "<2>" /* critical conditions */
-#define KERN_ERR "<3>" /* error conditions */
-#define KERN_WARNING "<4>" /* warning conditions */
-#define KERN_NOTICE "<5>" /* normal but significant condition */
-#define KERN_INFO "<6>" /* informational */
-#define KERN_DEBUG "<7>" /* debug-level messages */
-
-static inline long PTR_ERR(const void *ptr)
-{
- return (long) ptr;
-}
-
-#define ERR_PTR(err) ((void *)err)
-#define IS_ERR(p) ((unsigned long)(p) + 1000 < 1000)
-
-#else /* !__KERNEL__ */
-
-typedef struct cfs_proc_dir_entry {
- void *data;
-} cfs_proc_dir_entry_t;
-
-#include <libcfs/user-prim.h>
-#define __WORDSIZE 32
-
-#endif /* END __KERNEL__ */
-/*
- * Error number
- */
-#ifndef EPROTO
-#define EPROTO EPROTOTYPE
-#endif
-#ifndef EBADR
-#define EBADR EBADRPC
-#endif
-#ifndef ERESTARTSYS
-#define ERESTARTSYS 512
-#endif
-#ifndef EDEADLOCK
-#define EDEADLOCK EDEADLK
-#endif
-#ifndef ECOMM
-#define ECOMM EINVAL
-#endif
-#ifndef ENODATA
-#define ENODATA EINVAL
-#endif
-#ifndef ENOTSUPP
-#define ENOTSUPP EINVAL
-#endif
-
-#if BYTE_ORDER == BIG_ENDIAN
-# define __BIG_ENDIAN
-#else
-# define __LITTLE_ENDIAN
-#endif
-
-#endif /* __LIBCFS_DARWIN_CFS_PRIM_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Implementation of standard libcfs synchronization primitives for XNU
- * kernel.
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-/*
- * xnu_sync.h
- *
- * Created by nikita on Sun Jul 18 2004.
- *
- * Prototypes of XNU synchronization primitives.
- */
-
-#ifndef __LIBCFS_DARWIN_XNU_SYNC_H__
-#define __LIBCFS_DARWIN_XNU_SYNC_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#define XNU_SYNC_DEBUG (1)
-
-#if XNU_SYNC_DEBUG
-#define ON_SYNC_DEBUG(e) e
-#else
-#define ON_SYNC_DEBUG(e)
-#endif
-
-enum {
- /* "egrep -i '^(o?x)?[abcdeflo]*$' /usr/dict/words" is your friend */
- KMUT_MAGIC = 0x0bac0cab, /* [a, [b, c]] = b (a, c) - c (a, b) */
- KSEM_MAGIC = 0x1abe11ed,
- KCOND_MAGIC = 0xb01dface,
- KRW_MAGIC = 0xdabb1edd,
- KSPIN_MAGIC = 0xca11ab1e,
- KRW_SPIN_MAGIC = 0xbabeface,
- KSLEEP_CHAN_MAGIC = 0x0debac1e,
- KSLEEP_LINK_MAGIC = 0xacc01ade,
- KTIMER_MAGIC = 0xbefadd1e
-};
-
-/* ------------------------- spin lock ------------------------- */
-
-/*
- * XXX nikita: don't use NCPUS it's hardcoded to (1) in cpus.h
- */
-#define SMP (1)
-
-#include <libcfs/list.h>
-
-#ifdef __DARWIN8__
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <kern/locks.h>
-
-/*
- * hw_lock is not available in Darwin8 (hw_lock_* are not exported at all),
- * so use lck_spin_t. we can hack out lck_spin_t easily, it's the only
- * hacking in Darwin8.x. We did so because it'll take a lot of time to
- * add lock_done for all locks, maybe it should be done in the future.
- * If lock_done for all locks were added, we can:
- *
- * typedef lck_spin_t *xnu_spin_t;
- */
-#if defined (__ppc__)
-typedef struct {
- unsigned int opaque[3];
-} xnu_spin_t;
-#elif defined (__i386__)
-typedef struct {
- unsigned int opaque[10];
-} xnu_spin_t;
-#endif
-
-/*
- * wait_queue is not available in Darwin8 (wait_queue_* are not exported),
- * use assert_wait/wakeup/wake_one (wait_queue in kernel hash).
- */
-typedef void * xnu_wait_queue_t;
-
-/* DARWIN8 */
-#else
-
-#include <mach/mach_types.h>
-#include <sys/types.h>
-#include <kern/simple_lock.h>
-
-typedef hw_lock_data_t xnu_spin_t;
-typedef struct wait_queue xnu_wait_queue_t;
-
-/* DARWIN8 */
-#endif
-
-struct kspin {
-#if SMP
- xnu_spin_t lock;
-#endif
-#if XNU_SYNC_DEBUG
- unsigned magic;
- thread_t owner;
-#endif
-};
-
-void kspin_init(struct kspin *spin);
-void kspin_done(struct kspin *spin);
-void kspin_lock(struct kspin *spin);
-void kspin_unlock(struct kspin *spin);
-int kspin_trylock(struct kspin *spin);
-
-#if XNU_SYNC_DEBUG
-/*
- * two functions below are for use in assertions
- */
-/* true, iff spin-lock is locked by the current thread */
-int kspin_islocked(struct kspin *spin);
-/* true, iff spin-lock is not locked by the current thread */
-int kspin_isnotlocked(struct kspin *spin);
-#else
-#define kspin_islocked(s) (1)
-#define kspin_isnotlocked(s) (1)
-#endif
-
-/* ------------------------- rw spinlock ----------------------- */
-struct krw_spin {
- struct kspin guard;
- int count;
-#if XNU_SYNC_DEBUG
- unsigned magic;
-#endif
-};
-
-void krw_spin_init(struct krw_spin *sem);
-void krw_spin_done(struct krw_spin *sem);
-void krw_spin_down_r(struct krw_spin *sem);
-void krw_spin_down_w(struct krw_spin *sem);
-void krw_spin_up_r(struct krw_spin *sem);
-void krw_spin_up_w(struct krw_spin *sem);
-
-/* ------------------------- semaphore ------------------------- */
-
-struct ksem {
- struct kspin guard;
- xnu_wait_queue_t q;
- int value;
-#if XNU_SYNC_DEBUG
- unsigned magic;
-#endif
-};
-
-void ksem_init(struct ksem *sem, int value);
-void ksem_done(struct ksem *sem);
-int ksem_up (struct ksem *sem, int value);
-void ksem_down(struct ksem *sem, int value);
-int ksem_trydown(struct ksem *sem, int value);
-
-/* ------------------------- mutex ------------------------- */
-
-struct kmut {
- struct ksem s;
-#if XNU_SYNC_DEBUG
- unsigned magic;
- thread_t owner;
-#endif
-};
-
-void kmut_init(struct kmut *mut);
-void kmut_done(struct kmut *mut);
-
-void kmut_lock (struct kmut *mut);
-void kmut_unlock (struct kmut *mut);
-int kmut_trylock(struct kmut *mut);
-
-#if XNU_SYNC_DEBUG
-/*
- * two functions below are for use in assertions
- */
-/* true, iff mutex is locked by the current thread */
-int kmut_islocked(struct kmut *mut);
-/* true, iff mutex is not locked by the current thread */
-int kmut_isnotlocked(struct kmut *mut);
-#else
-#define kmut_islocked(m) (1)
-#define kmut_isnotlocked(m) (1)
-#endif
-
-/* ------------------------- condition variable ------------------------- */
-
-struct kcond_link {
- struct kcond_link *next;
- struct ksem sem;
-};
-
-struct kcond {
- struct kspin guard;
- struct kcond_link *waiters;
-#if XNU_SYNC_DEBUG
- unsigned magic;
-#endif
-};
-
-void kcond_init(struct kcond *cond);
-void kcond_done(struct kcond *cond);
-void kcond_wait(struct kcond *cond, struct kspin *lock);
-void kcond_signal(struct kcond *cond);
-void kcond_broadcast(struct kcond *cond);
-
-void kcond_wait_guard(struct kcond *cond);
-void kcond_signal_guard(struct kcond *cond);
-void kcond_broadcast_guard(struct kcond *cond);
-
-/* ------------------------- read-write semaphore ------------------------- */
-
-struct krw_sem {
- int count;
- struct kcond cond;
-#if XNU_SYNC_DEBUG
- unsigned magic;
-#endif
-};
-
-void krw_sem_init(struct krw_sem *sem);
-void krw_sem_done(struct krw_sem *sem);
-void krw_sem_down_r(struct krw_sem *sem);
-int krw_sem_down_r_try(struct krw_sem *sem);
-void krw_sem_down_w(struct krw_sem *sem);
-int krw_sem_down_w_try(struct krw_sem *sem);
-void krw_sem_up_r(struct krw_sem *sem);
-void krw_sem_up_w(struct krw_sem *sem);
-
-/* ------------------------- sleep-channel ------------------------- */
-
-struct ksleep_chan {
- struct kspin guard;
- struct list_head waiters;
-#if XNU_SYNC_DEBUG
- unsigned magic;
-#endif
-};
-
-#define KSLEEP_CHAN_INITIALIZER {{{0}}}
-
-struct ksleep_link {
- int flags;
- event_t event;
- int hits;
- struct ksleep_chan *forward;
- struct list_head linkage;
-#if XNU_SYNC_DEBUG
- unsigned magic;
-#endif
-};
-
-enum {
- KSLEEP_EXCLUSIVE = 1
-};
-
-void ksleep_chan_init(struct ksleep_chan *chan);
-void ksleep_chan_done(struct ksleep_chan *chan);
-
-void ksleep_link_init(struct ksleep_link *link);
-void ksleep_link_done(struct ksleep_link *link);
-
-void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link);
-void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link);
-
-void ksleep_wait(struct ksleep_chan *chan, int state);
-int64_t ksleep_timedwait(struct ksleep_chan *chan, int state, __u64 timeout);
-
-void ksleep_wake(struct ksleep_chan *chan);
-void ksleep_wake_all(struct ksleep_chan *chan);
-void ksleep_wake_nr(struct ksleep_chan *chan, int nr);
-
-#define KSLEEP_LINK_DECLARE(name) \
-{ \
- .flags = 0, \
- .event = 0, \
- .hits = 0, \
- .linkage = CFS_LIST_HEAD(name.linkage), \
- .magic = KSLEEP_LINK_MAGIC \
-}
-
-/* ------------------------- timer ------------------------- */
-
-struct ktimer {
- struct kspin guard;
- void (*func)(void *);
- void *arg;
- u_int64_t deadline; /* timer deadline in absolute nanoseconds */
- int armed;
-#if XNU_SYNC_DEBUG
- unsigned magic;
-#endif
-};
-
-void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg);
-void ktimer_done(struct ktimer *t);
-void ktimer_arm(struct ktimer *t, u_int64_t deadline);
-void ktimer_disarm(struct ktimer *t);
-int ktimer_is_armed(struct ktimer *t);
-
-u_int64_t ktimer_deadline(struct ktimer *t);
-
-/* __XNU_SYNC_H__ */
-#endif
-
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_DARWIN_TCPIP_H__
-#define __LIBCFS_DARWIN_TCPIP_H__
-
-#ifdef __KERNEL__
-#include <sys/socket.h>
-
-#ifdef __DARWIN8__
-
-struct socket;
-
-typedef void (*so_upcall)(socket_t sock, void* arg, int waitf);
-
-#define CFS_SOCK_UPCALL 0x1
-#define CFS_SOCK_DOWN 0x2
-
-#define CFS_SOCK_MAGIC 0xbabeface
-
-typedef struct cfs_socket {
- socket_t s_so;
- int s_magic;
- int s_flags;
- so_upcall s_upcall;
- void *s_upcallarg;
-} cfs_socket_t;
-
-
-/* cfs_socket_t to bsd socket */
-#define C2B_SOCK(s) ((s)->s_so)
-
-static inline int get_sock_intopt(socket_t so, int opt)
-{
- int val, len;
- int rc;
-
- /*
- * sock_getsockopt will take a lock(mutex) for socket,
- * so it can be blocked. So be careful while using
- * them.
- */
- len = sizeof(val);
- rc = sock_getsockopt(so, SOL_SOCKET, opt, &val, &len);
- assert(rc == 0);
- return val;
-}
-
-#define SOCK_ERROR(s) get_sock_intopt(C2B_SOCK(s), SO_ERROR)
-/* #define SOCK_WMEM_QUEUED(s) (0) */
-#define SOCK_WMEM_QUEUED(s) get_sock_intopt(C2B_SOCK(s), SO_NWRITE)
-/* XXX Liang: no reliable way to get it in Darwin8.x */
-#define SOCK_TEST_NOSPACE(s) (0)
-
-void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg);
-void libcfs_sock_reset_cb(cfs_socket_t *sock);
-
-#else /* !__DARWIN8__ */
-
-#define SOCK_WMEM_QUEUED(so) ((so)->so_snd.sb_cc)
-#define SOCK_ERROR(so) ((so)->so_error)
-
-#define SOCK_TEST_NOSPACE(so) (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat)
-
-#endif /* !__DARWIN8__ */
-
-#endif /* __KERNEL END */
-
-#endif /* __XNU_CFS_TYPES_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable time API for XNU kernel
- *
- */
-
-#ifndef __LIBCFS_DARWIN_DARWIN_TIME_H__
-#define __LIBCFS_DARWIN_DARWIN_TIME_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-/* Portable time API */
-
-/*
- * Platform provides three opaque data-types:
- *
- * cfs_time_t represents point in time. This is internal kernel
- * time rather than "wall clock". This time bears no
- * relation to gettimeofday().
- *
- * cfs_duration_t represents time interval with resolution of internal
- * platform clock
- *
- * cfs_fs_time_t represents instance in world-visible time. This is
- * used in file-system time-stamps
- *
- * cfs_time_t cfs_time_current(void);
- * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
- * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
- * int cfs_time_before (cfs_time_t, cfs_time_t);
- * int cfs_time_beforeq(cfs_time_t, cfs_time_t);
- *
- * cfs_duration_t cfs_duration_build(int64_t);
- *
- * time_t cfs_duration_sec (cfs_duration_t);
- * void cfs_duration_usec(cfs_duration_t, struct timeval *);
- * void cfs_duration_nsec(cfs_duration_t, struct timespec *);
- *
- * void cfs_fs_time_current(cfs_fs_time_t *);
- * time_t cfs_fs_time_sec (cfs_fs_time_t *);
- * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
- * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
- * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
- * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
- *
- * CFS_TIME_FORMAT
- * CFS_DURATION_FORMAT
- *
- */
-
-#define ONE_BILLION ((u_int64_t)1000000000)
-#define ONE_MILLION 1000000
-
-#ifdef __KERNEL__
-#include <sys/types.h>
-#include <sys/systm.h>
-
-#include <sys/kernel.h>
-
-#include <mach/mach_types.h>
-#include <mach/time_value.h>
-#include <kern/clock.h>
-#include <sys/param.h>
-
-#include <libcfs/darwin/darwin-types.h>
-#include <libcfs/darwin/darwin-utils.h>
-#include <libcfs/darwin/darwin-lock.h>
-
-/*
- * There are three way to measure time in OS X:
- * 1. nanoseconds
- * 2. absolute time (abstime unit equal to the length of one bus cycle),
- * schedule of thread/timer are counted by absolute time, but abstime
- * in different mac can be different also, so we wouldn't use it.
- * 3. clock interval (1sec = 100hz). But clock interval only taken by KPI
- * like tsleep().
- *
- * We use nanoseconds (uptime, not calendar time)
- *
- * clock_get_uptime() :get absolute time since bootup.
- * nanouptime() :get nanoseconds since bootup
- * microuptime() :get microseonds since bootup
- * nanotime() :get nanoseconds since epoch
- * microtime() :get microseconds since epoch
- */
-typedef u_int64_t cfs_time_t; /* nanoseconds */
-typedef int64_t cfs_duration_t;
-
-#define CFS_TIME_T "%llu"
-#define CFS_DURATION_T "%lld"
-
-typedef struct timeval cfs_fs_time_t;
-
-static inline cfs_time_t cfs_time_current(void)
-{
- struct timespec instant;
-
- nanouptime(&instant);
- return ((u_int64_t)instant.tv_sec) * NSEC_PER_SEC + instant.tv_nsec;
-}
-
-static inline time_t cfs_time_current_sec(void)
-{
- struct timespec instant;
-
- nanouptime(&instant);
- return instant.tv_sec;
-}
-
-static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
-{
- return t + d;
-}
-
-static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
-{
- return t1 - t2;
-}
-
-static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
-{
- return (int64_t)t1 - (int64_t)t2 < 0;
-}
-
-static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
-{
- return (int64_t)t1 - (int64_t)t2 <= 0;
-}
-
-static inline void cfs_fs_time_current(cfs_fs_time_t *t)
-{
- microtime((struct timeval *)t);
-}
-
-static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
-{
- return t->tv_sec;
-}
-
-static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
-{
- *v = *t;
-}
-
-static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
-{
- s->tv_sec = t->tv_sec;
- s->tv_nsec = t->tv_usec * NSEC_PER_USEC;
-}
-
-static inline cfs_duration_t cfs_time_seconds(int seconds)
-{
- return (NSEC_PER_SEC * (int64_t)seconds);
-}
-
-/*
- * internal helper function used by cfs_fs_time_before*()
- */
-static inline int64_t __cfs_fs_time_flat(cfs_fs_time_t *t)
-{
- return ((int64_t)t->tv_sec)*NSEC_PER_SEC + t->tv_usec*NSEC_PER_USEC;
-}
-
-static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
-{
- return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) < 0;
-}
-
-static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
-{
- return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) <= 0;
-}
-
-static inline time_t cfs_duration_sec(cfs_duration_t d)
-{
- return d / NSEC_PER_SEC;
-}
-
-static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
-{
- s->tv_sec = d / NSEC_PER_SEC;
- s->tv_usec = (d - ((int64_t)s->tv_sec) * NSEC_PER_SEC) / NSEC_PER_USEC;
-}
-
-static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
-{
- s->tv_sec = d / NSEC_PER_SEC;
- s->tv_nsec = d - ((int64_t)s->tv_sec) * NSEC_PER_SEC;
-}
-
-#define cfs_time_current_64 cfs_time_current
-#define cfs_time_add_64 cfs_time_add
-#define cfs_time_shift_64 cfs_time_shift
-#define cfs_time_before_64 cfs_time_before
-#define cfs_time_beforeq_64 cfs_time_beforeq
-
-/*
- * One jiffy (in nanoseconds)
- *
- * osfmk/kern/sched_prim.c
- * #define DEFAULT_PREEMPTION_RATE 100
- */
-#define CFS_TICK (NSEC_PER_SEC / (u_int64_t)100)
-
-#define LTIME_S(t) (t)
-
-/* __KERNEL__ */
-#else
-
-/*
- * User level
- */
-#include <libcfs/user-time.h>
-
-/* __KERNEL__ */
-#endif
-
-/* __LIBCFS_DARWIN_DARWIN_TIME_H__ */
-#endif
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_DARWIN_XNU_TYPES_H__
-#define __LIBCFS_DARWIN_XNU_TYPES_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#include <mach/mach_types.h>
-#include <sys/types.h>
-
-#ifndef _BLKID_TYPES_H
-#define _BLKID_TYPES_H
-#endif
-
-typedef u_int8_t __u8;
-typedef u_int16_t __u16;
-typedef u_int32_t __u32;
-typedef u_int64_t __u64;
-typedef int8_t __s8;
-typedef int16_t __s16;
-typedef int32_t __s32;
-typedef int64_t __s64;
-
-#ifdef __KERNEL__
-
-#include <kern/kern_types.h>
-
-
-typedef struct { int e; } event_chan_t;
-typedef dev_t kdev_t;
-
-/*
- * Atmoic define
- */
-#include <libkern/OSAtomic.h>
-
-typedef struct { volatile uint32_t counter; } atomic_t;
-
-#define ATOMIC_INIT(i) { (i) }
-#define atomic_read(a) ((a)->counter)
-#define atomic_set(a, v) (((a)->counter) = (v))
-#ifdef __DARWIN8__
-/* OS*Atomic return the value before the operation */
-#define atomic_add(v, a) OSAddAtomic(v, (SInt32 *)&((a)->counter))
-#define atomic_sub(v, a) OSAddAtomic(-(v), (SInt32 *)&((a)->counter))
-#define atomic_inc(a) OSIncrementAtomic((SInt32 *)&((a)->counter))
-#define atomic_dec(a) OSDecrementAtomic((SInt32 *)&((a)->counter))
-#else /* !__DARWIN8__ */
-#define atomic_add(v, a) hw_atomic_add((__u32 *)&((a)->counter), v)
-#define atomic_sub(v, a) hw_atomic_sub((__u32 *)&((a)->counter), v)
-#define atomic_inc(a) atomic_add(1, a)
-#define atomic_dec(a) atomic_sub(1, a)
-#endif /* !__DARWIN8__ */
-#define atomic_sub_and_test(v, a) (atomic_sub(v, a) == (v))
-#define atomic_dec_and_test(a) (atomic_dec(a) == 1)
-#define atomic_inc_return(a) (atomic_inc(a) + 1)
-#define atomic_dec_return(a) (atomic_dec(a) - 1)
-
-#include <libsa/mach/mach.h>
-typedef off_t loff_t;
-
-#else /* !__KERNEL__ */
-
-#include <stdint.h>
-
-typedef off_t loff_t;
-
-#endif /* __KERNEL END */
-typedef unsigned short umode_t;
-
-#endif /* __XNU_CFS_TYPES_H__ */
+++ /dev/null
-#ifndef __LIBCFS_DARWIN_UTILS_H__
-#define __LIBCFS_DARWIN_UTILS_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#include <sys/random.h>
-
-#ifdef __KERNEL__
-inline int isspace(char c);
-char *strpbrk(const char *cs, const char *ct);
-char * strsep(char **s, const char *ct);
-size_t strnlen(const char * s, size_t count);
-char * strstr(const char *in, const char *str);
-char * strrchr(const char *p, int ch);
-char * ul2dstr(unsigned long address, char *buf, int len);
-
-#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3)
-#define simple_strtoul(a1, a2, a3) strtoul(a1, a2, a3)
-#define simple_strtoll(a1, a2, a3) strtoq(a1, a2, a3)
-#define simple_strtoull(a1, a2, a3) strtouq(a1, a2, a3)
-
-#define test_bit(i, a) isset(a, i)
-#define set_bit(i, a) setbit(a, i)
-#define clear_bit(i, a) clrbit(a, i)
-
-#define get_random_bytes(buf, len) read_random(buf, len)
-
-#endif /* __KERNEL__ */
-
-#ifndef min_t
-#define min_t(type,x,y) \
- ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
-#endif
-#ifndef max_t
-#define max_t(type,x,y) \
- ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
-#endif
-
-#define do_div(n,base) \
- ({ \
- __u64 __n = (n); \
- __u32 __base = (base); \
- __u32 __mod; \
- \
- __mod = __n % __base; \
- n = __n / __base; \
- __mod; \
- })
-
-#define NIPQUAD(addr) \
- ((unsigned char *)&addr)[0], \
- ((unsigned char *)&addr)[1], \
- ((unsigned char *)&addr)[2], \
- ((unsigned char *)&addr)[3]
-
-#define HIPQUAD NIPQUAD
-
-#ifndef LIST_CIRCLE
-#define LIST_CIRCLE(elm, field) \
- do { \
- (elm)->field.le_prev = &(elm)->field.le_next; \
- } while (0)
-#endif
-
-#endif /* __XNU_UTILS_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_DARWIN_KP30__
-#define __LIBCFS_DARWIN_KP30__
-
-#ifndef __LIBCFS_KP30_H__
-#error Do not #include this file directly. #include <libcfs/kp30.h> instead
-#endif
-
-#ifdef __KERNEL__
-
-#include <sys/types.h>
-#include <sys/malloc.h>
-#include <sys/systm.h>
-#include <mach/mach_types.h>
-#include <string.h>
-#include <sys/file.h>
-#include <sys/conf.h>
-#include <miscfs/devfs/devfs.h>
-#include <stdarg.h>
-
-#include <libcfs/darwin/darwin-lock.h>
-#include <libcfs/darwin/darwin-prim.h>
-#include <lnet/lnet.h>
-
-#define our_cond_resched() cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, 1)
-
-#ifdef CONFIG_SMP
-#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */
-#else
-#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
-#endif
-#define LASSERT_SEM_LOCKED(sem) do {} while(0) /* XXX */
-
-#define LIBCFS_PANIC(msg) panic(msg)
-#error libcfs_register_panic_notifier() missing
-#error libcfs_unregister_panic_notifier() missing
-
-/* --------------------------------------------------------------------- */
-
-#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x)
-#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x)
-
-#define PORTAL_SYMBOL_GET(x) ((typeof(&x))cfs_symbol_get(#x))
-#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x)
-
-#define PORTAL_MODULE_USE do{int i = 0; i++;}while(0)
-#define PORTAL_MODULE_UNUSE do{int i = 0; i--;}while(0)
-
-#define num_online_cpus() cfs_online_cpus()
-
-/******************************************************************************/
-/* XXX Liang: There is no module parameter supporting in OSX */
-#define CFS_MODULE_PARM(name, t, type, perm, desc)
-
-#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */
-/******************************************************************************/
-
-#else /* !__KERNEL__ */
-# include <stdio.h>
-# include <stdlib.h>
-# include <stdint.h>
-# include <unistd.h>
-# include <time.h>
-# include <machine/limits.h>
-# include <sys/types.h>
-#endif
-
-#define BITS_PER_LONG LONG_BIT
-/******************************************************************************/
-/* Light-weight trace
- * Support for temporary event tracing with minimal Heisenberg effect. */
-#define LWT_SUPPORT 0
-
-typedef struct {
- long long lwte_when;
- char *lwte_where;
- void *lwte_task;
- long lwte_p1;
- long lwte_p2;
- long lwte_p3;
- long lwte_p4;
-} lwt_event_t;
-
-# define LWT_EVENT(p1,p2,p3,p4) /* no lwt implementation yet */
-
-/* -------------------------------------------------------------------------- */
-
-#define IOCTL_LIBCFS_TYPE struct libcfs_ioctl_data
-
-#define LPU64 "%llu"
-#define LPD64 "%lld"
-#define LPX64 "%#llx"
-#define LPSZ "%lu"
-#define LPSSZ "%ld"
-# define LI_POISON ((int)0x5a5a5a5a)
-# define LL_POISON ((long)0x5a5a5a5a)
-# define LP_POISON ((void *)(long)0x5a5a5a5a)
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_DARWIN_LIBCFS_H__
-#define __LIBCFS_DARWIN_LIBCFS_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#include <mach/mach_types.h>
-#include <sys/errno.h>
-#include <string.h>
-#include <libcfs/darwin/darwin-types.h>
-#include <libcfs/darwin/darwin-time.h>
-#include <libcfs/darwin/darwin-prim.h>
-#include <libcfs/darwin/darwin-mem.h>
-#include <libcfs/darwin/darwin-lock.h>
-#include <libcfs/darwin/darwin-fs.h>
-#include <libcfs/darwin/darwin-tcpip.h>
-
-#ifdef __KERNEL__
-# include <sys/types.h>
-# include <sys/time.h>
-# define do_gettimeofday(tv) microuptime(tv)
-#else
-# include <sys/time.h>
-# define do_gettimeofday(tv) gettimeofday(tv, NULL);
-typedef unsigned long long cycles_t;
-#endif
-
-#define __cpu_to_le64(x) OSSwapHostToLittleInt64(x)
-#define __cpu_to_le32(x) OSSwapHostToLittleInt32(x)
-#define __cpu_to_le16(x) OSSwapHostToLittleInt16(x)
-
-#define __le16_to_cpu(x) OSSwapLittleToHostInt16(x)
-#define __le32_to_cpu(x) OSSwapLittleToHostInt32(x)
-#define __le64_to_cpu(x) OSSwapLittleToHostInt64(x)
-
-#define cpu_to_le64(x) __cpu_to_le64(x)
-#define cpu_to_le32(x) __cpu_to_le32(x)
-#define cpu_to_le16(x) __cpu_to_le16(x)
-
-#define le64_to_cpu(x) __le64_to_cpu(x)
-#define le32_to_cpu(x) __le32_to_cpu(x)
-#define le16_to_cpu(x) __le16_to_cpu(x)
-
-#define __swab16(x) OSSwapInt16(x)
-#define __swab32(x) OSSwapInt32(x)
-#define __swab64(x) OSSwapInt64(x)
-#define __swab16s(x) do { *(x) = __swab16(*(x)); } while (0)
-#define __swab32s(x) do { *(x) = __swab32(*(x)); } while (0)
-#define __swab64s(x) do { *(x) = __swab64(*(x)); } while (0)
-
-struct ptldebug_header {
- __u32 ph_len;
- __u32 ph_flags;
- __u32 ph_subsys;
- __u32 ph_mask;
- __u32 ph_cpu_id;
- __u32 ph_sec;
- __u64 ph_usec;
- __u32 ph_stack;
- __u32 ph_pid;
- __u32 ph_extern_pid;
- __u32 ph_line_num;
-} __attribute__((packed));
-
-
-#ifdef __KERNEL__
-# include <sys/systm.h>
-# include <pexpert/pexpert.h>
-/* Fix me */
-# define THREAD_SIZE 8192
-#else
-# define THREAD_SIZE 8192
-#endif
-#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-
-#define CHECK_STACK() do { } while(0)
-#define CDEBUG_STACK() (0L)
-
-/* Darwin has defined RETURN, so we have to undef it in lustre */
-#ifdef RETURN
-#undef RETURN
-#endif
-
-/*
- * When this is enabled debugging messages are indented according to the
- * current "nesting level". Nesting level in increased when ENTRY macro
- * is executed, and decreased on EXIT and RETURN.
- */
-#ifdef __KERNEL__
-#define ENTRY_NESTING_SUPPORT (0)
-#endif
-
-#if ENTRY_NESTING_SUPPORT
-
-/*
- * Currently ENTRY_NESTING_SUPPORT is only supported for XNU port. Basic
- * idea is to keep per-thread pointer to small data structure (struct
- * cfs_debug_data) describing current nesting level. In XNU unused
- * proc->p_wmegs field in hijacked for this. On Linux
- * current->journal_info can be used. In user space
- * pthread_{g,s}etspecific().
- *
- * ENTRY macro allocates new cfs_debug_data on stack, and installs it as
- * a current nesting level, storing old data in cfs_debug_data it just
- * created.
- *
- * EXIT pops old value back.
- *
- */
-
-/*
- * One problem with this approach is that there is a lot of code that
- * does ENTRY and then escapes scope without doing EXIT/RETURN. In this
- * case per-thread current nesting level pointer is dangling (it points
- * to the stack area that is possible already overridden). To detect
- * such cases, we add two magic fields to the cfs_debug_data and check
- * them whenever current nesting level pointer is dereferenced. While
- * looking flaky this works because stack is always consumed
- * "continously".
- */
-enum {
- CDD_MAGIC1 = 0x02128506,
- CDD_MAGIC2 = 0x42424242
-};
-
-struct cfs_debug_data {
- unsigned int magic1;
- struct cfs_debug_data *parent;
- int nesting_level;
- unsigned int magic2;
-};
-
-void __entry_nesting(struct cfs_debug_data *child);
-void __exit_nesting(struct cfs_debug_data *child);
-unsigned int __current_nesting_level(void);
-
-#define ENTRY_NESTING \
-struct cfs_debug_data __cdd = { .magic1 = CDD_MAGIC1, \
- .parent = NULL, \
- .nesting_level = 0, \
- .magic2 = CDD_MAGIC2 }; \
-__entry_nesting(&__cdd);
-
-#define EXIT_NESTING __exit_nesting(&__cdd)
-
-/* ENTRY_NESTING_SUPPORT */
-#else
-
-#define ENTRY_NESTING do {;} while (0)
-#define EXIT_NESTING do {;} while (0)
-#define __current_nesting_level() (0)
-
-/* ENTRY_NESTING_SUPPORT */
-#endif
-
-#define LUSTRE_LNET_PID 12345
-
-#define _XNU_LIBCFS_H
-
-/*
- * Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
- *
- * Implementation is in darwin-curproc.c
- */
-#define CFS_CURPROC_COMM_MAX MAXCOMLEN
-/*
- * XNU has no capabilities
- */
-typedef int cfs_kernel_cap_t;
-
-#ifdef __KERNEL__
-enum {
- /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */
- CFS_STACK_TRACE_DEPTH = 16
-};
-
-struct cfs_stack_trace {
- void *frame[CFS_STACK_TRACE_DEPTH];
-};
-
-#define printk(format, args...) printf(format, ## args)
-
-#ifdef WITH_WATCHDOG
-#undef WITH_WATCHDOG
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif /* _XNU_LIBCFS_H */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_DARWIN_LLTRACE_H__
-#define __LIBCFS_DARWIN_LLTRACE_H__
-
-#ifndef __LIBCFS_LLTRACE_H__
-#error Do not #include this file directly. #include <libcfs/lltrace.h> instead
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <lnet/types.h>
-#include <libcfs/kp30.h>
-#include <mach/vm_param.h>
-#include <lnet/lnetctl.h>
-
-#endif
+++ /dev/null
-#ifndef __LIBCFS_DARWIN_PORTALS_UTILS_H__
-#define __LIBCFS_DARWIN_PORTALS_UTILS_H__
-
-#ifndef __LIBCFS_PORTALS_UTILS_H__
-#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead
-#endif
-
-#include <libcfs/list.h>
-#ifdef __KERNEL__
-#include <mach/mach_types.h>
-#include <libcfs/libcfs.h>
-#else /* !__KERNEL__ */
-#include <machine/endian.h>
-#include <netinet/in.h>
-#include <sys/syscall.h>
-#endif /* !__KERNEL__ */
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_KP30_H__
-#define __LIBCFS_KP30_H__
-
-/* Controlled via configure key */
-/* #define LIBCFS_DEBUG */
-
-#include <libcfs/libcfs.h>
-#include <lnet/types.h>
-
-#if defined(__linux__)
-#include <libcfs/linux/kp30.h>
-#elif defined(__APPLE__)
-#include <libcfs/darwin/kp30.h>
-#elif defined(__WINNT__)
-#include <libcfs/winnt/kp30.h>
-#else
-#error Unsupported operating system
-#endif
-
-#ifndef DEBUG_SUBSYSTEM
-# define DEBUG_SUBSYSTEM S_UNDEFINED
-#endif
-
-#ifdef __KERNEL__
-
-#ifdef LIBCFS_DEBUG
-
-/*
- * When this is on, LASSERT macro includes check for assignment used instead
- * of equality check, but doesn't have unlikely(). Turn this on from time to
- * time to make test-builds. This shouldn't be on for production release.
- */
-#define LASSERT_CHECKED (0)
-
-#if LASSERT_CHECKED
-/*
- * Assertion.
- *
- * Strange construction with empty "then" clause is used to trigger compiler
- * warnings on the assertions of the form LASSERT(a = b);
- *
- * "warning: suggest parentheses around assignment used as truth value"
- *
- * requires -Wall. Unfortunately this rules out use of likely/unlikely.
- */
-#define LASSERT(cond) \
-({ \
- if (cond) \
- ; \
- else \
- libcfs_assertion_failed( #cond , __FILE__, \
- __FUNCTION__, __LINE__); \
-})
-
-#define LASSERTF(cond, fmt, a...) \
-({ \
- if (cond) \
- ; \
- else { \
- libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \
- __FILE__, __FUNCTION__,__LINE__, \
- "ASSERTION(" #cond ") failed:" fmt, \
- ## a); \
- LBUG(); \
- } \
-})
-
-/* LASSERT_CHECKED */
-#else
-
-#define LASSERT(cond) \
-({ \
- if (unlikely(!(cond))) \
- libcfs_assertion_failed(#cond , __FILE__, \
- __FUNCTION__, __LINE__); \
-})
-
-#define LASSERTF(cond, fmt, a...) \
-({ \
- if (unlikely(!(cond))) { \
- libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \
- __FILE__, __FUNCTION__,__LINE__, \
- "ASSERTION(" #cond ") failed:" fmt, \
- ## a); \
- LBUG(); \
- } \
-})
-
-/* LASSERT_CHECKED */
-#endif
-
-/* LIBCFS_DEBUG */
-#else
-#define LASSERT(e) ((void)(0))
-#define LASSERTF(cond, fmt...) ((void)(0))
-#endif /* LIBCFS_DEBUG */
-
-#define KLASSERT(e) LASSERT(e)
-
-void lbug_with_loc(char *file, const char *func, const int line)
- __attribute__((noreturn));
-
-#define LBUG() lbug_with_loc(__FILE__, __FUNCTION__, __LINE__)
-
-extern atomic_t libcfs_kmemory;
-/*
- * Memory
- */
-#ifdef LIBCFS_DEBUG
-
-# define libcfs_kmem_inc(ptr, size) \
-do { \
- atomic_add(size, &libcfs_kmemory); \
-} while (0)
-
-# define libcfs_kmem_dec(ptr, size) do { \
- atomic_sub(size, &libcfs_kmemory); \
-} while (0)
-
-#else
-# define libcfs_kmem_inc(ptr, size) do {} while (0)
-# define libcfs_kmem_dec(ptr, size) do {} while (0)
-#endif /* LIBCFS_DEBUG */
-
-#define LIBCFS_VMALLOC_SIZE 16384
-
-#define LIBCFS_ALLOC_GFP(ptr, size, mask) \
-do { \
- LASSERT(!in_interrupt() || \
- (size <= LIBCFS_VMALLOC_SIZE && mask == CFS_ALLOC_ATOMIC));\
- if (unlikely((size) > LIBCFS_VMALLOC_SIZE)) \
- (ptr) = cfs_alloc_large(size); \
- else \
- (ptr) = cfs_alloc((size), (mask)); \
- if (unlikely((ptr) == NULL)) { \
- CERROR("LNET: out of memory at %s:%d (tried to alloc '" \
- #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
- CERROR("LNET: %d total bytes allocated by lnet\n", \
- atomic_read(&libcfs_kmemory)); \
- } else { \
- libcfs_kmem_inc((ptr), (size)); \
- if (!((mask) & CFS_ALLOC_ZERO)) \
- memset((ptr), 0, (size)); \
- } \
- CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \
- (int)(size), (ptr), atomic_read (&libcfs_kmemory)); \
-} while (0)
-
-#define LIBCFS_ALLOC(ptr, size) \
- LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_IO)
-
-#define LIBCFS_ALLOC_ATOMIC(ptr, size) \
- LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_ATOMIC)
-
-#define LIBCFS_FREE(ptr, size) \
-do { \
- int s = (size); \
- if (unlikely((ptr) == NULL)) { \
- CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \
- "%s:%d\n", s, __FILE__, __LINE__); \
- break; \
- } \
- libcfs_kmem_dec((ptr), s); \
- CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \
- s, (ptr), atomic_read(&libcfs_kmemory)); \
- if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \
- cfs_free_large(ptr); \
- else \
- cfs_free(ptr); \
-} while (0)
-
-/******************************************************************************/
-
-/* htonl hack - either this, or compile with -O2. Stupid byteorder/generic.h */
-#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
-#define ___htonl(x) __cpu_to_be32(x)
-#define ___htons(x) __cpu_to_be16(x)
-#define ___ntohl(x) __be32_to_cpu(x)
-#define ___ntohs(x) __be16_to_cpu(x)
-#define htonl(x) ___htonl(x)
-#define ntohl(x) ___ntohl(x)
-#define htons(x) ___htons(x)
-#define ntohs(x) ___ntohs(x)
-#endif
-
-void libcfs_debug_dumpstack(cfs_task_t *tsk);
-void libcfs_run_upcall(char **argv);
-void libcfs_run_lbug_upcall(char * file, const char *fn, const int line);
-void libcfs_debug_dumplog(void);
-int libcfs_debug_init(unsigned long bufsize);
-int libcfs_debug_cleanup(void);
-int libcfs_debug_clear_buffer(void);
-int libcfs_debug_mark_buffer(char *text);
-
-void libcfs_debug_set_level(unsigned int debug_level);
-
-#else /* !__KERNEL__ */
-# ifdef LIBCFS_DEBUG
-# undef NDEBUG
-# include <assert.h>
-# define LASSERT(e) assert(e)
-# define LASSERTF(cond, args...) \
-do { \
- if (!(cond)) \
- CERROR(args); \
- assert(cond); \
-} while (0)
-# define LBUG() assert(0)
-# else
-# define LASSERT(e) ((void)(0))
-# define LASSERTF(cond, args...) do { } while (0)
-# define LBUG() ((void)(0))
-# endif /* LIBCFS_DEBUG */
-# define KLASSERT(e) do { } while (0)
-# define printk(format, args...) printf (format, ## args)
-# ifdef CRAY_XT3 /* buggy calloc! */
-# define LIBCFS_ALLOC(ptr, size) \
- do { \
- (ptr) = malloc(size); \
- memset(ptr, 0, size); \
- } while (0)
-# else
-# define LIBCFS_ALLOC(ptr, size) do { (ptr) = calloc(1,size); } while (0)
-# endif
-# define LIBCFS_FREE(a, b) do { free(a); } while (0)
-
-void libcfs_debug_dumplog(void);
-int libcfs_debug_init(unsigned long bufsize);
-int libcfs_debug_cleanup(void);
-
-/*
- * Generic compiler-dependent macros required for kernel
- * build go below this comment. Actual compiler/compiler version
- * specific implementations come from the above header files
- */
-
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
-
-/* !__KERNEL__ */
-#endif
-
-/*
- * compile-time assertions. @cond has to be constant expression.
- * ISO C Standard:
- *
- * 6.8.4.2 The switch statement
- *
- * ....
- *
- * [#3] The expression of each case label shall be an integer
- * constant expression and no two of the case constant
- * expressions in the same switch statement shall have the same
- * value after conversion...
- *
- */
-#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } })
-
-/* support decl needed both by kernel and liblustre */
-int libcfs_isknown_lnd(int type);
-char *libcfs_lnd2modname(int type);
-char *libcfs_lnd2str(int type);
-int libcfs_str2lnd(const char *str);
-char *libcfs_net2str(__u32 net);
-char *libcfs_nid2str(lnet_nid_t nid);
-__u32 libcfs_str2net(const char *str);
-lnet_nid_t libcfs_str2nid(const char *str);
-int libcfs_str2anynid(lnet_nid_t *nid, const char *str);
-char *libcfs_id2str(lnet_process_id_t id);
-void libcfs_setnet0alias(int type);
-
-/* how an LNET NID encodes net:address */
-#define LNET_NIDADDR(nid) ((__u32)((nid) & 0xffffffff))
-#define LNET_NIDNET(nid) ((__u32)(((nid) >> 32)) & 0xffffffff)
-#define LNET_MKNID(net,addr) ((((__u64)(net))<<32)|((__u64)(addr)))
-/* how net encodes type:number */
-#define LNET_NETNUM(net) ((net) & 0xffff)
-#define LNET_NETTYP(net) (((net) >> 16) & 0xffff)
-#define LNET_MKNET(typ,num) ((((__u32)(typ))<<16)|((__u32)(num)))
-
-/* implication */
-#define ergo(a, b) (!(a) || (b))
-/* logical equivalence */
-#define equi(a, b) (!!(a) == !!(b))
-
-#ifndef CURRENT_TIME
-# define CURRENT_TIME time(0)
-#endif
-
-/* --------------------------------------------------------------------
- * Light-weight trace
- * Support for temporary event tracing with minimal Heisenberg effect.
- * All stuff about lwt are put in arch/kp30.h
- * -------------------------------------------------------------------- */
-
-struct libcfs_device_userstate
-{
- int ldu_memhog_pages;
- cfs_page_t *ldu_memhog_root_page;
-};
-
-/* what used to be in portals_lib.h */
-#ifndef MIN
-# define MIN(a,b) (((a)<(b)) ? (a): (b))
-#endif
-#ifndef MAX
-# define MAX(a,b) (((a)>(b)) ? (a): (b))
-#endif
-
-#define MKSTR(ptr) ((ptr))? (ptr) : ""
-
-static inline int size_round4 (int val)
-{
- return (val + 3) & (~0x3);
-}
-
-static inline int size_round (int val)
-{
- return (val + 7) & (~0x7);
-}
-
-static inline int size_round16(int val)
-{
- return (val + 0xf) & (~0xf);
-}
-
-static inline int size_round32(int val)
-{
- return (val + 0x1f) & (~0x1f);
-}
-
-static inline int size_round0(int val)
-{
- if (!val)
- return 0;
- return (val + 1 + 7) & (~0x7);
-}
-
-static inline size_t round_strlen(char *fset)
-{
- return (size_t)size_round((int)strlen(fset) + 1);
-}
-
-#define LOGL(var,len,ptr) \
-do { \
- if (var) \
- memcpy((char *)ptr, (const char *)var, len); \
- ptr += size_round(len); \
-} while (0)
-
-#define LOGU(var,len,ptr) \
-do { \
- if (var) \
- memcpy((char *)var, (const char *)ptr, len); \
- ptr += size_round(len); \
-} while (0)
-
-#define LOGL0(var,len,ptr) \
-do { \
- if (!len) \
- break; \
- memcpy((char *)ptr, (const char *)var, len); \
- *((char *)(ptr) + len) = 0; \
- ptr += size_round(len + 1); \
-} while (0)
-
-/*
- * USER LEVEL STUFF BELOW
- */
-
-#define LIBCFS_IOCTL_VERSION 0x0001000a
-
-struct libcfs_ioctl_data {
- __u32 ioc_len;
- __u32 ioc_version;
-
- __u64 ioc_nid;
- __u64 ioc_u64[1];
-
- __u32 ioc_flags;
- __u32 ioc_count;
- __u32 ioc_net;
- __u32 ioc_u32[7];
-
- __u32 ioc_inllen1;
- char *ioc_inlbuf1;
- __u32 ioc_inllen2;
- char *ioc_inlbuf2;
-
- __u32 ioc_plen1; /* buffers in userspace */
- char *ioc_pbuf1;
- __u32 ioc_plen2; /* buffers in userspace */
- char *ioc_pbuf2;
-
- char ioc_bulk[0];
-};
-
-
-struct libcfs_ioctl_hdr {
- __u32 ioc_len;
- __u32 ioc_version;
-};
-
-struct libcfs_debug_ioctl_data
-{
- struct libcfs_ioctl_hdr hdr;
- unsigned int subs;
- unsigned int debug;
-};
-
-#define LIBCFS_IOC_INIT(data) \
-do { \
- memset(&data, 0, sizeof(data)); \
- data.ioc_version = LIBCFS_IOCTL_VERSION; \
- data.ioc_len = sizeof(data); \
-} while (0)
-
-/* FIXME check conflict with lustre_lib.h */
-#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long)
-
-static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
-{
- int len = sizeof(*data);
- len += size_round(data->ioc_inllen1);
- len += size_round(data->ioc_inllen2);
- return len;
-}
-
-static inline int libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
-{
- if (data->ioc_len > (1<<30)) {
- CERROR ("LIBCFS ioctl: ioc_len larger than 1<<30\n");
- return 1;
- }
- if (data->ioc_inllen1 > (1<<30)) {
- CERROR ("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
- return 1;
- }
- if (data->ioc_inllen2 > (1<<30)) {
- CERROR ("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
- return 1;
- }
- if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
- CERROR ("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
- CERROR ("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_pbuf1 && !data->ioc_plen1) {
- CERROR ("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_pbuf2 && !data->ioc_plen2) {
- CERROR ("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
- return 1;
- }
- if (data->ioc_plen1 && !data->ioc_pbuf1) {
- CERROR ("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
- return 1;
- }
- if (data->ioc_plen2 && !data->ioc_pbuf2) {
- CERROR ("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
- return 1;
- }
- if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_len ) {
- CERROR ("LIBCFS ioctl: packlen != ioc_len\n");
- return 1;
- }
- if (data->ioc_inllen1 &&
- data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
- CERROR ("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
- return 1;
- }
- if (data->ioc_inllen2 &&
- data->ioc_bulk[size_round(data->ioc_inllen1) +
- data->ioc_inllen2 - 1] != '\0') {
- CERROR ("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
- return 1;
- }
- return 0;
-}
-
-#ifndef __KERNEL__
-static inline int libcfs_ioctl_pack(struct libcfs_ioctl_data *data, char **pbuf,
- int max)
-{
- char *ptr;
- struct libcfs_ioctl_data *overlay;
- data->ioc_len = libcfs_ioctl_packlen(data);
- data->ioc_version = LIBCFS_IOCTL_VERSION;
-
- if (*pbuf && libcfs_ioctl_packlen(data) > max)
- return 1;
- if (*pbuf == NULL) {
- *pbuf = malloc(data->ioc_len);
- }
- if (!*pbuf)
- return 1;
- overlay = (struct libcfs_ioctl_data *)*pbuf;
- memcpy(*pbuf, data, sizeof(*data));
-
- ptr = overlay->ioc_bulk;
- if (data->ioc_inlbuf1)
- LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
- if (data->ioc_inlbuf2)
- LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
- if (libcfs_ioctl_is_invalid(overlay))
- return 1;
-
- return 0;
-}
-
-#else
-
-extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg);
-extern int libcfs_ioctl_popdata(void *arg, void *buf, int size);
-
-#endif
-
-/* ioctls for manipulating snapshots 30- */
-#define IOC_LIBCFS_TYPE 'e'
-#define IOC_LIBCFS_MIN_NR 30
-/* libcfs ioctls */
-#define IOC_LIBCFS_PANIC _IOWR('e', 30, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_LWT_CONTROL _IOWR('e', 33, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_LWT_SNAPSHOT _IOWR('e', 34, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_LWT_LOOKUP_STRING _IOWR('e', 35, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_MEMHOG _IOWR('e', 36, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PING_TEST _IOWR('e', 37, IOCTL_LIBCFS_TYPE)
-/* lnet ioctls */
-#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_ADD_ROUTE _IOWR('e', 52, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEL_ROUTE _IOWR('e', 53, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_ROUTE _IOWR('e', 54, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE)
-/* lnd ioctls */
-#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_TXDESC _IOWR('e', 77, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_GMID _IOWR('e', 81, IOCTL_LIBCFS_TYPE)
-
-#define IOC_LIBCFS_MAX_NR 81
-
-
-enum {
- /* Only add to these values (i.e. don't ever change or redefine them):
- * network addresses depend on them... */
- QSWLND = 1,
- SOCKLND = 2,
- GMLND = 3,
- PTLLND = 4,
- O2IBLND = 5,
- CIBLND = 6,
- OPENIBLND = 7,
- IIBLND = 8,
- LOLND = 9,
- RALND = 10,
- VIBLND = 11,
- MXLND = 12,
-};
-
-enum {
- DEBUG_DAEMON_START = 1,
- DEBUG_DAEMON_STOP = 2,
- DEBUG_DAEMON_PAUSE = 3,
- DEBUG_DAEMON_CONTINUE = 4,
-};
-
-
-enum cfg_record_type {
- PORTALS_CFG_TYPE = 1,
- LUSTRE_CFG_TYPE = 123,
-};
-
-typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data);
-
-/* lustre_id output helper macros */
-#define DLID4 "%lu/%lu/%lu/%lu"
-
-#define OLID4(id) \
- (unsigned long)(id)->li_fid.lf_id, \
- (unsigned long)(id)->li_fid.lf_group, \
- (unsigned long)(id)->li_stc.u.e3s.l3s_ino, \
- (unsigned long)(id)->li_stc.u.e3s.l3s_gen
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_LIBCFS_H__
-#define __LIBCFS_LIBCFS_H__
-
-#if !__GNUC__
-#define __attribute__(x)
-#endif
-
-#if defined(__linux__)
-#include <libcfs/linux/libcfs.h>
-#elif defined(__APPLE__)
-#include <libcfs/darwin/libcfs.h>
-#elif defined(__WINNT__)
-#include <libcfs/winnt/libcfs.h>
-#else
-#error Unsupported operating system.
-#endif
-
-#include "curproc.h"
-
-#ifndef __KERNEL__
-#include <stdio.h>
-#endif
-
-/* Controlled via configure key */
-/* #define LIBCFS_DEBUG */
-
-#ifndef offsetof
-# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
-#endif
-
-/* cardinality of array */
-#define sizeof_array(a) ((sizeof (a)) / (sizeof ((a)[0])))
-
-#if !defined(container_of)
-/* given a pointer @ptr to the field @member embedded into type (usually
- * struct) @type, return pointer to the embedding instance of @type. */
-#define container_of(ptr, type, member) \
- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-#endif
-
-#define container_of0(ptr, type, member) \
-({ \
- typeof(ptr) __ptr = (ptr); \
- type *__res; \
- \
- if (unlikely(IS_ERR(__ptr) || __ptr == NULL)) \
- __res = (type *)__ptr; \
- else \
- __res = container_of(__ptr, type, member); \
- __res; \
-})
-
-/*
- * true iff @i is power-of-2
- */
-#define IS_PO2(i) \
-({ \
- typeof(i) __i; \
- \
- __i = (i); \
- !(__i & (__i - 1)); \
-})
-
-#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
-
-/*
- * Debugging
- */
-extern unsigned int libcfs_subsystem_debug;
-extern unsigned int libcfs_stack;
-extern unsigned int libcfs_debug;
-extern unsigned int libcfs_printk;
-extern unsigned int libcfs_console_ratelimit;
-extern cfs_duration_t libcfs_console_max_delay;
-extern cfs_duration_t libcfs_console_min_delay;
-extern unsigned int libcfs_console_backoff;
-extern unsigned int libcfs_debug_binary;
-extern char debug_file_path[1024];
-
-int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys);
-int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys);
-
-/* Has there been an LBUG? */
-extern unsigned int libcfs_catastrophe;
-extern unsigned int libcfs_panic_on_lbug;
-
-/*
- * struct ptldebug_header is defined in libcfs/<os>/libcfs.h
- */
-
-#define PH_FLAG_FIRST_RECORD 1
-
-/* Debugging subsystems (32 bits, non-overlapping) */
-/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
-#define S_UNDEFINED 0x00000001
-#define S_MDC 0x00000002
-#define S_MDS 0x00000004
-#define S_OSC 0x00000008
-#define S_OST 0x00000010
-#define S_CLASS 0x00000020
-#define S_LOG 0x00000040
-#define S_LLITE 0x00000080
-#define S_RPC 0x00000100
-#define S_MGMT 0x00000200
-#define S_LNET 0x00000400
-#define S_LND 0x00000800 /* ALL LNDs */
-#define S_PINGER 0x00001000
-#define S_FILTER 0x00002000
-/* unused */
-#define S_ECHO 0x00008000
-#define S_LDLM 0x00010000
-#define S_LOV 0x00020000
-/* unused */
-/* unused */
-/* unused */
-/* unused */
-/* unused */
-#define S_LMV 0x00800000 /* b_new_cmd */
-/* unused */
-#define S_SEC 0x02000000 /* upcall cache */
-#define S_GSS 0x04000000 /* b_new_cmd */
-/* unused */
-#define S_MGC 0x10000000
-#define S_MGS 0x20000000
-#define S_FID 0x40000000 /* b_new_cmd */
-#define S_FLD 0x80000000 /* b_new_cmd */
-/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
-
-/* Debugging masks (32 bits, non-overlapping) */
-/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
-#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */
-#define D_INODE 0x00000002
-#define D_SUPER 0x00000004
-#define D_EXT2 0x00000008 /* anything from ext2_debug */
-#define D_MALLOC 0x00000010 /* print malloc, free information */
-#define D_CACHE 0x00000020 /* cache-related items */
-#define D_INFO 0x00000040 /* general information */
-#define D_IOCTL 0x00000080 /* ioctl related information */
-#define D_NETERROR 0x00000100 /* network errors */
-#define D_NET 0x00000200 /* network communications */
-#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS 0x00000800
-#define D_OTHER 0x00001000
-#define D_DENTRY 0x00002000
-#define D_NETTRACE 0x00004000
-#define D_PAGE 0x00008000 /* bulk page handling */
-#define D_DLMTRACE 0x00010000
-#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA 0x00080000 /* recovery and failover */
-#define D_RPCTRACE 0x00100000 /* for distributed debugging */
-#define D_VFSTRACE 0x00200000
-#define D_READA 0x00400000 /* read-ahead */
-#define D_MMAP 0x00800000
-#define D_CONFIG 0x01000000
-#define D_CONSOLE 0x02000000
-#define D_QUOTA 0x04000000
-#define D_SEC 0x08000000
-/* keep these in sync with lnet/{utils,libcfs}/debug.c */
-
-#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
-
-#ifndef DEBUG_SUBSYSTEM
-# define DEBUG_SUBSYSTEM S_UNDEFINED
-#endif
-
-#define CDEBUG_DEFAULT_MAX_DELAY (cfs_time_seconds(600)) /* jiffies */
-#define CDEBUG_DEFAULT_MIN_DELAY ((cfs_time_seconds(1) + 1) / 2) /* jiffies */
-#define CDEBUG_DEFAULT_BACKOFF 2
-typedef struct {
- cfs_time_t cdls_next;
- int cdls_count;
- cfs_duration_t cdls_delay;
-} cfs_debug_limit_state_t;
-
-/* Controlled via configure key */
-/* #define CDEBUG_ENABLED */
-
-#if defined(__KERNEL__) || (defined(__arch_lib__) && !defined(LUSTRE_UTILS))
-
-#ifdef CDEBUG_ENABLED
-#define __CDEBUG(cdls, mask, format, a...) \
-do { \
- CHECK_STACK(); \
- \
- if (((mask) & D_CANTMASK) != 0 || \
- ((libcfs_debug & (mask)) != 0 && \
- (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \
- libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, mask, \
- __FILE__, __FUNCTION__, __LINE__, \
- format, ## a); \
-} while (0)
-
-#define CDEBUG(mask, format, a...) __CDEBUG(NULL, mask, format, ## a)
-
-#define CDEBUG_LIMIT(mask, format, a...) \
-do { \
- static cfs_debug_limit_state_t cdls; \
- \
- __CDEBUG(&cdls, mask, format, ## a); \
-} while (0)
-
-#else /* CDEBUG_ENABLED */
-#define CDEBUG(mask, format, a...) (void)(0)
-#define CDEBUG_LIMIT(mask, format, a...) (void)(0)
-#warning "CDEBUG IS DISABLED. THIS SHOULD NEVER BE DONE FOR PRODUCTION!"
-#endif
-
-#else
-
-#define CDEBUG(mask, format, a...) \
-do { \
- if (((mask) & D_CANTMASK) != 0) \
- fprintf(stderr, "(%s:%d:%s()) " format, \
- __FILE__, __LINE__, __FUNCTION__, ## a); \
-} while (0)
-
-#define CDEBUG_LIMIT CDEBUG
-
-#endif /* !__KERNEL__ */
-
-/*
- * Lustre Error Checksum: calculates checksum
- * of Hex number by XORing each bit.
- */
-#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \
- ((hexnum) >> 8 & 0xf))
-
-#define CWARN(format, a...) CDEBUG_LIMIT(D_WARNING, format, ## a)
-#define CERROR(format, a...) CDEBUG_LIMIT(D_ERROR, format, ## a)
-#define CEMERG(format, a...) CDEBUG_LIMIT(D_EMERG, format, ## a)
-
-#define LCONSOLE(mask, format, a...) CDEBUG(D_CONSOLE | (mask), format, ## a)
-#define LCONSOLE_INFO(format, a...) CDEBUG_LIMIT(D_CONSOLE, format, ## a)
-#define LCONSOLE_WARN(format, a...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## a)
-#define LCONSOLE_ERROR_MSG(errnum, format, a...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
- "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## a)
-#define LCONSOLE_ERROR(format, a...) LCONSOLE_ERROR_MSG(0x00, format, ## a)
-
-#define LCONSOLE_EMERG(format, a...) CDEBUG(D_CONSOLE | D_EMERG, format, ## a)
-
-#ifdef CDEBUG_ENABLED
-
-#define GOTO(label, rc) \
-do { \
- long GOTO__ret = (long)(rc); \
- CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
- #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
- (signed long)GOTO__ret); \
- goto label; \
-} while (0)
-#else
-#define GOTO(label, rc) do { ((void)(rc)); goto label; } while (0)
-#endif
-
-/* Controlled via configure key */
-/* #define CDEBUG_ENTRY_EXIT */
-
-#ifdef CDEBUG_ENTRY_EXIT
-
-/*
- * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise
- * there will be a warning in osx.
- */
-#define RETURN(rc) \
-do { \
- typeof(rc) RETURN__ret = (rc); \
- CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \
- (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
- EXIT_NESTING; \
- return RETURN__ret; \
-} while (0)
-
-#define ENTRY \
-ENTRY_NESTING; \
-do { \
- CDEBUG(D_TRACE, "Process entered\n"); \
-} while (0)
-
-#define EXIT \
-do { \
- CDEBUG(D_TRACE, "Process leaving\n"); \
- EXIT_NESTING; \
-} while(0)
-#else /* !CDEBUG_ENTRY_EXIT */
-
-#define RETURN(rc) return (rc)
-#define ENTRY do { } while (0)
-#define EXIT do { } while (0)
-
-#endif /* !CDEBUG_ENTRY_EXIT */
-
-/*
- * Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses
- * Lustre RETURN(NULL) macro.
- */
-#if defined(NULL)
-#undef NULL
-#endif
-
-#define NULL ((void *)0)
-
-#define LUSTRE_SRV_LNET_PID LUSTRE_LNET_PID
-
-#ifdef __KERNEL__
-
-#include <libcfs/list.h>
-
-struct libcfs_ioctl_data; /* forward ref */
-
-struct libcfs_ioctl_handler {
- struct list_head item;
- int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_data *data);
-};
-
-#define DECLARE_IOCTL_HANDLER(ident, func) \
- struct libcfs_ioctl_handler ident = { \
- /* .item = */ CFS_LIST_HEAD_INIT(ident.item), \
- /* .handle_ioctl = */ func \
- }
-
-int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
-int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
-
-/* libcfs tcpip */
-int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
-int libcfs_ipif_enumerate(char ***names);
-void libcfs_ipif_free_enumeration(char **names, int n);
-int libcfs_sock_listen(cfs_socket_t **sockp, __u32 ip, int port, int backlog);
-int libcfs_sock_accept(cfs_socket_t **newsockp, cfs_socket_t *sock);
-void libcfs_sock_abort_accept(cfs_socket_t *sock);
-int libcfs_sock_connect(cfs_socket_t **sockp, int *fatal,
- __u32 local_ip, int local_port,
- __u32 peer_ip, int peer_port);
-int libcfs_sock_setbuf(cfs_socket_t *socket, int txbufsize, int rxbufsize);
-int libcfs_sock_getbuf(cfs_socket_t *socket, int *txbufsize, int *rxbufsize);
-int libcfs_sock_getaddr(cfs_socket_t *socket, int remote, __u32 *ip, int *port);
-int libcfs_sock_write(cfs_socket_t *sock, void *buffer, int nob, int timeout);
-int libcfs_sock_read(cfs_socket_t *sock, void *buffer, int nob, int timeout);
-void libcfs_sock_release(cfs_socket_t *sock);
-
-/* libcfs watchdogs */
-struct lc_watchdog;
-
-/* Add a watchdog which fires after "time" milliseconds of delay. You have to
- * touch it once to enable it. */
-struct lc_watchdog *lc_watchdog_add(int time,
- void (*cb)(pid_t pid, void *),
- void *data);
-
-/* Enables a watchdog and resets its timer. */
-void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms);
-void lc_watchdog_touch(struct lc_watchdog *lcw);
-
-/* Disable a watchdog; touch it to restart it. */
-void lc_watchdog_disable(struct lc_watchdog *lcw);
-
-/* Clean up the watchdog */
-void lc_watchdog_delete(struct lc_watchdog *lcw);
-
-/* Dump a debug log */
-void lc_watchdog_dumplog(pid_t pid, void *data);
-
-/* __KERNEL__ */
-#endif
-
-/* need both kernel and user-land acceptor */
-#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512
-#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023
-
-/*
- * libcfs pseudo device operations
- *
- * struct cfs_psdev_t and
- * cfs_psdev_register() and
- * cfs_psdev_deregister() are declared in
- * libcfs/<os>/cfs_prim.h
- *
- * It's just draft now.
- */
-
-struct cfs_psdev_file {
- unsigned long off;
- void *private_data;
- unsigned long reserved1;
- unsigned long reserved2;
-};
-
-struct cfs_psdev_ops {
- int (*p_open)(unsigned long, void *);
- int (*p_close)(unsigned long, void *);
- int (*p_read)(struct cfs_psdev_file *, char *, unsigned long);
- int (*p_write)(struct cfs_psdev_file *, char *, unsigned long);
- int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *);
-};
-
-/*
- * generic time manipulation functions.
- */
-
-static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2)
-{
- return cfs_time_before(t2, t1);
-}
-
-static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2)
-{
- return cfs_time_beforeq(t2, t1);
-}
-
-/*
- * return seconds since UNIX epoch
- */
-static inline time_t cfs_unix_seconds(void)
-{
- cfs_fs_time_t t;
-
- cfs_fs_time_current(&t);
- return (time_t)cfs_fs_time_sec(&t);
-}
-
-static inline cfs_time_t cfs_time_shift(int seconds)
-{
- return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds));
-}
-
-static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small,
- struct timeval *result)
-{
- long r = (long) (
- (large->tv_sec - small->tv_sec) * ONE_MILLION +
- (large->tv_usec - small->tv_usec));
- if (result != NULL) {
- result->tv_usec = r % ONE_MILLION;
- result->tv_sec = r / ONE_MILLION;
- }
- return r;
-}
-
-#define CFS_RATELIMIT(seconds) \
-({ \
- /* \
- * XXX nikita: non-portable initializer \
- */ \
- static time_t __next_message = 0; \
- int result; \
- \
- if (cfs_time_after(cfs_time_current(), __next_message)) \
- result = 1; \
- else { \
- __next_message = cfs_time_shift(seconds); \
- result = 0; \
- } \
- result; \
-})
-
-struct libcfs_debug_msg_data {
- cfs_debug_limit_state_t *msg_cdls;
- int msg_subsys;
- const char *msg_file;
- const char *msg_fn;
- int msg_line;
-};
-
-#define DEBUG_MSG_DATA_INIT(cdls, subsystem, file, func, ln ) { \
- .msg_cdls = (cdls), \
- .msg_subsys = (subsystem), \
- .msg_file = (file), \
- .msg_fn = (func), \
- .msg_line = (ln) \
- }
-
-
-extern int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls,
- int subsys, int mask,
- const char *file, const char *fn, const int line,
- const char *format1, va_list args,
- const char *format2, ...)
- __attribute__ ((format (printf, 9, 10)));
-
-#define libcfs_debug_vmsg(cdls, subsys, mask, file, fn, line, format, args) \
- libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,format,args,NULL,NULL)
-
-#define libcfs_debug_msg(cdls, subsys, mask, file, fn, line, format, a...) \
- libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,NULL,NULL,format, ##a)
-
-#define cdebug_va(cdls, mask, file, func, line, fmt, args) do { \
- CHECK_STACK(); \
- \
- if (((mask) & D_CANTMASK) != 0 || \
- ((libcfs_debug & (mask)) != 0 && \
- (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \
- libcfs_debug_vmsg(cdls, DEBUG_SUBSYSTEM, (mask), \
- (file), (func), (line), fmt, args); \
-} while(0);
-
-#define cdebug(cdls, mask, file, func, line, fmt, a...) do { \
- CHECK_STACK(); \
- \
- if (((mask) & D_CANTMASK) != 0 || \
- ((libcfs_debug & (mask)) != 0 && \
- (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \
- libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, (mask), \
- (file), (func), (line), fmt, ## a); \
-} while(0);
-
-extern void libcfs_assertion_failed(const char *expr, const char *file,
- const char *fn, const int line);
-
-static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg)
-{
- if (cfs_time_after(cfs_time_current(),
- cfs_time_add(now, cfs_time_seconds(15))))
- CERROR("slow %s "CFS_TIME_T" sec\n", msg,
- cfs_duration_sec(cfs_time_sub(cfs_time_current(),now)));
-}
-
-/*
- * helper function similar to do_gettimeofday() of Linux kernel
- */
-static inline void cfs_fs_timeval(struct timeval *tv)
-{
- cfs_fs_time_t time;
-
- cfs_fs_time_current(&time);
- cfs_fs_time_usec(&time, tv);
-}
-
-/*
- * return valid time-out based on user supplied one. Currently we only check
- * that time-out is not shorted than allowed.
- */
-static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout)
-{
- if (timeout < CFS_TICK)
- timeout = CFS_TICK;
- return timeout;
-}
-
-/*
- * Universal memory allocator API
- */
-enum cfs_alloc_flags {
- /* allocation is not allowed to block */
- CFS_ALLOC_ATOMIC = 0x1,
- /* allocation is allowed to block */
- CFS_ALLOC_WAIT = 0x2,
- /* allocation should return zeroed memory */
- CFS_ALLOC_ZERO = 0x4,
- /* allocation is allowed to call file-system code to free/clean
- * memory */
- CFS_ALLOC_FS = 0x8,
- /* allocation is allowed to do io to free/clean memory */
- CFS_ALLOC_IO = 0x10,
- /* don't report allocation failure to the console */
- CFS_ALLOC_NOWARN = 0x20,
- /* standard allocator flag combination */
- CFS_ALLOC_STD = CFS_ALLOC_FS | CFS_ALLOC_IO,
- CFS_ALLOC_USER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO,
-};
-
-/* flags for cfs_page_alloc() in addition to enum cfs_alloc_flags */
-enum cfs_alloc_page_flags {
- /* allow to return page beyond KVM. It has to be mapped into KVM by
- * cfs_page_map(); */
- CFS_ALLOC_HIGH = 0x40,
- CFS_ALLOC_HIGHUSER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO | CFS_ALLOC_HIGH,
-};
-
-/*
- * Drop into debugger, if possible. Implementation is provided by platform.
- */
-
-void cfs_enter_debugger(void);
-
-/*
- * Defined by platform
- */
-void cfs_daemonize(char *str);
-int cfs_daemonize_ctxt(char *str);
-cfs_sigset_t cfs_get_blocked_sigs(void);
-cfs_sigset_t cfs_block_allsigs(void);
-cfs_sigset_t cfs_block_sigs(cfs_sigset_t bits);
-void cfs_restore_sigs(cfs_sigset_t);
-int cfs_signal_pending(void);
-void cfs_clear_sigpending(void);
-/*
- * XXX Liang:
- * these macros should be removed in the future,
- * we keep them just for keeping libcfs compatible
- * with other branches.
- */
-#define libcfs_daemonize(s) cfs_daemonize(s)
-#define cfs_sigmask_lock(f) do { f= 0; } while (0)
-#define cfs_sigmask_unlock(f) do { f= 0; } while (0)
-
-int convert_server_error(__u64 ecode);
-int convert_client_oflag(int cflag, int *result);
-
-/*
- * Stack-tracing filling.
- */
-
-/*
- * Platform-dependent data-type to hold stack frames.
- */
-struct cfs_stack_trace;
-
-/*
- * Fill @trace with current back-trace.
- */
-void cfs_stack_trace_fill(struct cfs_stack_trace *trace);
-
-/*
- * Return instruction pointer for frame @frame_no. NULL if @frame_no is
- * invalid.
- */
-void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no);
-
-/*
- * Universal open flags.
- */
-#define CFS_O_ACCMODE 0003
-#define CFS_O_CREAT 0100
-#define CFS_O_EXCL 0200
-#define CFS_O_NOCTTY 0400
-#define CFS_O_TRUNC 01000
-#define CFS_O_APPEND 02000
-#define CFS_O_NONBLOCK 04000
-#define CFS_O_NDELAY CFS_O_NONBLOCK
-#define CFS_O_SYNC 010000
-#define CFS_O_ASYNC 020000
-#define CFS_O_DIRECT 040000
-#define CFS_O_LARGEFILE 0100000
-#define CFS_O_DIRECTORY 0200000
-#define CFS_O_NOFOLLOW 0400000
-#define CFS_O_NOATIME 01000000
-
-/* convert local open flags to universal open flags */
-int cfs_oflags2univ(int flags);
-/* convert universal open flags to local open flags */
-int cfs_univ2oflags(int flags);
-
-#define _LIBCFS_H
-
-#endif /* _LIBCFS_H */
+++ /dev/null
-Makefile.in
-Makefile
+++ /dev/null
-EXTRA_DIST := kp30.h libcfs.h linux-fs.h linux-lock.h linux-mem.h \
- linux-prim.h linux-time.h linux-tcpip.h lltrace.h \
- portals_compat25.h portals_utils.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_LINUX_KP30_H__
-#define __LIBCFS_LINUX_KP30_H__
-
-#ifndef __LIBCFS_KP30_H__
-#error Do not #include this file directly. #include <libcfs/kp30.h> instead
-#endif
-
-#ifdef __KERNEL__
-#ifndef AUTOCONF_INCLUDED
-# include <linux/config.h>
-#endif
-# include <linux/kernel.h>
-# include <linux/mm.h>
-# include <linux/string.h>
-# include <linux/stat.h>
-# include <linux/init.h>
-# include <linux/errno.h>
-# include <linux/unistd.h>
-# include <asm/system.h>
-# include <linux/kmod.h>
-# include <linux/notifier.h>
-# include <linux/fs.h>
-# include <linux/miscdevice.h>
-# include <linux/vmalloc.h>
-# include <linux/time.h>
-# include <linux/slab.h>
-# include <linux/interrupt.h>
-# include <linux/highmem.h>
-# include <linux/module.h>
-# include <linux/version.h>
-# include <lnet/lnet.h>
-# include <linux/smp_lock.h>
-# include <asm/atomic.h>
-# include <asm/uaccess.h>
-# include <linux/rwsem.h>
-# include <linux/proc_fs.h>
-# include <linux/file.h>
-# include <linux/smp.h>
-# include <linux/ctype.h>
-# include <linux/compiler.h>
-# ifdef HAVE_MM_INLINE
-# include <linux/mm_inline.h>
-# endif
-# if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-# include <linux/kallsyms.h>
-# include <linux/moduleparam.h>
-# endif
-
-#include <libcfs/linux/portals_compat25.h>
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define schedule_work schedule_task
-#define prepare_work(wq,cb,cbdata) \
-do { \
- INIT_TQUEUE((wq), 0, 0); \
- PREPARE_TQUEUE((wq), (cb), (cbdata)); \
-} while (0)
-
-#define PageUptodate Page_Uptodate
-#define our_recalc_sigpending(current) recalc_sigpending(current)
-#define num_online_cpus() smp_num_cpus
-static inline void our_cond_resched(void)
-{
- if (current->need_resched)
- schedule ();
-}
-#define work_struct_t struct tq_struct
-#define cfs_get_work_data(type,field,data) (data)
-#else
-
-#ifdef HAVE_3ARGS_INIT_WORK
-
-#define prepare_work(wq,cb,cbdata) \
-do { \
- INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \
-} while (0)
-
-#define cfs_get_work_data(type,field,data) (data)
-
-#else
-
-#define prepare_work(wq,cb,cbdata) \
-do { \
- INIT_WORK((wq), (void *)(cb)); \
-} while (0)
-
-#define cfs_get_work_data(type,field,data) container_of(data,type,field)
-
-#endif
-
-#define wait_on_page wait_on_page_locked
-#define our_recalc_sigpending(current) recalc_sigpending()
-#define strtok(a,b) strpbrk(a, b)
-static inline void our_cond_resched(void)
-{
- cond_resched();
-}
-#define work_struct_t struct work_struct
-
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
-
-#ifdef CONFIG_SMP
-#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock))
-#else
-#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
-#endif
-#define LASSERT_SEM_LOCKED(sem) LASSERT(down_trylock(sem) != 0)
-
-#define LIBCFS_PANIC(msg) panic(msg)
-
-/* ------------------------------------------------------------------- */
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-
-#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x)
-#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x)
-
-#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x))
-#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
-
-#define PORTAL_MODULE_USE MOD_INC_USE_COUNT
-#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT
-#else
-
-#define PORTAL_SYMBOL_REGISTER(x)
-#define PORTAL_SYMBOL_UNREGISTER(x)
-
-#define PORTAL_SYMBOL_GET(x) symbol_get(x)
-#define PORTAL_SYMBOL_PUT(x) symbol_put(x)
-
-#define PORTAL_MODULE_USE try_module_get(THIS_MODULE)
-#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE)
-
-#endif
-
-/******************************************************************************/
-/* Module parameter support */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-# define CFS_MODULE_PARM(name, t, type, perm, desc) \
- MODULE_PARM(name, t);\
- MODULE_PARM_DESC(name, desc)
-
-#else
-# define CFS_MODULE_PARM(name, t, type, perm, desc) \
- module_param(name, type, perm);\
- MODULE_PARM_DESC(name, desc)
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
-# define CFS_SYSFS_MODULE_PARM 0 /* no sysfs module parameters */
-#else
-# define CFS_SYSFS_MODULE_PARM 1 /* module parameters accessible via sysfs */
-#endif
-/******************************************************************************/
-
-#if (__GNUC__)
-/* Use the special GNU C __attribute__ hack to have the compiler check the
- * printf style argument string against the actual argument count and
- * types.
- */
-#ifdef printf
-# warning printf has been defined as a macro...
-# undef printf
-#endif
-
-#endif /* __GNUC__ */
-
-# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
-# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
-# define time(a) CURRENT_TIME
-
-#ifndef num_possible_cpus
-#define num_possible_cpus() NR_CPUS
-#endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
-#define i_size_read(a) ((a)->i_size)
-#endif
-
-#else /* !__KERNEL__ */
-# include <stdio.h>
-# include <stdlib.h>
-#if defined(__CYGWIN__)
-# include <cygwin-ioctl.h>
-#else
-# include <stdint.h>
-#endif
-# include <unistd.h>
-# include <time.h>
-# include <limits.h>
-# include <errno.h>
-# include <sys/ioctl.h> /* for _IOWR */
-#ifndef _IOWR
-#include "ioctl.h"
-#endif
-
-# define CFS_MODULE_PARM(name, t, type, perm, desc)
-#define PORTAL_SYMBOL_GET(x) inter_module_get(#x)
-#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
-
-#endif /* End of !__KERNEL__ */
-
-/******************************************************************************/
-/* Light-weight trace
- * Support for temporary event tracing with minimal Heisenberg effect. */
-#define LWT_SUPPORT 0
-
-#define LWT_MEMORY (16<<20)
-
-#ifndef KLWT_SUPPORT
-# if defined(__KERNEL__)
-# if !defined(BITS_PER_LONG)
-# error "BITS_PER_LONG not defined"
-# endif
-# elif !defined(__WORDSIZE)
-# error "__WORDSIZE not defined"
-# else
-# define BITS_PER_LONG __WORDSIZE
-# endif
-
-/* kernel hasn't defined this? */
-typedef struct {
- long long lwte_when;
- char *lwte_where;
- void *lwte_task;
- long lwte_p1;
- long lwte_p2;
- long lwte_p3;
- long lwte_p4;
-# if BITS_PER_LONG > 32
- long lwte_pad;
-# endif
-} lwt_event_t;
-#endif /* !KLWT_SUPPORT */
-
-#if LWT_SUPPORT
-# ifdef __KERNEL__
-# if !KLWT_SUPPORT
-
-typedef struct _lwt_page {
- struct list_head lwtp_list;
- struct page *lwtp_page;
- lwt_event_t *lwtp_events;
-} lwt_page_t;
-
-typedef struct {
- int lwtc_current_index;
- lwt_page_t *lwtc_current_page;
-} lwt_cpu_t;
-
-extern int lwt_enabled;
-extern lwt_cpu_t lwt_cpus[];
-
-/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
- * This stuff is meant for finding specific problems; it never stays in
- * production code... */
-
-#define LWTSTR(n) #n
-#define LWTWHERE(f,l) f ":" LWTSTR(l)
-#define LWT_EVENTS_PER_PAGE (CFS_PAGE_SIZE / sizeof (lwt_event_t))
-
-#define LWT_EVENT(p1, p2, p3, p4) \
-do { \
- unsigned long flags; \
- lwt_cpu_t *cpu; \
- lwt_page_t *p; \
- lwt_event_t *e; \
- \
- if (lwt_enabled) { \
- local_irq_save (flags); \
- \
- cpu = &lwt_cpus[smp_processor_id()]; \
- p = cpu->lwtc_current_page; \
- e = &p->lwtp_events[cpu->lwtc_current_index++]; \
- \
- if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \
- cpu->lwtc_current_page = \
- list_entry (p->lwtp_list.next, \
- lwt_page_t, lwtp_list); \
- cpu->lwtc_current_index = 0; \
- } \
- \
- e->lwte_when = get_cycles(); \
- e->lwte_where = LWTWHERE(__FILE__,__LINE__); \
- e->lwte_task = current; \
- e->lwte_p1 = (long)(p1); \
- e->lwte_p2 = (long)(p2); \
- e->lwte_p3 = (long)(p3); \
- e->lwte_p4 = (long)(p4); \
- \
- local_irq_restore (flags); \
- } \
-} while (0)
-
-#endif /* !KLWT_SUPPORT */
-
-extern int lwt_init (void);
-extern void lwt_fini (void);
-extern int lwt_lookup_string (int *size, char *knlptr,
- char *usrptr, int usrsize);
-extern int lwt_control (int enable, int clear);
-extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
- void *user_ptr, int user_size);
-# else /* __KERNEL__ */
-# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */
-# endif /* __KERNEL__ */
-#endif /* LWT_SUPPORT */
-
-/* ------------------------------------------------------------------ */
-
-#define IOCTL_LIBCFS_TYPE long
-
-#ifdef __CYGWIN__
-# ifndef BITS_PER_LONG
-# if (~0UL) == 0xffffffffUL
-# define BITS_PER_LONG 32
-# else
-# define BITS_PER_LONG 64
-# endif
-# endif
-#endif
-
-#if BITS_PER_LONG > 32
-# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
-# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
-# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
-#else
-# define LI_POISON ((int)0x5a5a5a5a)
-# define LL_POISON ((long)0x5a5a5a5a)
-# define LP_POISON ((void *)(long)0x5a5a5a5a)
-#endif
-
-/* this is a bit chunky */
-
-#if defined(__KERNEL__)
- #define _LWORDSIZE BITS_PER_LONG
-#else
- #define _LWORDSIZE __WORDSIZE
-#endif
-
-#if (defined(__x86_64__) && (defined(__KERNEL__) || defined(CRAY_XT3))) || defined(HAVE_U64_LONG_LONG)
-/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */
-# define LPU64 "%Lu"
-# define LPD64 "%Ld"
-# define LPX64 "%#Lx"
-# define LPF64 "L"
-#elif (_LWORDSIZE == 32)
-# define LPU64 "%Lu"
-# define LPD64 "%Ld"
-# define LPX64 "%#Lx"
-# define LPF64 "L"
-#elif (_LWORDSIZE == 64)
-# define LPU64 "%lu"
-# define LPD64 "%ld"
-# define LPX64 "%#lx"
-# define LPF64 "l"
-#endif
-
-#ifdef HAVE_SIZE_T_LONG
-# define LPSZ "%lu"
-#else
-# define LPSZ "%u"
-#endif
-
-#ifdef HAVE_SSIZE_T_LONG
-# define LPSSZ "%ld"
-#else
-# define LPSSZ "%d"
-#endif
-
-#ifndef LPU64
-# error "No word size defined"
-#endif
-
-#undef _LWORDSIZE
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_LINUX_LIBCFS_H__
-#define __LIBCFS_LINUX_LIBCFS_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef HAVE_ASM_TYPES_H
-#include <asm/types.h>
-#else
-#include <libcfs/types.h>
-#endif
-
-#include <stdarg.h>
-#include <libcfs/linux/linux-time.h>
-#include <libcfs/linux/linux-mem.h>
-#include <libcfs/linux/linux-prim.h>
-#include <libcfs/linux/linux-lock.h>
-#include <libcfs/linux/linux-fs.h>
-#include <libcfs/linux/linux-tcpip.h>
-
-
-#ifdef __KERNEL__
-# include <linux/types.h>
-# include <linux/time.h>
-# include <asm/timex.h>
-#else
-# include <sys/types.h>
-# include <sys/time.h>
-# define do_gettimeofday(tv) gettimeofday(tv, NULL);
-typedef unsigned long long cycles_t;
-#endif
-
-#ifndef __KERNEL__
-/* Userpace byte flipping */
-# include <endian.h>
-# include <byteswap.h>
-# define __swab16(x) bswap_16(x)
-# define __swab32(x) bswap_32(x)
-# define __swab64(x) bswap_64(x)
-# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0)
-# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0)
-# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0)
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define le16_to_cpu(x) (x)
-# define cpu_to_le16(x) (x)
-# define le32_to_cpu(x) (x)
-# define cpu_to_le32(x) (x)
-# define le64_to_cpu(x) (x)
-# define cpu_to_le64(x) (x)
-
-# define be16_to_cpu(x) bswap_16(x)
-# define cpu_to_be16(x) bswap_16(x)
-# define be32_to_cpu(x) bswap_32(x)
-# define cpu_to_be32(x) bswap_32(x)
-# define be64_to_cpu(x) bswap_64(x)
-# define cpu_to_be64(x) bswap_64(x)
-
-# else
-# if __BYTE_ORDER == __BIG_ENDIAN
-# define le16_to_cpu(x) bswap_16(x)
-# define cpu_to_le16(x) bswap_16(x)
-# define le32_to_cpu(x) bswap_32(x)
-# define cpu_to_le32(x) bswap_32(x)
-# define le64_to_cpu(x) bswap_64(x)
-# define cpu_to_le64(x) bswap_64(x)
-
-# define be16_to_cpu(x) (x)
-# define cpu_to_be16(x) (x)
-# define be32_to_cpu(x) (x)
-# define cpu_to_be32(x) (x)
-# define be64_to_cpu(x) (x)
-# define cpu_to_be64(x) (x)
-
-# else
-# error "Unknown byte order"
-# endif /* __BIG_ENDIAN */
-# endif /* __LITTLE_ENDIAN */
-#endif /* ! __KERNEL__ */
-
-struct ptldebug_header {
- __u32 ph_len;
- __u32 ph_flags;
- __u32 ph_subsys;
- __u32 ph_mask;
- __u32 ph_cpu_id;
- __u32 ph_sec;
- __u64 ph_usec;
- __u32 ph_stack;
- __u32 ph_pid;
- __u32 ph_extern_pid;
- __u32 ph_line_num;
-} __attribute__((packed));
-
-#ifdef __KERNEL__
-# include <linux/sched.h> /* THREAD_SIZE */
-#else
-# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
-# define THREAD_SIZE 8192
-# endif
-#endif
-
-#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-
-#if defined(__KERNEL__) && !defined(__x86_64__)
-# ifdef __ia64__
-# define CDEBUG_STACK() (THREAD_SIZE - \
- ((unsigned long)__builtin_dwarf_cfa() & \
- (THREAD_SIZE - 1)))
-# else
-# define CDEBUG_STACK() (THREAD_SIZE - \
- ((unsigned long)__builtin_frame_address(0) & \
- (THREAD_SIZE - 1)))
-# endif /* __ia64__ */
-
-#define __CHECK_STACK(file, func, line) \
-do { \
- unsigned long _stack = CDEBUG_STACK(); \
- \
- if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) { \
- libcfs_stack = _stack; \
- libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING, \
- file, func, line, \
- "maximum lustre stack %lu\n", _stack); \
- /*panic("LBUG");*/ \
- } \
-} while (0)
-#define CHECK_STACK() __CHECK_STACK(__FILE__, __func__, __LINE__)
-#else /* !__KERNEL__ */
-#define __CHECK_STACK(X, Y, Z) do { } while(0)
-#define CHECK_STACK() do { } while(0)
-#define CDEBUG_STACK() (0L)
-#endif /* __KERNEL__ */
-
-/* initial pid */
-#define LUSTRE_LNET_PID 12345
-
-#define ENTRY_NESTING_SUPPORT (1)
-#define ENTRY_NESTING do {;} while (0)
-#define EXIT_NESTING do {;} while (0)
-#define __current_nesting_level() (0)
-
-/*
- * Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
- *
- * Implementation is in linux-curproc.c
- */
-#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm)
-
-#if defined(__KERNEL__)
-#include <linux/capability.h>
-typedef kernel_cap_t cfs_kernel_cap_t;
-#else
-typedef __u32 cfs_kernel_cap_t;
-#endif
-
-#if defined(__KERNEL__)
-/*
- * No stack-back-tracing in Linux for now.
- */
-struct cfs_stack_trace {
-};
-
-#ifndef WITH_WATCHDOG
-#define WITH_WATCHDOG
-#endif
-
-#endif
-
-#endif /* _LINUX_LIBCFS_H */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_LINUX_CFS_FS_H__
-#define __LIBCFS_LINUX_CFS_FS_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/mount.h>
-#else /* !__KERNEL__ */
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/mount.h>
-#include <mntent.h>
-#endif /* __KERNEL__ */
-
-typedef struct file cfs_file_t;
-typedef struct dentry cfs_dentry_t;
-typedef struct dirent64 cfs_dirent_t;
-
-#ifdef __KERNEL__
-#define cfs_filp_size(f) (i_size_read((f)->f_dentry->d_inode))
-#define cfs_filp_poff(f) (&(f)->f_pos)
-
-/*
- * XXX Do we need to parse flags and mode in cfs_filp_open?
- */
-cfs_file_t *cfs_filp_open (const char *name, int flags, int mode, int *err);
-#define cfs_filp_close(f) filp_close(f, NULL)
-#define cfs_filp_read(fp, buf, size, pos) (fp)->f_op->read((fp), (buf), (size), pos)
-#define cfs_filp_write(fp, buf, size, pos) (fp)->f_op->write((fp), (buf), (size), pos)
-#define cfs_filp_fsync(fp) (fp)->f_op->fsync((fp), (fp)->f_dentry, 1)
-
-#define cfs_get_file(f) get_file(f)
-#define cfs_put_file(f) fput(f)
-#define cfs_file_count(f) file_count(f)
-
-typedef struct file_lock cfs_flock_t;
-#define cfs_flock_type(fl) ((fl)->fl_type)
-#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0)
-#define cfs_flock_pid(fl) ((fl)->fl_pid)
-#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0)
-#define cfs_flock_start(fl) ((fl)->fl_start)
-#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0)
-#define cfs_flock_end(fl) ((fl)->fl_end)
-#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0)
-
-ssize_t cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset);
-
-#endif
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_LINUX_CFS_LOCK_H__
-#define __LIBCFS_LINUX_CFS_LOCK_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-#include <linux/smp_lock.h>
-
-/*
- * IMPORTANT !!!!!!!!
- *
- * All locks' declaration are not guaranteed to be initialized,
- * Althought some of they are initialized in Linux. All locks
- * declared by CFS_DECL_* should be initialized explicitly.
- */
-
-
-/*
- * spin_lock (use Linux kernel's primitives)
- *
- * - spin_lock_init(x)
- * - spin_lock(x)
- * - spin_unlock(x)
- * - spin_trylock(x)
- *
- * - spin_lock_irqsave(x, f)
- * - spin_unlock_irqrestore(x, f)
- */
-
-/*
- * rw_semaphore (use Linux kernel's primitives)
- *
- * - init_rwsem(x)
- * - down_read(x)
- * - up_read(x)
- * - down_write(x)
- * - up_write(x)
- */
-
-/*
- * rwlock_t (use Linux kernel's primitives)
- *
- * - rwlock_init(x)
- * - read_lock(x)
- * - read_unlock(x)
- * - write_lock(x)
- * - write_unlock(x)
- */
-
-/*
- * mutex:
- *
- * - init_mutex(x)
- * - init_mutex_locked(x)
- * - mutex_up(x)
- * - mutex_down(x)
- */
-#define init_mutex(x) init_MUTEX(x)
-#define init_mutex_locked(x) init_MUTEX_LOCKED(x)
-#define mutex_up(x) up(x)
-#define mutex_down(x) down(x)
-#define mutex_down_trylock(x) down_trylock(x)
-
-/*
- * completion (use Linux kernel's primitives)
- *
- * - init_complition(c)
- * - complete(c)
- * - wait_for_completion(c)
- */
-
-/* __KERNEL__ */
-#else
-
-#include "../user-lock.h"
-
-/* __KERNEL__ */
-#endif
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_LINUX_CFS_MEM_H__
-#define __LIBCFS_LINUX_CFS_MEM_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-# include <linux/mm.h>
-# include <linux/vmalloc.h>
-# include <linux/pagemap.h>
-# include <linux/slab.h>
-# ifdef HAVE_MM_INLINE
-# include <linux/mm_inline.h>
-# endif
-
-typedef struct page cfs_page_t;
-#define CFS_PAGE_SIZE PAGE_CACHE_SIZE
-#define CFS_PAGE_SHIFT PAGE_CACHE_SHIFT
-#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1))
-
-static inline void *cfs_page_address(cfs_page_t *page)
-{
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- return page_address(page);
-}
-
-static inline void *cfs_kmap(cfs_page_t *page)
-{
- return kmap(page);
-}
-
-static inline void cfs_kunmap(cfs_page_t *page)
-{
- kunmap(page);
-}
-
-static inline void cfs_get_page(cfs_page_t *page)
-{
- get_page(page);
-}
-
-static inline int cfs_page_count(cfs_page_t *page)
-{
- return page_count(page);
-}
-
-#define cfs_page_index(p) ((p)->index)
-
-/*
- * Memory allocator
- * XXX Liang: move these declare to public file
- */
-extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
-extern void cfs_free(void *addr);
-
-extern void *cfs_alloc_large(size_t nr_bytes);
-extern void cfs_free_large(void *addr);
-
-extern cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order);
-extern void __cfs_free_pages(cfs_page_t *page, unsigned int order);
-
-#define cfs_alloc_page(flags) cfs_alloc_pages(flags, 0)
-#define __cfs_free_page(page) __cfs_free_pages(page, 0)
-#define cfs_free_page(p) __free_pages(p, 0)
-
-/*
- * In Linux there is no way to determine whether current execution context is
- * blockable.
- */
-#define CFS_ALLOC_ATOMIC_TRY CFS_ALLOC_ATOMIC
-
-/*
- * SLAB allocator
- * XXX Liang: move these declare to public file
- */
-#ifdef HAVE_KMEM_CACHE
-typedef struct kmem_cache cfs_mem_cache_t;
-#else
-typedef kmem_cache_t cfs_mem_cache_t;
-#endif
-extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long);
-extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
-extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
-extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
-
-/*
- */
-#define CFS_DECL_MMSPACE mm_segment_t __oldfs
-#define CFS_MMSPACE_OPEN do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
-#define CFS_MMSPACE_CLOSE set_fs(__oldfs)
-
-#else /* !__KERNEL__ */
-#ifdef HAVE_ASM_PAGE_H
-#include <asm/page.h> /* needed for PAGE_SIZE - rread */
-#endif
-
-#include <libcfs/user-prim.h>
-/* __KERNEL__ */
-#endif
-
-#endif /* __LINUX_CFS_MEM_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_LINUX_CFS_PRIM_H__
-#define __LIBCFS_LINUX_CFS_PRIM_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/version.h>
-#include <linux/proc_fs.h>
-#include <linux/mm.h>
-#include <linux/timer.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-
-#include <linux/miscdevice.h>
-#include <libcfs/linux/portals_compat25.h>
-#include <asm/div64.h>
-
-#include <libcfs/linux/linux-time.h>
-
-/*
- * Pseudo device register
- */
-typedef struct miscdevice cfs_psdev_t;
-#define cfs_psdev_register(dev) misc_register(dev)
-#define cfs_psdev_deregister(dev) misc_deregister(dev)
-
-/*
- * Sysctl register
- */
-typedef struct ctl_table cfs_sysctl_table_t;
-typedef struct ctl_table_header cfs_sysctl_table_header_t;
-
-#ifdef HAVE_2ARGS_REGISTER_SYSCTL
-#define cfs_register_sysctl_table(t, a) register_sysctl_table(t, a)
-#else
-#define cfs_register_sysctl_table(t, a) register_sysctl_table(t)
-#endif
-#define cfs_unregister_sysctl_table(t) unregister_sysctl_table(t)
-
-/*
- * Symbol register
- */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define cfs_symbol_register(s, p) inter_module_register(s, THIS_MODULE, p)
-#define cfs_symbol_unregister(s) inter_module_unregister(s)
-#define cfs_symbol_get(s) inter_module_get(s)
-#define cfs_symbol_put(s) inter_module_put(s)
-#define cfs_module_get() MOD_INC_USE_COUNT
-#define cfs_module_put() MOD_DEC_USE_COUNT
-#else
-#define cfs_symbol_register(s, p) do {} while(0)
-#define cfs_symbol_unregister(s) do {} while(0)
-#define cfs_symbol_get(s) symbol_get(s)
-#define cfs_symbol_put(s) symbol_put(s)
-#define cfs_module_get() try_module_get(THIS_MODULE)
-#define cfs_module_put() module_put(THIS_MODULE)
-#endif
-
-/*
- * Proc file system APIs
- */
-typedef read_proc_t cfs_read_proc_t;
-typedef write_proc_t cfs_write_proc_t;
-typedef struct proc_dir_entry cfs_proc_dir_entry_t;
-#define cfs_create_proc_entry(n, m, p) create_proc_entry(n, m, p)
-#define cfs_free_proc_entry(e) free_proc_entry(e)
-#define cfs_remove_proc_entry(n, e) remove_proc_entry(n, e)
-
-/*
- * Wait Queue
- */
-#define CFS_TASK_INTERRUPTIBLE TASK_INTERRUPTIBLE
-#define CFS_TASK_UNINT TASK_UNINTERRUPTIBLE
-
-typedef wait_queue_t cfs_waitlink_t;
-typedef wait_queue_head_t cfs_waitq_t;
-
-typedef long cfs_task_state_t;
-
-#define cfs_waitq_init(w) init_waitqueue_head(w)
-#define cfs_waitlink_init(l) init_waitqueue_entry(l, current)
-#define cfs_waitq_add(w, l) add_wait_queue(w, l)
-#define cfs_waitq_add_exclusive(w, l) add_wait_queue_exclusive(w, l)
-#define cfs_waitq_forward(l, w) do {} while(0)
-#define cfs_waitq_del(w, l) remove_wait_queue(w, l)
-#define cfs_waitq_active(w) waitqueue_active(w)
-#define cfs_waitq_signal(w) wake_up(w)
-#define cfs_waitq_signal_nr(w,n) wake_up_nr(w, n)
-#define cfs_waitq_broadcast(w) wake_up_all(w)
-#define cfs_waitq_wait(l, s) schedule()
-#define cfs_waitq_timedwait(l, s, t) schedule_timeout(t)
-#define cfs_schedule_timeout(s, t) schedule_timeout(t)
-#define cfs_schedule() schedule()
-
-/* Kernel thread */
-typedef int (*cfs_thread_t)(void *);
-
-static inline int cfs_kernel_thread(int (*fn)(void *),
- void *arg, unsigned long flags)
-{
- void *orig_info = current->journal_info;
- int rc;
-
- current->journal_info = NULL;
- rc = kernel_thread(fn, arg, flags);
- current->journal_info = orig_info;
- return rc;
-}
-
-
-/*
- * Task struct
- */
-typedef struct task_struct cfs_task_t;
-#define cfs_current() current
-#define cfs_task_lock(t) task_lock(t)
-#define cfs_task_unlock(t) task_unlock(t)
-#define CFS_DECL_JOURNAL_DATA void *journal_info
-#define CFS_PUSH_JOURNAL do { \
- journal_info = current->journal_info; \
- current->journal_info = NULL; \
- } while(0)
-#define CFS_POP_JOURNAL do { \
- current->journal_info = journal_info; \
- } while(0)
-
-/* Module interfaces */
-#define cfs_module(name, version, init, fini) \
-module_init(init); \
-module_exit(fini)
-
-/*
- * Signal
- */
-typedef sigset_t cfs_sigset_t;
-
-/*
- * Timer
- */
-typedef struct timer_list cfs_timer_t;
-typedef void (*timer_func_t)(unsigned long);
-
-#define cfs_init_timer(t) init_timer(t)
-
-static inline void cfs_timer_init(cfs_timer_t *t, void (*func)(unsigned long), void *arg)
-{
- init_timer(t);
- t->function = (timer_func_t)func;
- t->data = (unsigned long)arg;
-}
-
-static inline void cfs_timer_done(cfs_timer_t *t)
-{
- return;
-}
-
-static inline void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline)
-{
- mod_timer(t, deadline);
-}
-
-static inline void cfs_timer_disarm(cfs_timer_t *t)
-{
- del_timer(t);
-}
-
-static inline int cfs_timer_is_armed(cfs_timer_t *t)
-{
- return timer_pending(t);
-}
-
-static inline cfs_time_t cfs_timer_deadline(cfs_timer_t *t)
-{
- return t->expires;
-}
-
-
-/* deschedule for a bit... */
-static inline void cfs_pause(cfs_duration_t ticks)
-{
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ticks);
-}
-
-#ifndef wait_event_timeout /* Only for RHEL3 2.4.21 kernel */
-#define __wait_event_timeout(wq, condition, timeout, ret) \
-do { \
- int __ret = 0; \
- if (!(condition)) { \
- wait_queue_t __wait; \
- unsigned long expire; \
- \
- init_waitqueue_entry(&__wait, current); \
- expire = timeout + jiffies; \
- add_wait_queue(&wq, &__wait); \
- for (;;) { \
- set_current_state(TASK_UNINTERRUPTIBLE); \
- if (condition) \
- break; \
- if (jiffies > expire) { \
- ret = jiffies - expire; \
- break; \
- } \
- schedule_timeout(timeout); \
- } \
- current->state = TASK_RUNNING; \
- remove_wait_queue(&wq, &__wait); \
- } \
-} while (0)
-/*
- retval == 0; condition met; we're good.
- retval > 0; timed out.
-*/
-#define cfs_waitq_wait_event_timeout(wq, condition, timeout) \
-({ \
- int __ret = 0; \
- if (!(condition)) \
- __wait_event_timeout(wq, condition, timeout, __ret); \
- __ret; \
-})
-#else
-#define cfs_waitq_wait_event_timeout wait_event_timeout
-#endif
-
-#ifndef wait_event_interruptible_timeout /* Only for RHEL3 2.4.21 kernel */
-#define __wait_event_interruptible_timeout(wq, condition, timeout, ret) \
-do { \
- int __ret = 0; \
- if (!(condition)) { \
- wait_queue_t __wait; \
- unsigned long expire; \
- \
- init_waitqueue_entry(&__wait, current); \
- expire = timeout + jiffies; \
- add_wait_queue(&wq, &__wait); \
- for (;;) { \
- set_current_state(TASK_INTERRUPTIBLE); \
- if (condition) \
- break; \
- if (jiffies > expire) { \
- ret = jiffies - expire; \
- break; \
- } \
- if (!signal_pending(current)) { \
- schedule_timeout(timeout); \
- continue; \
- } \
- ret = -ERESTARTSYS; \
- break; \
- } \
- current->state = TASK_RUNNING; \
- remove_wait_queue(&wq, &__wait); \
- } \
-} while (0)
-
-/*
- retval == 0; condition met; we're good.
- retval < 0; interrupted by signal.
- retval > 0; timed out.
-*/
-#define cfs_waitq_wait_event_interruptible_timeout(wq, condition, timeout) \
-({ \
- int __ret = 0; \
- if (!(condition)) \
- __wait_event_interruptible_timeout(wq, condition, \
- timeout, __ret); \
- __ret; \
-})
-#else
-#define cfs_waitq_wait_event_interruptible_timeout wait_event_interruptible_timeout
-#endif
-
-#else /* !__KERNEL__ */
-
-typedef struct proc_dir_entry cfs_proc_dir_entry_t;
-#include "../user-prim.h"
-
-#endif /* __KERNEL__ */
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_LINUX_CFS_TCP_H__
-#define __LIBCFS_LINUX_CFS_TCP_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-#include <net/sock.h>
-
-typedef struct socket cfs_socket_t;
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72))
-# define sk_allocation allocation
-# define sk_data_ready data_ready
-# define sk_write_space write_space
-# define sk_user_data user_data
-# define sk_prot prot
-# define sk_sndbuf sndbuf
-# define sk_rcvbuf rcvbuf
-# define sk_socket socket
-# define sk_sleep sleep
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
-# define sk_wmem_queued wmem_queued
-# define sk_err err
-# define sk_route_caps route_caps
-#endif
-
-#define SOCK_SNDBUF(so) ((so)->sk->sk_sndbuf)
-#define SOCK_WMEM_QUEUED(so) ((so)->sk->sk_wmem_queued)
-#define SOCK_ERROR(so) ((so)->sk->sk_err)
-#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags)
-
-#else /* !__KERNEL__ */
-
-#include "../user-tcpip.h"
-
-#endif /* __KERNEL__ */
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable time API for Linux (kernel and user-level).
- *
- */
-
-#ifndef __LIBCFS_LINUX_LINUX_TIME_H__
-#define __LIBCFS_LINUX_LINUX_TIME_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-/* Portable time API */
-
-/*
- * Platform provides three opaque data-types:
- *
- * cfs_time_t represents point in time. This is internal kernel
- * time rather than "wall clock". This time bears no
- * relation to gettimeofday().
- *
- * cfs_duration_t represents time interval with resolution of internal
- * platform clock
- *
- * cfs_fs_time_t represents instance in world-visible time. This is
- * used in file-system time-stamps
- *
- * cfs_time_t cfs_time_current(void);
- * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
- * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
- * int cfs_time_before (cfs_time_t, cfs_time_t);
- * int cfs_time_beforeq(cfs_time_t, cfs_time_t);
- *
- * cfs_duration_t cfs_duration_build(int64_t);
- *
- * time_t cfs_duration_sec (cfs_duration_t);
- * void cfs_duration_usec(cfs_duration_t, struct timeval *);
- * void cfs_duration_nsec(cfs_duration_t, struct timespec *);
- *
- * void cfs_fs_time_current(cfs_fs_time_t *);
- * time_t cfs_fs_time_sec (cfs_fs_time_t *);
- * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
- * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
- * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
- * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
- *
- * CFS_TIME_FORMAT
- * CFS_DURATION_FORMAT
- *
- */
-
-#define ONE_BILLION ((u_int64_t)1000000000)
-#define ONE_MILLION 1000000
-
-#ifdef __KERNEL__
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/version.h>
-#include <linux/time.h>
-#include <asm/div64.h>
-
-#include <libcfs/linux/portals_compat25.h>
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-
-/*
- * old kernels---CURRENT_TIME is struct timeval
- */
-typedef struct timeval cfs_fs_time_t;
-
-static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
-{
- *v = *t;
-}
-
-static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
-{
- s->tv_sec = t->tv_sec;
- s->tv_nsec = t->tv_usec * 1000;
-}
-
-/*
- * internal helper function used by cfs_fs_time_before*()
- */
-static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t)
-{
- return (unsigned long long)t->tv_sec * ONE_MILLION + t->tv_usec;
-}
-
-#define CURRENT_KERN_TIME xtime
-
-#else
-/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */
-
-/*
- * post 2.5 kernels.
- */
-
-#include <linux/jiffies.h>
-
-typedef struct timespec cfs_fs_time_t;
-
-static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
-{
- v->tv_sec = t->tv_sec;
- v->tv_usec = t->tv_nsec / 1000;
-}
-
-static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
-{
- *s = *t;
-}
-
-/*
- * internal helper function used by cfs_fs_time_before*()
- */
-static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t)
-{
- return (unsigned long long)t->tv_sec * ONE_BILLION + t->tv_nsec;
-}
-
-#define CURRENT_KERN_TIME CURRENT_TIME
-
-/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */
-#endif
-
-/*
- * Generic kernel stuff
- */
-
-typedef unsigned long cfs_time_t; /* jiffies */
-typedef long cfs_duration_t;
-
-
-static inline cfs_time_t cfs_time_current(void)
-{
- return jiffies;
-}
-
-static inline time_t cfs_time_current_sec(void)
-{
- return CURRENT_SECONDS;
-}
-
-static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
-{
- return t + d;
-}
-
-static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
-{
- return t1 - t2;
-}
-
-static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
-{
- return time_before(t1, t2);
-}
-
-static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
-{
- return time_before_eq(t1, t2);
-}
-
-static inline void cfs_fs_time_current(cfs_fs_time_t *t)
-{
- *t = CURRENT_KERN_TIME;
-}
-
-static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
-{
- return t->tv_sec;
-}
-
-static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
-{
- return __cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2);
-}
-
-static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
-{
- return __cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2);
-}
-
-#if 0
-static inline cfs_duration_t cfs_duration_build(int64_t nano)
-{
-#if (BITS_PER_LONG == 32)
- /* We cannot use do_div(t, ONE_BILLION), do_div can only process
- * 64 bits n and 32 bits base */
- int64_t t = nano * HZ;
- do_div(t, 1000);
- do_div(t, 1000000);
- return (cfs_duration_t)t;
-#else
- return (nano * HZ / ONE_BILLION);
-#endif
-}
-#endif
-
-static inline cfs_duration_t cfs_time_seconds(int seconds)
-{
- return ((cfs_duration_t)seconds) * HZ;
-}
-
-static inline time_t cfs_duration_sec(cfs_duration_t d)
-{
- return d / HZ;
-}
-
-static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
-{
-#if (BITS_PER_LONG == 32) && (HZ > 4096)
- __u64 t;
-
- s->tv_sec = d / HZ;
- t = (d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION;
- do_div(t, HZ);
- s->tv_usec = t;
-#else
- s->tv_sec = d / HZ;
- s->tv_usec = ((d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION) / HZ;
-#endif
-}
-
-static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
-{
-#if (BITS_PER_LONG == 32)
- __u64 t;
-
- s->tv_sec = d / HZ;
- t = (d - s->tv_sec * HZ) * ONE_BILLION;
- do_div(t, HZ);
- s->tv_nsec = t;
-#else
- s->tv_sec = d / HZ;
- s->tv_nsec = ((d - s->tv_sec * HZ) * ONE_BILLION) / HZ;
-#endif
-}
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-
-#define cfs_time_current_64 get_jiffies_64
-
-static inline __u64 cfs_time_add_64(__u64 t, __u64 d)
-{
- return t + d;
-}
-
-static inline __u64 cfs_time_shift_64(int seconds)
-{
- return cfs_time_add_64(cfs_time_current_64(),
- cfs_time_seconds(seconds));
-}
-
-static inline int cfs_time_before_64(__u64 t1, __u64 t2)
-{
- return (__s64)t2 - (__s64)t1 > 0;
-}
-
-static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2)
-{
- return (__s64)t2 - (__s64)t1 >= 0;
-}
-
-#else
-#define cfs_time_current_64 cfs_time_current
-#define cfs_time_add_64 cfs_time_add
-#define cfs_time_shift_64 cfs_time_shift
-#define cfs_time_before_64 cfs_time_before
-#define cfs_time_beforeq_64 cfs_time_beforeq
-#endif
-
-/*
- * One jiffy
- */
-#define CFS_TICK (1)
-
-#define CFS_TIME_T "%lu"
-#define CFS_DURATION_T "%ld"
-
-#else /* !__KERNEL__ */
-
-/*
- * Liblustre. time(2) based implementation.
- */
-
-#define CFS_TIME_T "%lu"
-
-#include <libcfs/user-time.h>
-
-#endif /* __KERNEL__ */
-
-/* __LIBCFS_LINUX_LINUX_TIME_H__ */
-#endif
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_LINUX_LLTRACE_H__
-#define __LIBCFS_LINUX_LLTRACE_H__
-
-#ifndef __LIBCFS_LLTRACE_H__
-#error Do not #include this file directly. #include <libcfs/lltrace.h> instead
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <lnet/types.h>
-#include <libcfs/kp30.h>
-#include <lnet/lnetctl.h>
-#include <linux/limits.h>
-#include <asm/page.h>
-#include <linux/version.h>
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__
-#define __LIBCFS_LINUX_PORTALS_COMPAT_H__
-
-// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
-#if defined(SPINLOCK_DEBUG) && SPINLOCK_DEBUG
-# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-# define SIGNAL_MASK_ASSERT() \
- LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
-# else
-# define SIGNAL_MASK_ASSERT() \
- LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
-# endif
-#else
-# define SIGNAL_MASK_ASSERT()
-#endif
-// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-
-# define SIGNAL_MASK_LOCK(task, flags) \
- spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags) \
- spin_unlock_irqrestore(&task->sighand->siglock, flags)
-# define USERMODEHELPER(path, argv, envp) \
- call_usermodehelper(path, argv, envp, 1)
-# define RECALC_SIGPENDING recalc_sigpending()
-# define CLEAR_SIGPENDING clear_tsk_thread_flag(current, \
- TIF_SIGPENDING)
-# define CURRENT_SECONDS get_seconds()
-# define smp_num_cpus num_online_cpus()
-
-
-#elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */
-
-# define SIGNAL_MASK_LOCK(task, flags) \
- spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags) \
- spin_unlock_irqrestore(&task->sighand->siglock, flags)
-# define USERMODEHELPER(path, argv, envp) \
- call_usermodehelper(path, argv, envp)
-# define RECALC_SIGPENDING recalc_sigpending()
-# define CLEAR_SIGPENDING (current->sigpending = 0)
-# define CURRENT_SECONDS CURRENT_TIME
-# define wait_event_interruptible_exclusive(wq, condition) \
- wait_event_interruptible(wq, condition)
-
-#else /* 2.4.x */
-
-# define SIGNAL_MASK_LOCK(task, flags) \
- spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags) \
- spin_unlock_irqrestore(&task->sigmask_lock, flags)
-# define USERMODEHELPER(path, argv, envp) \
- call_usermodehelper(path, argv, envp)
-# define RECALC_SIGPENDING recalc_sigpending(current)
-# define CLEAR_SIGPENDING (current->sigpending = 0)
-# define CURRENT_SECONDS CURRENT_TIME
-# define wait_event_interruptible_exclusive(wq, condition) \
- wait_event_interruptible(wq, condition)
-
-#endif
-
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-#define UML_PID(tsk) ((tsk)->thread.extern_pid)
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define UML_PID(tsk) ((tsk)->thread.mode.tt.extern_pid)
-#else
-#define UML_PID(tsk) ((tsk)->pid)
-#endif
-
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-# define THREAD_NAME(comm, len, fmt, a...) \
- snprintf(comm, len,fmt"|%d", ## a, UML_PID(current))
-#else
-# define THREAD_NAME(comm, len, fmt, a...) \
- snprintf(comm, len, fmt, ## a)
-#endif
-
-#ifdef HAVE_PAGE_LIST
-/* 2.4 alloc_page users can use page->list */
-#define PAGE_LIST_ENTRY list
-#define PAGE_LIST(page) ((page)->list)
-#else
-/* 2.6 alloc_page users can use page->lru */
-#define PAGE_LIST_ENTRY lru
-#define PAGE_LIST(page) ((page)->lru)
-#endif
-
-#ifndef HAVE_CPU_ONLINE
-#define cpu_online(cpu) ((1<<cpu) & (cpu_online_map))
-#endif
-#ifndef HAVE_CPUMASK_T
-typedef unsigned long cpumask_t;
-#define cpu_set(cpu, map) set_bit(cpu, &(map))
-#define cpus_clear(map) memset(&(map), 0, sizeof(cpumask_t))
-#endif
-
-#ifndef __user
-#define __user
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
-#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \
- proc_dointvec(table, write, filp, buffer, lenp)
-#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \
- proc_dostring(table, write, filp, buffer, lenp)
-#define LL_PROC_PROTO(name) \
- name(cfs_sysctl_table_t *table, int write, struct file *filp, \
- void __user *buffer, size_t *lenp)
-#define DECLARE_LL_PROC_PPOS_DECL loff_t *ppos = &filp->f_pos
-#else
-#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \
- proc_dointvec(table, write, filp, buffer, lenp, ppos);
-#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \
- proc_dostring(table, write, filp, buffer, lenp, ppos);
-#define LL_PROC_PROTO(name) \
- name(cfs_sysctl_table_t *table, int write, struct file *filp, \
- void __user *buffer, size_t *lenp, loff_t *ppos)
-#define DECLARE_LL_PROC_PPOS_DECL
-#endif
-
-#endif /* _PORTALS_COMPAT_H */
+++ /dev/null
-#ifndef __LIBCFS_LINUX_PORTALS_UTILS_H__
-#define __LIBCFS_LINUX_PORTALS_UTILS_H__
-
-#ifndef __LIBCFS_PORTALS_UTILS_H__
-#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead
-#endif
-
-#ifdef __KERNEL__
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/smp_lock.h>
-#include <linux/poll.h>
-#include <linux/random.h>
-
-#include <asm/unistd.h>
-#include <asm/semaphore.h>
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-# include <linux/tqueue.h>
-#else /* (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) */
-# include <linux/workqueue.h>
-#endif
-#include <libcfs/linux/linux-mem.h>
-#include <libcfs/linux/linux-prim.h>
-#else /* !__KERNEL__ */
-
-#include <endian.h>
-#include <libcfs/list.h>
-
-#ifdef HAVE_LINUX_VERSION_H
-# include <linux/version.h>
-
-# if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-# define BUG() /* workaround for module.h includes */
-# include <linux/module.h>
-# endif
-#endif /* !HAVE_LINUX_VERSION_H */
-
-#ifndef __CYGWIN__
-# include <sys/syscall.h>
-#else /* __CYGWIN__ */
-# include <windows.h>
-# include <windef.h>
-# include <netinet/in.h>
-#endif /* __CYGWIN__ */
-
-#endif /* !__KERNEL__ */
-#endif
+++ /dev/null
-#ifndef __LIBCFS_LIST_H__
-#define __LIBCFS_LIST_H__
-
-#if defined (__linux__) && defined(__KERNEL__)
-
-#include <linux/list.h>
-
-#define CFS_LIST_HEAD_INIT(n) LIST_HEAD_INIT(n)
-#define CFS_LIST_HEAD(n) LIST_HEAD(n)
-#define CFS_INIT_LIST_HEAD(p) INIT_LIST_HEAD(p)
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define CFS_HLIST_HEAD_INIT HLIST_HEAD_INIT
-#define CFS_HLIST_HEAD(n) HLIST_HEAD(n)
-#define CFS_INIT_HLIST_HEAD(p) INIT_HLIST_HEAD(p)
-#define CFS_INIT_HLIST_NODE(p) INIT_HLIST_NODE(p)
-#endif
-
-#else /* !defined (__linux__) || !defined(__KERNEL__) */
-
-/*
- * Simple doubly linked list implementation.
- *
- * Some of the internal functions ("__xxx") are useful when
- * manipulating whole lists rather than single entries, as
- * sometimes we already know the next/prev entries and we can
- * generate better code by using them directly rather than
- * using the generic single-entry routines.
- */
-
-#ifndef __WINNT__
-#define prefetch(a) ((void)a)
-#else
-#define prefetch(a) ((void *)a)
-#endif
-
-struct list_head {
- struct list_head *next, *prev;
-};
-
-typedef struct list_head list_t;
-
-#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define CFS_LIST_HEAD(name) \
- struct list_head name = CFS_LIST_HEAD_INIT(name)
-
-#define CFS_INIT_LIST_HEAD(ptr) do { \
- (ptr)->next = (ptr); (ptr)->prev = (ptr); \
-} while (0)
-
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_add(struct list_head * new,
- struct list_head * prev,
- struct list_head * next)
-{
- next->prev = new;
- new->next = next;
- new->prev = prev;
- prev->next = new;
-}
-
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head, head->next);
-}
-
-/**
- * list_add_tail - add a new entry
- * @new: new entry to be added
- * @head: list head to add it before
- *
- * Insert a new entry before the specified head.
- * This is useful for implementing queues.
- */
-static inline void list_add_tail(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head->prev, head);
-}
-
-/*
- * Delete a list entry by making the prev/next entries
- * point to each other.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
- next->prev = prev;
- prev->next = next;
-}
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
- */
-static inline void list_del(struct list_head *entry)
-{
- __list_del(entry->prev, entry->next);
-}
-
-/**
- * list_del_init - deletes entry from list and reinitialize it.
- * @entry: the element to delete from the list.
- */
-static inline void list_del_init(struct list_head *entry)
-{
- __list_del(entry->prev, entry->next);
- CFS_INIT_LIST_HEAD(entry);
-}
-
-/**
- * list_move - delete from one list and add as another's head
- * @list: the entry to move
- * @head: the head that will precede our entry
- *
- * This is not safe to use if @list is already on the same list as @head.
- */
-static inline void list_move(struct list_head *list, struct list_head *head)
-{
- __list_del(list->prev, list->next);
- list_add(list, head);
-}
-
-/**
- * list_move_tail - delete from one list and add as another's tail
- * @list: the entry to move
- * @head: the head that will follow our entry
- *
- * This is not safe to use if @list is already on the same list as @head.
- */
-static inline void list_move_tail(struct list_head *list,
- struct list_head *head)
-{
- __list_del(list->prev, list->next);
- list_add_tail(list, head);
-}
-
-/**
- * list_empty - tests whether a list is empty
- * @head: the list to test.
- */
-static inline int list_empty(struct list_head *head)
-{
- return head->next == head;
-}
-
-static inline void __list_splice(struct list_head *list,
- struct list_head *head)
-{
- struct list_head *first = list->next;
- struct list_head *last = list->prev;
- struct list_head *at = head->next;
-
- first->prev = head;
- head->next = first;
-
- last->next = at;
- at->prev = last;
-}
-
-/**
- * list_splice - join two lists
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice(struct list_head *list, struct list_head *head)
-{
- if (!list_empty(list))
- __list_splice(list, head);
-}
-
-/**
- * list_splice_init - join two lists and reinitialise the emptied list.
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The list at @list is reinitialised
- */
-static inline void list_splice_init(struct list_head *list,
- struct list_head *head)
-{
- if (!list_empty(list)) {
- __list_splice(list, head);
- CFS_INIT_LIST_HEAD(list);
- }
-}
-
-/**
- * list_entry - get the struct for this entry
- * @ptr: the &struct list_head pointer.
- * @type: the type of the struct this is embedded in.
- * @member: the name of the list_struct within the struct.
- */
-#define list_entry(ptr, type, member) \
- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-/**
- * list_for_each - iterate over a list
- * @pos: the &struct list_head to use as a loop counter.
- * @head: the head for your list.
- */
-#define list_for_each(pos, head) \
- for (pos = (head)->next, prefetch(pos->next); pos != (head); \
- pos = pos->next, prefetch(pos->next))
-
-/**
- * list_for_each_safe - iterate over a list safe against removal of list entry
- * @pos: the &struct list_head to use as a loop counter.
- * @n: another &struct list_head to use as temporary storage
- * @head: the head for your list.
- */
-#define list_for_each_safe(pos, n, head) \
- for (pos = (head)->next, n = pos->next; pos != (head); \
- pos = n, n = pos->next)
-
-/*
- * Double linked lists with a single pointer list head.
- * Mostly useful for hash tables where the two pointer list head is
- * too wasteful.
- * You lose the ability to access the tail in O(1).
- */
-
-struct hlist_head {
- struct hlist_node *first;
-};
-
-struct hlist_node {
- struct hlist_node *next, **pprev;
-};
-
-/*
- * "NULL" might not be defined at this point
- */
-#ifdef NULL
-#define NULL_P NULL
-#else
-#define NULL_P ((void *)0)
-#endif
-
-#define CFS_HLIST_HEAD_INIT { .first = NULL_P }
-#define CFS_HLIST_HEAD(name) struct hlist_head name = { .first = NULL_P }
-#define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P)
-#define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P)
-
-#define HLIST_HEAD_INIT CFS_HLIST_HEAD_INIT
-#define HLIST_HEAD(n) CFS_HLIST_HEAD(n)
-#define INIT_HLIST_HEAD(p) CFS_INIT_HLIST_HEAD(p)
-#define INIT_HLIST_NODE(p) CFS_INIT_HLIST_NODE(p)
-
-static inline int hlist_unhashed(const struct hlist_node *h)
-{
- return !h->pprev;
-}
-
-static inline int hlist_empty(const struct hlist_head *h)
-{
- return !h->first;
-}
-
-static inline void __hlist_del(struct hlist_node *n)
-{
- struct hlist_node *next = n->next;
- struct hlist_node **pprev = n->pprev;
- *pprev = next;
- if (next)
- next->pprev = pprev;
-}
-
-static inline void hlist_del(struct hlist_node *n)
-{
- __hlist_del(n);
-}
-
-static inline void hlist_del_init(struct hlist_node *n)
-{
- if (n->pprev) {
- __hlist_del(n);
- INIT_HLIST_NODE(n);
- }
-}
-
-static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
-{
- struct hlist_node *first = h->first;
- n->next = first;
- if (first)
- first->pprev = &n->next;
- h->first = n;
- n->pprev = &h->first;
-}
-
-/* next must be != NULL */
-static inline void hlist_add_before(struct hlist_node *n,
- struct hlist_node *next)
-{
- n->pprev = next->pprev;
- n->next = next;
- next->pprev = &n->next;
- *(n->pprev) = n;
-}
-
-static inline void hlist_add_after(struct hlist_node *n,
- struct hlist_node *next)
-{
- next->next = n->next;
- n->next = next;
- next->pprev = &n->next;
-
- if(next->next)
- next->next->pprev = &next->next;
-}
-
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
-
-#define hlist_for_each(pos, head) \
- for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
- pos = pos->next)
-
-#define hlist_for_each_safe(pos, n, head) \
- for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
- pos = n)
-
-/**
- * hlist_for_each_entry - iterate over list of given type
- * @tpos: the type * to use as a loop counter.
- * @pos: the &struct hlist_node to use as a loop counter.
- * @head: the head for your list.
- * @member: the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry(tpos, pos, head, member) \
- for (pos = (head)->first; \
- pos && ({ prefetch(pos->next); 1;}) && \
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
- pos = pos->next)
-
-/**
- * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point
- * @tpos: the type * to use as a loop counter.
- * @pos: the &struct hlist_node to use as a loop counter.
- * @member: the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_continue(tpos, pos, member) \
- for (pos = (pos)->next; \
- pos && ({ prefetch(pos->next); 1;}) && \
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
- pos = pos->next)
-
-/**
- * hlist_for_each_entry_from - iterate over a hlist continuing from existing point
- * @tpos: the type * to use as a loop counter.
- * @pos: the &struct hlist_node to use as a loop counter.
- * @member: the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_from(tpos, pos, member) \
- for (; pos && ({ prefetch(pos->next); 1;}) && \
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
- pos = pos->next)
-
-/**
- * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @tpos: the type * to use as a loop counter.
- * @pos: the &struct hlist_node to use as a loop counter.
- * @n: another &struct hlist_node to use as temporary storage
- * @head: the head for your list.
- * @member: the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \
- for (pos = (head)->first; \
- pos && ({ n = pos->next; 1; }) && \
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
- pos = n)
-
-#endif /* __linux__ && __KERNEL__ */
-
-#ifndef list_for_each_prev
-/**
- * list_for_each_prev - iterate over a list in reverse order
- * @pos: the &struct list_head to use as a loop counter.
- * @head: the head for your list.
- */
-#define list_for_each_prev(pos, head) \
- for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
- pos = pos->prev, prefetch(pos->prev))
-
-#endif /* list_for_each_prev */
-
-#ifndef list_for_each_entry
-/**
- * list_for_each_entry - iterate over list of given type
- * @pos: the type * to use as a loop counter.
- * @head: the head for your list.
- * @member: the name of the list_struct within the struct.
- */
-#define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member), \
- prefetch(pos->member.next); \
- &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member), \
- prefetch(pos->member.next))
-#endif /* list_for_each_entry */
-
-#ifndef list_for_each_entry_reverse
-/**
- * list_for_each_entry_reverse - iterate backwards over list of given type.
- * @pos: the type * to use as a loop counter.
- * @head: the head for your list.
- * @member: the name of the list_struct within the struct.
- */
-#define list_for_each_entry_reverse(pos, head, member) \
- for (pos = list_entry((head)->prev, typeof(*pos), member); \
- prefetch(pos->member.prev), &pos->member != (head); \
- pos = list_entry(pos->member.prev, typeof(*pos), member))
-#endif /* list_for_each_entry_reverse */
-
-#ifndef list_for_each_entry_safe
-/**
- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos: the type * to use as a loop counter.
- * @n: another type * to use as temporary storage
- * @head: the head for your list.
- * @member: the name of the list_struct within the struct.
- */
-#define list_for_each_entry_safe(pos, n, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member), \
- n = list_entry(pos->member.next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = n, n = list_entry(n->member.next, typeof(*n), member))
-#endif /* list_for_each_entry_safe */
-
-#endif /* __LIBCFS_LUSTRE_LIST_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
- */
-#ifndef __LIBCFS_LLTRACE_H__
-#define __LIBCFS_LLTRACE_H__
-
-#if defined(__linux__)
-#include <libcfs/linux/lltrace.h>
-#elif defined(__APPLE__)
-#include <libcfs/darwin/lltrace.h>
-#elif defined(__WINNT__)
-#include <libcfs/winnt/lltrace.h>
-#else
-#error Unsupported Operating System
-#endif
-
-static inline int ltrace_write_file(char* fname)
-{
- char* argv[3];
-
- argv[0] = "debug_kernel";
- argv[1] = fname;
- argv[2] = "1";
-
- fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-
- return jt_dbg_debug_kernel(3, argv);
-}
-
-static inline int ltrace_clear()
-{
- char* argv[1];
-
- argv[0] = "clear";
-
- fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-
- return jt_dbg_clear_debug_buf(1, argv);
-}
-
-static inline int ltrace_mark(int indent_level, char* text)
-{
- char* argv[2];
- char mark_buf[PATH_MAX];
-
- snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-
- argv[0] = "mark";
- argv[1] = mark_buf;
- return jt_dbg_mark_debug_buf(2, argv);
-}
-
-static inline int ltrace_applymasks()
-{
- char* argv[2];
- argv[0] = "list";
- argv[1] = "applymasks";
-
- fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-
- return jt_dbg_list(2, argv);
-}
-
-
-static inline int ltrace_filter(char* subsys_or_mask)
-{
- char* argv[2];
- argv[0] = "filter";
- argv[1] = subsys_or_mask;
- return jt_dbg_filter(2, argv);
-}
-
-static inline int ltrace_show(char* subsys_or_mask)
-{
- char* argv[2];
- argv[0] = "show";
- argv[1] = subsys_or_mask;
- return jt_dbg_show(2, argv);
-}
-
-static inline int ltrace_start()
-{
- int rc = 0;
- dbg_initialize(0, NULL);
-#ifdef LNET_DEV_ID
- rc = register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH,
- LNET_DEV_MAJOR, LNET_DEV_MINOR);
-#endif
- ltrace_filter("class");
- ltrace_filter("nal");
- ltrace_filter("portals");
-
- ltrace_show("all_types");
- ltrace_filter("trace");
- ltrace_filter("malloc");
- ltrace_filter("net");
- ltrace_filter("page");
- ltrace_filter("other");
- ltrace_filter("info");
- ltrace_applymasks();
-
- return rc;
-}
-
-
-static inline void ltrace_stop()
-{
-#ifdef LNET_DEV_ID
- unregister_ioc_dev(LNET_DEV_ID);
-#endif
-}
-
-static inline int not_uml()
-{
- /* Return Values:
- * 0 when run under UML
- * 1 when run on host
- * <0 when lookup failed
- */
- struct stat buf;
- int rc = stat("/dev/ubd", &buf);
- rc = ((rc<0) && (errno == ENOENT)) ? 1 : rc;
- if (rc<0) {
- fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno));
- rc = 1; /* Assume host */
- }
- return rc;
-}
-
-#define LTRACE_MAX_NOB 256
-static inline void ltrace_add_processnames(char* fname)
-{
- char cmdbuf[LTRACE_MAX_NOB];
- struct timeval tv;
- struct timezone tz;
- int nob;
- int underuml = !not_uml();
-
- gettimeofday(&tv, &tz);
-
- nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
-
- /* Careful - these format strings need to match the CDEBUG
- * formats in portals/linux/debug.c EXACTLY
- */
- nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, "%02x:%06x:%d:%lu.%06lu ",
- S_RPC >> 24, D_VFSTRACE, 0, tv.tv_sec, tv.tv_usec);
-
- if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) {
- nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB,
- "(%s:%d:%s() %d | %d+%lu): ",
- "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L);
- }
- else {
- nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB,
- "(%s:%d:%s() %d+%lu): ",
- "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
- }
-
- nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
- system(cmdbuf);
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LIBCFS_PORTALS_UTILS_H__
-#define __LIBCFS_PORTALS_UTILS_H__
-
-/*
- * portals_utils.h
- *
- */
-#if defined(__linux__)
-#include <libcfs/linux/portals_utils.h>
-#elif defined(__APPLE__)
-#include <libcfs/darwin/portals_utils.h>
-#elif defined(__WINNT__)
-#include <libcfs/winnt/portals_utils.h>
-#else
-#error Unsupported Operating System
-#endif
-
-#endif
+++ /dev/null
-#ifndef _LIBCFS_TYPES_H
-#define _LIBCFS_TYPES_H
-
-/*
- * This file was inttroduced to resolve XT3 (Catamount) build issues.
- * The orignal idea was to move <lustre/types.h> here however at
- * the time of this writing
- * it's unclear what external dependencies are tied
- * to that file (It's not just some source file #including it)
- * there is some build/packaging infrastructure that includes it.
- * Hopefully that will be resolved shortly, that file will
- * be removed, its contents copied here and this comment can be deleted.
- */
-
-#include <lustre/types.h>
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable time API for user-level.
- *
- */
-
-#ifndef __LIBCFS_USER_BITOPS_H__
-#define __LIBCFS_USER_BITOPS_H__
-
-/* test if bit nr is set in bitmap addr; returns previous value of bit nr */
-static __inline__ int set_bit(int nr, unsigned long * addr)
-{
- long mask;
-
- addr += nr / BITS_PER_LONG;
- mask = 1UL << (nr & (BITS_PER_LONG - 1));
- nr = (mask & *addr) != 0;
- *addr |= mask;
- return nr;
-}
-
-/* clear bit nr in bitmap addr; returns previous value of bit nr*/
-static __inline__ int clear_bit(int nr, unsigned long * addr)
-{
- long mask;
-
- addr += nr / BITS_PER_LONG;
- mask = 1UL << (nr & (BITS_PER_LONG - 1));
- nr = (mask & *addr) != 0;
- *addr &= ~mask;
- return nr;
-}
-
-static __inline__ int test_bit(int nr, const unsigned long * addr)
-{
- return ((1UL << (nr & (BITS_PER_LONG - 1))) & ((addr)[nr / BITS_PER_LONG])) != 0;
-}
-
-/* using binary seach */
-static __inline__ unsigned long __ffs(long data)
-{
- int pos = 0;
-
-#if BITS_PER_LONG == 64
- if ((data & 0xFFFFFFFF) == 0) {
- pos += 32;
- data >>= 32;
- }
-#endif
- if ((data & 0xFFFF) == 0) {
- pos += 16;
- data >>= 16;
- }
- if ((data & 0xFF) == 0) {
- pos += 8;
- data >>= 8;
- }
- if ((data & 0xF) == 0) {
- pos += 4;
- data >>= 4;
- }
- if ((data & 0x3) == 0) {
- pos += 2;
- data >>= 2;
- }
- if ((data & 0x1) == 0)
- pos += 1;
-
- return pos;
-}
-
-#define __ffz(x) __ffs(~(x))
-
-unsigned long find_next_bit(unsigned long *addr,
- unsigned long size, unsigned long offset);
-
-unsigned long find_next_zero_bit(unsigned long *addr,
- unsigned long size, unsigned long offset);
-
-#define find_first_bit(addr,size) (find_next_bit((addr),(size),0))
-#define find_first_zero_bit(addr,size) (find_next_zero_bit((addr),(size),0))
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable time API for user-level.
- *
- */
-
-#ifndef __LIBCFS_USER_LOCK_H__
-#define __LIBCFS_USER_LOCK_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-/* Implementations of portable synchronization APIs for liblustre */
-
-/*
- * liblustre is single-threaded, so most "synchronization" APIs are trivial.
- *
- * XXX Liang: There are several branches share lnet with b_hd_newconfig,
- * if we define lock APIs at here, there will be conflict with liblustre
- * in other branches.
- */
-
-#ifndef __KERNEL__
-#include <stdio.h>
-#include <stdlib.h>
-
-#if 0
-/*
- * Optional debugging (magic stamping and checking ownership) can be added.
- */
-
-/*
- * spin_lock
- *
- * - spin_lock_init(x)
- * - spin_lock(x)
- * - spin_unlock(x)
- * - spin_trylock(x)
- *
- * - spin_lock_irqsave(x, f)
- * - spin_unlock_irqrestore(x, f)
- *
- * No-op implementation.
- */
-struct spin_lock {int foo;};
-
-typedef struct spin_lock spinlock_t;
-
-#define SPIN_LOCK_UNLOCKED (spinlock_t) { }
-#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
-
-void spin_lock_init(spinlock_t *lock);
-void spin_lock(spinlock_t *lock);
-void spin_unlock(spinlock_t *lock);
-int spin_trylock(spinlock_t *lock);
-void spin_lock_bh_init(spinlock_t *lock);
-void spin_lock_bh(spinlock_t *lock);
-void spin_unlock_bh(spinlock_t *lock);
-static inline int spin_is_locked(spinlock_t *l) {return 1;}
-
-static inline void spin_lock_irqsave(spinlock_t *l, unsigned long f){}
-static inline void spin_unlock_irqrestore(spinlock_t *l, unsigned long f){}
-
-/*
- * Semaphore
- *
- * - sema_init(x, v)
- * - __down(x)
- * - __up(x)
- */
-typedef struct semaphore {
- int foo;
-} mutex_t;
-
-void sema_init(struct semaphore *s, int val);
-void __down(struct semaphore *s);
-void __up(struct semaphore *s);
-
-/*
- * Mutex:
- *
- * - init_mutex(x)
- * - init_mutex_locked(x)
- * - mutex_up(x)
- * - mutex_down(x)
- */
-#define mutex_up(s) __up(s)
-#define mutex_down(s) __down(s)
-
-#define init_mutex(x) sema_init(x, 1)
-#define init_mutex_locked(x) sema_init(x, 0)
-
-/*
- * Completion:
- *
- * - init_completion(c)
- * - complete(c)
- * - wait_for_completion(c)
- */
-#if 0
-struct completion {};
-
-void init_completion(struct completion *c);
-void complete(struct completion *c);
-void wait_for_completion(struct completion *c);
-#endif
-
-/*
- * rw_semaphore:
- *
- * - init_rwsem(x)
- * - down_read(x)
- * - up_read(x)
- * - down_write(x)
- * - up_write(x)
- */
-struct rw_semaphore {};
-
-void init_rwsem(struct rw_semaphore *s);
-void down_read(struct rw_semaphore *s);
-int down_read_trylock(struct rw_semaphore *s);
-void down_write(struct rw_semaphore *s);
-int down_write_trylock(struct rw_semaphore *s);
-void up_read(struct rw_semaphore *s);
-void up_write(struct rw_semaphore *s);
-
-/*
- * read-write lock : Need to be investigated more!!
- * XXX nikita: for now, let rwlock_t to be identical to rw_semaphore
- *
- * - DECLARE_RWLOCK(l)
- * - rwlock_init(x)
- * - read_lock(x)
- * - read_unlock(x)
- * - write_lock(x)
- * - write_unlock(x)
- */
-typedef struct rw_semaphore rwlock_t;
-
-#define rwlock_init(pl) init_rwsem(pl)
-
-#define read_lock(l) down_read(l)
-#define read_unlock(l) up_read(l)
-#define write_lock(l) down_write(l)
-#define write_unlock(l) up_write(l)
-
-static inline void
-write_lock_irqsave(rwlock_t *l, unsigned long f) { write_lock(l); }
-static inline void
-write_unlock_irqrestore(rwlock_t *l, unsigned long f) { write_unlock(l); }
-
-static inline void
-read_lock_irqsave(rwlock_t *l, unsigned long f) { read_lock(l); }
-static inline void
-read_unlock_irqrestore(rwlock_t *l, unsigned long f) { read_unlock(l); }
-
-/*
- * Atomic for user-space
- * Copied from liblustre
- */
-typedef struct { volatile int counter; } atomic_t;
-
-#define ATOMIC_INIT(i) { (i) }
-#define atomic_read(a) ((a)->counter)
-#define atomic_set(a,b) do {(a)->counter = b; } while (0)
-#define atomic_dec_and_test(a) ((--((a)->counter)) == 0)
-#define atomic_inc(a) (((a)->counter)++)
-#define atomic_dec(a) do { (a)->counter--; } while (0)
-#define atomic_add(b,a) do {(a)->counter += b;} while (0)
-#define atomic_add_return(n,a) ((a)->counter = n)
-#define atomic_inc_return(a) atomic_add_return(1,a)
-#define atomic_sub(b,a) do {(a)->counter -= b;} while (0)
-
-#endif
-
-#ifdef HAVE_LIBPTHREAD
-#include <pthread.h>
-
-/*
- * Completion
- */
-
-struct cfs_completion {
- int c_done;
- pthread_cond_t c_cond;
- pthread_mutex_t c_mut;
-};
-
-void cfs_init_completion(struct cfs_completion *c);
-void cfs_fini_completion(struct cfs_completion *c);
-void cfs_complete(struct cfs_completion *c);
-void cfs_wait_for_completion(struct cfs_completion *c);
-
-/*
- * atomic.h
- */
-
-typedef struct { volatile int counter; } cfs_atomic_t;
-
-int cfs_atomic_read(cfs_atomic_t *a);
-void cfs_atomic_set(cfs_atomic_t *a, int b);
-int cfs_atomic_dec_and_test(cfs_atomic_t *a);
-void cfs_atomic_inc(cfs_atomic_t *a);
-void cfs_atomic_dec(cfs_atomic_t *a);
-void cfs_atomic_add(int b, cfs_atomic_t *a);
-void cfs_atomic_sub(int b, cfs_atomic_t *a);
-
-#endif /* HAVE_LIBPTHREAD */
-
-/* !__KERNEL__ */
-#endif
-
-/* __LIBCFS_USER_LOCK_H__ */
-#endif
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable time API for user-level.
- *
- */
-
-#ifndef __LIBCFS_USER_PRIM_H__
-#define __LIBCFS_USER_PRIM_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-/* Implementations of portable APIs for liblustre */
-
-/*
- * liblustre is single-threaded, so most "synchronization" APIs are trivial.
- */
-
-#ifndef __KERNEL__
-
-#include <stdlib.h>
-#include <string.h>
-#include <sys/signal.h>
-#include <sys/mman.h>
-#include <libcfs/list.h>
-#include <libcfs/user-time.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#ifdef HAVE_LIBPTHREAD
-#include <pthread.h>
-#endif
-
-
-/*
- * Wait Queue. No-op implementation.
- */
-
-typedef struct cfs_waitlink {
- struct list_head sleeping;
- void *process;
-} cfs_waitlink_t;
-
-typedef struct cfs_waitq {
- struct list_head sleepers;
-} cfs_waitq_t;
-
-void cfs_waitq_init(struct cfs_waitq *waitq);
-void cfs_waitlink_init(struct cfs_waitlink *link);
-void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
-void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
- struct cfs_waitlink *link);
-void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
-void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
-int cfs_waitq_active(struct cfs_waitq *waitq);
-void cfs_waitq_signal(struct cfs_waitq *waitq);
-void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
-void cfs_waitq_broadcast(struct cfs_waitq *waitq);
-void cfs_waitq_wait(struct cfs_waitlink *link, int state);
-int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout);
-#define cfs_schedule_timeout(s, t) \
- do { \
- cfs_waitlink_t l; \
- cfs_waitq_timedwait(&l, s, t); \
- } while (0)
-
-#define CFS_TASK_INTERRUPTIBLE (0)
-#define CFS_TASK_UNINT (0)
-
-/* 2.4 defines */
-
-/* XXX
- * for this moment, liblusre will not rely OST for non-page-aligned write
- */
-#define LIBLUSTRE_HANDLE_UNALIGNED_PAGE
-
-struct page {
- void *addr;
- unsigned long index;
- struct list_head list;
- unsigned long private;
-
- /* internally used by liblustre file i/o */
- int _offset;
- int _count;
-#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE
- int _managed;
-#endif
- struct list_head _node;
-};
-
-typedef struct page cfs_page_t;
-
-#ifndef PAGE_SIZE
-
-/* 4K */
-#define CFS_PAGE_SHIFT 12
-#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT)
-#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1))
-
-#else
-
-#define CFS_PAGE_SIZE PAGE_SIZE
-#define CFS_PAGE_SHIFT PAGE_SHIFT
-#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1))
-
-#endif
-
-cfs_page_t *cfs_alloc_page(unsigned int flags);
-void cfs_free_page(cfs_page_t *pg);
-void *cfs_page_address(cfs_page_t *pg);
-void *cfs_kmap(cfs_page_t *pg);
-void cfs_kunmap(cfs_page_t *pg);
-
-#define cfs_get_page(p) __I_should_not_be_called__(at_all)
-#define cfs_page_count(p) __I_should_not_be_called__(at_all)
-#define cfs_page_index(p) ((p)->index)
-
-/*
- * Memory allocator
- * Inline function, so utils can use them without linking of libcfs
- */
-#define __ALLOC_ZERO (1 << 2)
-static inline void *cfs_alloc(size_t nr_bytes, u_int32_t flags)
-{
- void *result;
-
- result = malloc(nr_bytes);
- if (result != NULL && (flags & __ALLOC_ZERO))
- memset(result, 0, nr_bytes);
- return result;
-}
-
-#define cfs_free(addr) free(addr)
-#define cfs_alloc_large(nr_bytes) cfs_alloc(nr_bytes, 0)
-#define cfs_free_large(addr) cfs_free(addr)
-
-#define CFS_ALLOC_ATOMIC_TRY (0)
-/*
- * SLAB allocator
- */
-typedef struct {
- int size;
-} cfs_mem_cache_t;
-
-#define SLAB_HWCACHE_ALIGN 0
-#define SLAB_KERNEL 0
-#define SLAB_NOFS 0
-
-cfs_mem_cache_t *
-cfs_mem_cache_create(const char *, size_t, size_t, unsigned long);
-int cfs_mem_cache_destroy(cfs_mem_cache_t *c);
-void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp);
-void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr);
-
-typedef int (cfs_read_proc_t)(char *page, char **start, off_t off,
- int count, int *eof, void *data);
-
-struct file; /* forward ref */
-typedef int (cfs_write_proc_t)(struct file *file, const char *buffer,
- unsigned long count, void *data);
-
-/*
- * Signal
- */
-typedef sigset_t cfs_sigset_t;
-
-/*
- * Timer
- */
-#include <sys/time.h>
-
-typedef struct {
- struct list_head tl_list;
- void (*function)(unsigned long unused);
- unsigned long data;
- long expires;
-} cfs_timer_t;
-
-#define cfs_init_timer(t) do {} while(0)
-#define cfs_jiffies \
-({ \
- unsigned long _ret = 0; \
- struct timeval tv; \
- if (gettimeofday(&tv, NULL) == 0) \
- _ret = tv.tv_sec; \
- _ret; \
-})
-
-static inline int cfs_timer_init(cfs_timer_t *l, void (* func)(unsigned long), void *arg)
-{
- CFS_INIT_LIST_HEAD(&l->tl_list);
- l->function = func;
- l->data = (unsigned long)arg;
- return 0;
-}
-
-static inline int cfs_timer_is_armed(cfs_timer_t *l)
-{
- if (cfs_time_before(cfs_jiffies, l->expires))
- return 1;
- else
- return 0;
-}
-
-static inline void cfs_timer_arm(cfs_timer_t *l, int thetime)
-{
- l->expires = thetime;
-}
-
-static inline void cfs_timer_disarm(cfs_timer_t *l)
-{
-}
-
-static inline long cfs_timer_deadline(cfs_timer_t *l)
-{
- return l->expires;
-}
-
-#if 0
-#define cfs_init_timer(t) do {} while(0)
-void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg);
-void cfs_timer_done(struct cfs_timer *t);
-void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline);
-void cfs_timer_disarm(struct cfs_timer *t);
-int cfs_timer_is_armed(struct cfs_timer *t);
-
-cfs_time_t cfs_timer_deadline(struct cfs_timer *t);
-#endif
-
-#define in_interrupt() (0)
-
-static inline void cfs_pause(cfs_duration_t d)
-{
- struct timespec s;
-
- cfs_duration_nsec(d, &s);
- nanosleep(&s, NULL);
-}
-
-typedef void cfs_psdev_t;
-
-static inline int cfs_psdev_register(cfs_psdev_t *foo)
-{
- return 0;
-}
-
-static inline int cfs_psdev_deregister(cfs_psdev_t *foo)
-{
- return 0;
-}
-
-#define cfs_lock_kernel() do {} while (0)
-#define cfs_sigfillset(l) do {} while (0)
-#define cfs_recalc_sigpending(l) do {} while (0)
-#define cfs_kernel_thread(l,m,n) LBUG()
-
-#ifdef HAVE_LIBPTHREAD
-typedef int (*cfs_thread_t)(void *);
-int cfs_create_thread(cfs_thread_t func, void *arg);
-#else
-#define cfs_create_thread(l,m) LBUG()
-#endif
-
-int cfs_parse_int_tunable(int *value, char *name);
-uid_t cfs_curproc_uid(void);
-
-#define LIBCFS_REALLOC(ptr, size) realloc(ptr, size)
-
-#define cfs_online_cpus() sysconf(_SC_NPROCESSORS_ONLN)
-
-// static inline void local_irq_save(unsigned long flag) {return;}
-// static inline void local_irq_restore(unsigned long flag) {return;}
-
-enum {
- CFS_STACK_TRACE_DEPTH = 16
-};
-
-struct cfs_stack_trace {
- void *frame[CFS_STACK_TRACE_DEPTH];
-};
-
-/*
- * arithmetic
- */
-#define do_div(a,b) \
- ({ \
- unsigned long remainder;\
- remainder = (a) % (b); \
- (a) = (a) / (b); \
- (remainder); \
- })
-
-/* !__KERNEL__ */
-#endif
-
-/* __LIBCFS_USER_PRIM_H__ */
-#endif
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __LIBCFS_USER_TCPIP_H__
-#define __LIBCFS_USER_TCPIP_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifndef __KERNEL__
-
-#include <sys/uio.h>
-
-/*
- * Functions to get network interfaces info
- */
-
-int libcfs_sock_ioctl(int cmd, unsigned long arg);
-int libcfs_ipif_query (char *name, int *up, __u32 *ip);
-void libcfs_ipif_free_enumeration (char **names, int n);
-int libcfs_ipif_enumerate (char ***namesp);
-
-/*
- * Network function used by user-land lnet acceptor
- */
-
-int libcfs_sock_listen (int *sockp, __u32 local_ip, int local_port, int backlog);
-int libcfs_sock_accept (int *newsockp, int sock, __u32 *peer_ip, int *peer_port);
-int libcfs_sock_read (int sock, void *buffer, int nob, int timeout);
-void libcfs_sock_abort_accept(__u16 port);
-
-/*
- * Network functions of common use
- */
-
-int libcfs_getpeername(int sock_fd, __u32 *ipaddr_p, __u16 *port_p);
-int libcfs_socketpair(int *fdp);
-int libcfs_fcntl_nonblock(int fd);
-int libcfs_sock_set_nagle(int fd, int nagle);
-int libcfs_sock_set_bufsiz(int fd, int bufsiz);
-int libcfs_sock_create(int *fdp);
-int libcfs_sock_bind_to_port(int fd, __u16 port);
-int libcfs_sock_connect(int fd, __u32 ip, __u16 port);
-int libcfs_sock_writev(int fd, const struct iovec *vector, int count);
-int libcfs_sock_readv(int fd, const struct iovec *vector, int count);
-
-/*
- * Macros for easy printing IP-adresses
- */
-
-#define NIPQUAD(addr) \
- ((unsigned char *)&addr)[0], \
- ((unsigned char *)&addr)[1], \
- ((unsigned char *)&addr)[2], \
- ((unsigned char *)&addr)[3]
-
-#if defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)
-#define HIPQUAD(addr) \
- ((unsigned char *)&addr)[3], \
- ((unsigned char *)&addr)[2], \
- ((unsigned char *)&addr)[1], \
- ((unsigned char *)&addr)[0]
-#elif defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)
-#define HIPQUAD NIPQUAD
-#else
-#error "Undefined byteorder??"
-#endif /* __LITTLE_ENDIAN */
-
-#endif /* !__KERNEL__ */
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable time API for user-level.
- *
- */
-
-#ifndef __LIBCFS_USER_TIME_H__
-#define __LIBCFS_USER_TIME_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-/* Portable time API */
-
-/*
- * Platform provides three opaque data-types:
- *
- * cfs_time_t represents point in time. This is internal kernel
- * time rather than "wall clock". This time bears no
- * relation to gettimeofday().
- *
- * cfs_duration_t represents time interval with resolution of internal
- * platform clock
- *
- * cfs_fs_time_t represents instance in world-visible time. This is
- * used in file-system time-stamps
- *
- * cfs_time_t cfs_time_current(void);
- * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
- * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
- * int cfs_time_before (cfs_time_t, cfs_time_t);
- * int cfs_time_beforeq(cfs_time_t, cfs_time_t);
- *
- * cfs_duration_t cfs_duration_build(int64_t);
- *
- * time_t cfs_duration_sec (cfs_duration_t);
- * void cfs_duration_usec(cfs_duration_t, struct timeval *);
- * void cfs_duration_nsec(cfs_duration_t, struct timespec *);
- *
- * void cfs_fs_time_current(cfs_fs_time_t *);
- * time_t cfs_fs_time_sec (cfs_fs_time_t *);
- * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
- * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
- * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
- * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
- *
- * CFS_TIME_FORMAT
- * CFS_DURATION_FORMAT
- *
- */
-
-#ifndef __KERNEL__
-
-#define ONE_BILLION ((u_int64_t)1000000000)
-#define ONE_MILLION 1000000
-
-/*
- * Liblustre. time(2) based implementation.
- */
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <time.h>
-
-typedef time_t cfs_fs_time_t;
-typedef time_t cfs_time_t;
-typedef long cfs_duration_t;
-
-static inline cfs_time_t cfs_time_current(void)
-{
- return time(NULL);
-}
-
-static inline cfs_duration_t cfs_time_seconds(int seconds)
-{
- return seconds;
-}
-
-static inline time_t cfs_time_current_sec(void)
-{
- return cfs_time_seconds(cfs_time_current());
-}
-
-static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
-{
- return t1 < t2;
-}
-
-static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
-{
- return t1 <= t2;
-}
-
-static inline cfs_duration_t cfs_duration_build(int64_t nano)
-{
- return (cfs_duration_t) (nano / ONE_BILLION);
-}
-
-static inline time_t cfs_duration_sec(cfs_duration_t d)
-{
- return d;
-}
-
-static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
-{
- s->tv_sec = d;
- s->tv_usec = 0;
-}
-
-static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
-{
- s->tv_sec = d;
- s->tv_nsec = 0;
-}
-
-static inline void cfs_fs_time_current(cfs_fs_time_t *t)
-{
- time(t);
-}
-
-static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
-{
- return *t;
-}
-
-static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
-{
- v->tv_sec = *t;
- v->tv_usec = 0;
-}
-
-static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
-{
- s->tv_sec = *t;
- s->tv_nsec = 0;
-}
-
-static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
-{
- return *t1 < *t2;
-}
-
-static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
-{
- return *t1 <= *t2;
-}
-
-#define CFS_TICK (1)
-
-static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
-{
- return t + d;
-}
-
-static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
-{
- return t1 - t2;
-}
-
-#define cfs_time_current_64 cfs_time_current
-#define cfs_time_add_64 cfs_time_add
-#define cfs_time_shift_64 cfs_time_shift
-#define cfs_time_before_64 cfs_time_before
-#define cfs_time_beforeq_64 cfs_time_beforeq
-
-#ifndef CFS_TIME_T
-#define CFS_TIME_T "%u"
-#endif
-
-#define CFS_DURATION_T "%ld"
-
-/* !__KERNEL__ */
-#endif
-
-/* __LIBCFS_USER_TIME_H__ */
-#endif
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef __LIBCFS_WINNT_KP30_H__
-#define __LIBCFS_WINNT_KP30_H__
-
-#ifndef __LIBCFS_KP30_H__
-#error Do not #include this file directly. #include <libcfs/kp30.h> instead
-#endif
-
-#include <libcfs/winnt/portals_compat25.h>
-#include <lnet/types.h>
-
-#ifdef __KERNEL__
-
-/* Module parameter support */
-#define CFS_MODULE_PARM(name, t, type, perm, desc)
-
-#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */
-
-
-static inline void our_cond_resched()
-{
- schedule_timeout(1i64);
-}
-
-#ifdef CONFIG_SMP
-#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */
-#else
-#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
-#endif
-
-#error Need a winnt version of panic()
-#define LIBCFS_PANIC(msg) KeBugCheckEx(msg, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL)
-#error libcfs_register_panic_notifier() missing
-#error libcfs_unregister_panic_notifier() missing
-
-#define cfs_work_struct_t WORK_QUEUE_ITEM
-#define cfs_prepare_work(tq, routine, contex)
-#define cfs_schedule_work(tq)
-#define cfs_get_work_data(type,field,data) (data)
-
-/* ------------------------------------------------------------------- */
-
-#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x)
-#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x)
-
-#define PORTAL_SYMBOL_GET(x) (cfs_symbol_get(#x))
-#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x)
-
-#define PORTAL_MODULE_USE do{}while(0)
-#define PORTAL_MODULE_UNUSE do{}while(0)
-
-#define printk DbgPrint
-#define ptintf DbgPrint
-
-#else /* !__KERNEL__ */
-
-# include <stdio.h>
-# include <stdlib.h>
-#ifdef __CYGWIN__
-# include <cygwin-ioctl.h>
-#endif
-# include <time.h>
-
-#endif /* End of !__KERNEL__ */
-
-/******************************************************************************/
-/* Light-weight trace
- * Support for temporary event tracing with minimal Heisenberg effect. */
-#define LWT_SUPPORT 0
-
-/* kernel hasn't defined this? */
-typedef struct {
- __s64 lwte_when;
- char *lwte_where;
- void *lwte_task;
- long_ptr lwte_p1;
- long_ptr lwte_p2;
- long_ptr lwte_p3;
- long_ptr lwte_p4;
-# if BITS_PER_LONG > 32
- long_ptr lwte_pad;
-# endif
-} lwt_event_t;
-
-
-# define LWT_EVENT(p1,p2,p3,p4)
-
-
-/* ------------------------------------------------------------------ */
-
-#define IOCTL_LIBCFS_TYPE long_ptr
-
-#ifdef __CYGWIN__
-# ifndef BITS_PER_LONG
-# if (~0UL) == 0xffffffffUL
-# define BITS_PER_LONG 32
-# else
-# define BITS_PER_LONG 64
-# endif
-# endif
-#endif
-
-#if BITS_PER_LONG > 32
-# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
-# define LL_POISON ((long_ptr)0x5a5a5a5a5a5a5a5a)
-# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a5a5a5a5a)
-#else
-# define LI_POISON ((int)0x5a5a5a5a)
-# define LL_POISON ((long_ptr)0x5a5a5a5a)
-# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a)
-#endif
-
-#if defined(__x86_64__)
-# define LPU64 "%I64u"
-# define LPD64 "%I64d"
-# define LPX64 "%I64x"
-# define LPSZ "%lu"
-# define LPSSZ "%ld"
-#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
-# define LPU64 "%I64u"
-# define LPD64 "%I64d"
-# define LPX64 "%I64x"
-# define LPSZ "%u"
-# define LPSSZ "%d"
-#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
-# define LPU64 "%I64u"
-# define LPD64 "%I64d"
-# define LPX64 "%I64x"
-# define LPSZ "%u"
-# define LPSSZ "%d"
-#endif
-#ifndef LPU64
-# error "No word size defined"
-#endif
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef __LIBCFS_WINNT_LIBCFS_H__
-#define __LIBCFS_WINNT_LIBCFS_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-/* workgroud for VC compiler */
-#ifndef __FUNCTION__
-#define __FUNCTION__ "generic"
-#endif
-
-#include <libcfs/winnt/winnt-types.h>
-#include <libcfs/portals_utils.h>
-#include <libcfs/winnt/winnt-time.h>
-#include <libcfs/winnt/winnt-lock.h>
-#include <libcfs/winnt/winnt-mem.h>
-#include <libcfs/winnt/winnt-prim.h>
-#include <libcfs/winnt/winnt-fs.h>
-#include <libcfs/winnt/winnt-tcpip.h>
-
-struct ptldebug_header {
- __u32 ph_len;
- __u32 ph_flags;
- __u32 ph_subsys;
- __u32 ph_mask;
- __u32 ph_cpu_id;
- __u32 ph_sec;
- __u64 ph_usec;
- __u32 ph_stack;
- __u32 ph_pid;
- __u32 ph_extern_pid;
- __u32 ph_line_num;
-} __attribute__((packed));
-
-#ifdef __KERNEL__
-
-enum {
- /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */
- CFS_STACK_TRACE_DEPTH = 16
-};
-
-struct cfs_stack_trace {
- void *frame[CFS_STACK_TRACE_DEPTH];
-};
-
-static inline __u32 query_stack_size()
-{
- ULONG LowLimit, HighLimit;
-
- IoGetStackLimits(&LowLimit, &HighLimit);
- ASSERT(HighLimit > LowLimit);
-
- return (__u32) (HighLimit - LowLimit);
-}
-#else
-static inline __u32 query_stack_size()
-{
- return 4096;
-}
-#endif
-
-
-#ifndef THREAD_SIZE
-# define THREAD_SIZE query_stack_size()
-#endif
-
-#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-
-#ifdef __KERNEL__
-# ifdef __ia64__
-# define CDEBUG_STACK() (THREAD_SIZE - \
- ((ulong_ptr)__builtin_dwarf_cfa() & \
- (THREAD_SIZE - 1)))
-# else
-# define CDEBUG_STACK (IoGetRemainingStackSize())
-# error "This doesn't seem right; CDEBUG_STACK should grow with the stack"
-# endif /* __ia64__ */
-
-#define CHECK_STACK() \
-do { \
- unsigned long _stack = CDEBUG_STACK(); \
- \
- if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) { \
- libcfs_stack = _stack; \
- libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING, \
- __FILE__, NULL, __LINE__, \
- "maximum lustre stack %lu\n", _stack); \
- } \
-} while (0)
-#else /* !__KERNEL__ */
-#define CHECK_STACK() do { } while(0)
-#define CDEBUG_STACK() (0L)
-#endif /* __KERNEL__ */
-
-/* initial pid */
-#define LUSTRE_LNET_PID 12345
-
-#define ENTRY_NESTING_SUPPORT (0)
-#define ENTRY_NESTING do {;} while (0)
-#define EXIT_NESTING do {;} while (0)
-#define __current_nesting_level() (0)
-
-#endif /* _WINNT_LIBCFS_H */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_WINNT_LLTRACE_H__
-#define __LIBCFS_WINNT_LLTRACE_H__
-
-#ifndef __LIBCFS_LLTRACE_H__
-#error Do not #include this file directly. #include <libcfs/lltrace.h> instead
-#endif
-
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef __LIBCFS_WINNT_PORTALS_COMPAT_H__
-#define __LIBCFS_WINNT_PORTALS_COMPAT_H__
-
-
-
-#endif /* _PORTALS_COMPAT_H */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_WINNT_PORTALS_UTILS_H__
-#define __LIBCFS_WINNT_PORTALS_UTILS_H__
-
-#ifndef __LIBCFS_PORTALS_UTILS_H__
-#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead
-#endif
-
-#ifndef cfs_is_flag_set
-#define cfs_is_flag_set(x,f) (((x)&(f))==(f))
-#endif
-
-#ifndef cfs_set_flag
-#define cfs_set_flag(x,f) ((x) |= (f))
-#endif
-
-#ifndef cfs_clear_flag
-#define cfs_clear_flag(x,f) ((x) &= ~(f))
-#endif
-
-
-static inline __u32 __do_div(__u32 * n, __u32 b)
-{
- __u32 mod;
-
- mod = *n % b;
- *n = *n / b;
- return mod;
-}
-
-#define do_div(n,base) __do_div((__u32 *)&(n), (__u32) (base))
-
-#ifdef __KERNEL__
-
-#include <stdlib.h>
-#include <libcfs/winnt/winnt-types.h>
-
-char * strsep(char **s, const char *ct);
-static inline size_t strnlen(const char * s, size_t count) {
- size_t len = 0;
- while(len < count && s[len++]);
- return len;
-}
-char * ul2dstr(ulong_ptr address, char *buf, int len);
-
-#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3)
-#define simple_strtoll(a1, a2, a3) (__s64)strtoull(a1, a2, a3)
-#define simple_strtoull(a1, a2, a3) strtoull(a1, a2, a3)
-
-unsigned long simple_strtoul(const char *cp,char **endp, unsigned int base);
-
-static inline int test_bit(int nr, void * addr)
-{
- return ((1UL << (nr & 31)) & (((volatile ULONG *) addr)[nr >> 5])) != 0;
-}
-
-static inline void clear_bit(int nr, void * addr)
-{
- (((volatile ULONG *) addr)[nr >> 5]) &= (~(1UL << (nr & 31)));
-}
-
-
-static inline void set_bit(int nr, void * addr)
-{
- (((volatile ULONG *) addr)[nr >> 5]) |= (1UL << (nr & 31));
-}
-
-static inline void read_random(char *buf, int len)
-{
- ULONG Seed = (ULONG) buf;
- Seed = RtlRandom(&Seed);
- while (len >0) {
- if (len > sizeof(ULONG)) {
- memcpy(buf, &Seed, sizeof(ULONG));
- len -= sizeof(ULONG);
- buf += sizeof(ULONG);
- } else {
- memcpy(buf, &Seed, len);
- len = 0;
- break;
- }
- }
-}
-#define get_random_bytes(buf, len) read_random(buf, len)
-
-/* do NOT use function or expression as parameters ... */
-
-#ifndef min_t
-#define min_t(type,x,y) (type)(x) < (type)(y) ? (x): (y)
-#endif
-
-#ifndef max_t
-#define max_t(type,x,y) (type)(x) < (type)(y) ? (y): (x)
-#endif
-
-
-#define NIPQUAD(addr) \
- ((unsigned char *)&addr)[0], \
- ((unsigned char *)&addr)[1], \
- ((unsigned char *)&addr)[2], \
- ((unsigned char *)&addr)[3]
-
-#define HIPQUAD(addr) \
- ((unsigned char *)&addr)[3], \
- ((unsigned char *)&addr)[2], \
- ((unsigned char *)&addr)[1], \
- ((unsigned char *)&addr)[0]
-
-static int copy_from_user(void *to, void *from, int c)
-{
- memcpy(to, from, c);
- return 0;
-}
-
-static int copy_to_user(void *to, void *from, int c)
-{
- memcpy(to, from, c);
- return 0;
-}
-
-
-#define put_user(x, ptr) \
-( \
- *(ptr) = x, \
- 0 \
-)
-
-
-#define get_user(x,ptr) \
-( \
- x = *(ptr), \
- 0 \
-)
-
-#define num_physpages (64 * 1024)
-
-#define snprintf _snprintf
-#define vsnprintf _vsnprintf
-
-
-#endif /* !__KERNEL__ */
-
-int cfs_error_code(NTSTATUS);
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * File operations & routines.
- *
- */
-
-#ifndef __LIBCFS_WINNT_CFS_FS_H__
-#define __LIBCFS_WINNT_CFS_FS_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-
-#define MINORBITS 8
-#define MINORMASK ((1U << MINORBITS) - 1)
-
-#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
-#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
-#define NODEV 0
-#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
-
-
-#ifdef __KERNEL__
-
-struct file_operations
-{
- loff_t (*lseek)(struct file * file, loff_t offset, int origin);
- ssize_t (*read) (struct file * file, char * buf, size_t nbytes, loff_t *ppos);
- ssize_t (*write)(struct file * file, const char * buffer,
- size_t count, loff_t *ppos);
- int (*ioctl) (struct file *, unsigned int, ulong_ptr);
- int (*open) (struct file *);
- int (*release) (struct file *);
-};
-
-struct file {
-
- cfs_handle_t f_handle;
- unsigned int f_flags;
- mode_t f_mode;
- ulong_ptr f_count;
-
- //struct list_head f_list;
- //struct dentry * f_dentry;
-
- cfs_proc_entry_t * proc_dentry;
- cfs_file_operations_t * f_op;
-
- size_t f_size;
- loff_t f_pos;
- unsigned int f_uid, f_gid;
- int f_error;
-
- ulong_ptr f_version;
-
- void * private_data;
-
- char f_name[1];
-
-};
-
-#define cfs_filp_size(f) ((f)->f_size)
-#define cfs_filp_poff(f) (&(f)->f_pos)
-
-cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err);
-int cfs_filp_close(cfs_file_t *fp);
-int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos);
-int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos);
-int cfs_filp_fsync(cfs_file_t *fp);
-int cfs_get_file(cfs_file_t *fp);
-int cfs_put_file(cfs_file_t *fp);
-int cfs_file_count(cfs_file_t *fp);
-
-
-
-/*
- * CFS_FLOCK routines
- */
-
-typedef struct file_lock{
- int fl_type;
- pid_t fl_pid;
- size_t fl_len;
- off_t fl_start;
- off_t fl_end;
-} cfs_flock_t;
-
-#define CFS_INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
-#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t)
-
-#define cfs_flock_type(fl) ((fl)->fl_type)
-#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0)
-#define cfs_flock_pid(fl) ((fl)->fl_pid)
-#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0)
-#define cfs_flock_start(fl) ((fl)->fl_start)
-#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0)
-#define cfs_flock_end(fl) ((fl)->fl_end)
-#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0)
-
-#define ATTR_MODE 0x0001
-#define ATTR_UID 0x0002
-#define ATTR_GID 0x0004
-#define ATTR_SIZE 0x0008
-#define ATTR_ATIME 0x0010
-#define ATTR_MTIME 0x0020
-#define ATTR_CTIME 0x0040
-#define ATTR_ATIME_SET 0x0080
-#define ATTR_MTIME_SET 0x0100
-#define ATTR_FORCE 0x0200 /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG 0x0400
-#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
-#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
-//#define ATTR_CTIME_SET 0x2000
-#define ATTR_BLOCKS 0x4000
-#define ATTR_KILL_SUID 0
-#define ATTR_KILL_SGID 0
-
-#define in_group_p(x) (0)
-
-/*
- * proc fs routines
- */
-
-int proc_init_fs();
-void proc_destroy_fs();
-
-
-/*
- * misc
- */
-
-static inline void *ERR_PTR(long_ptr error)
-{
- return (void *) error;
-}
-
-static inline long_ptr PTR_ERR(const void *ptr)
-{
- return (long_ptr) ptr;
-}
-
-static inline long_ptr IS_ERR(const void *ptr)
-{
- return (ulong_ptr)ptr > (ulong_ptr)-1000L;
-}
-
-#else /* !__KERNEL__ */
-
-#define CREATE_NEW 1
-#define CREATE_ALWAYS 2
-#define OPEN_EXISTING 3
-#define OPEN_ALWAYS 4
-#define TRUNCATE_EXISTING 5
-
-#define SECTION_QUERY 0x0001
-#define SECTION_MAP_WRITE 0x0002
-#define SECTION_MAP_READ 0x0004
-#define SECTION_MAP_EXECUTE 0x0008
-#define SECTION_EXTEND_SIZE 0x0010
-
-#define FILE_MAP_COPY SECTION_QUERY
-#define FILE_MAP_WRITE SECTION_MAP_WRITE
-#define FILE_MAP_READ SECTION_MAP_READ
-#define FILE_MAP_ALL_ACCESS SECTION_ALL_ACCESS
-
-
-NTSYSAPI
-HANDLE
-NTAPI
-CreateFileA(
- IN LPCSTR lpFileName,
- IN DWORD dwDesiredAccess,
- IN DWORD dwShareMode,
- IN PVOID lpSecurityAttributes,
- IN DWORD dwCreationDisposition,
- IN DWORD dwFlagsAndAttributes,
- IN HANDLE hTemplateFile
- );
-
-#define CreateFile CreateFileA
-
-NTSYSAPI
-BOOL
-NTAPI
-CloseHandle(
- IN OUT HANDLE hObject
- );
-
-NTSYSAPI
-HANDLE
-NTAPI
-CreateFileMappingA(
- IN HANDLE hFile,
- IN PVOID lpFileMappingAttributes,
- IN DWORD flProtect,
- IN DWORD dwMaximumSizeHigh,
- IN DWORD dwMaximumSizeLow,
- IN LPCSTR lpName
- );
-#define CreateFileMapping CreateFileMappingA
-
-NTSYSAPI
-DWORD
-NTAPI
-GetFileSize(
- IN HANDLE hFile,
- OUT DWORD * lpFileSizeHigh
- );
-
-NTSYSAPI
-PVOID
-NTAPI
-MapViewOfFile(
- IN HANDLE hFileMappingObject,
- IN DWORD dwDesiredAccess,
- IN DWORD dwFileOffsetHigh,
- IN DWORD dwFileOffsetLow,
- IN SIZE_T dwNumberOfBytesToMap
- );
-
-NTSYSAPI
-BOOL
-NTAPI
-UnmapViewOfFile(
- IN PVOID lpBaseAddress
- );
-
-#endif /* __KERNEL__ */
-
-typedef struct {
- void *d;
-} cfs_dentry_t;
-
-
-#endif /* __LIBCFS_WINNT_CFS_FS_H__*/
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_WINNT_CFS_LOCK_H__
-#define __LIBCFS_WINNT_CFS_LOCK_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-
-
-/*
- * nt specific part ...
- */
-
-
-/* atomic */
-
-typedef struct { volatile int counter; } atomic_t;
-
-#define ATOMIC_INIT(i) { i }
-
-#define atomic_read(v) ((v)->counter)
-#define atomic_set(v,i) (((v)->counter) = (i))
-
-void FASTCALL atomic_add(int i, atomic_t *v);
-void FASTCALL atomic_sub(int i, atomic_t *v);
-
-int FASTCALL atomic_sub_and_test(int i, atomic_t *v);
-
-void FASTCALL atomic_inc(atomic_t *v);
-void FASTCALL atomic_dec(atomic_t *v);
-
-int FASTCALL atomic_dec_and_test(atomic_t *v);
-int FASTCALL atomic_inc_and_test(atomic_t *v);
-
-
-/* event */
-
-typedef KEVENT event_t;
-
-/*
- * cfs_init_event
- * To initialize the event object
- *
- * Arguments:
- * event: pointer to the event object
- * type: Non Zero: SynchronizationEvent
- * Zero: NotificationEvent
- * status: the initial stats of the event
- * Non Zero: signaled
- * Zero: un-signaled
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-static inline void
- cfs_init_event(event_t *event, int type, int status)
-{
- KeInitializeEvent(
- event,
- (type) ? SynchronizationEvent: NotificationEvent,
- (status) ? TRUE : FALSE
- );
-}
-
-/*
- * cfs_wait_event
- * To wait on an event to syncrhonize the process
- *
- * Arguments:
- * event: pointer to the event object
- * timeout: the timeout for waitting or 0 means infinite time.
- *
- * Return Value:
- * Zero: waiting timeouts
- * Non Zero: event signaled ...
- *
- * Notes:
- * N/A
- */
-
-static inline int64_t
-cfs_wait_event(event_t * event, int64_t timeout)
-{
- NTSTATUS Status;
- LARGE_INTEGER TimeOut;
-
- TimeOut.QuadPart = -1 * (10000000/HZ) * timeout;
-
- Status = KeWaitForSingleObject(
- event,
- Executive,
- KernelMode,
- FALSE,
- (timeout != 0) ? (&TimeOut) : (NULL)
- );
-
- if (Status == STATUS_TIMEOUT) {
- return 0;
- }
-
- return TRUE; // signaled case
-}
-
-/*
- * cfs_wake_event
- * To signal the event object
- *
- * Arguments:
- * event: pointer to the event object
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline int
-cfs_wake_event(event_t * event)
-{
- return (KeSetEvent(event, 0, FALSE) != 0);
-}
-
-/*
- * cfs_clear_event
- * To clear/reset the status of the event object
- *
- * Arguments:
- * event: pointer to the event object
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void
-cfs_clear_event(event_t * event)
-{
- KeResetEvent(event);
-}
-
-
-/*
- * IMPORTANT !!!!!!!!
- *
- * All locks' declaration are not guaranteed to be initialized,
- * Althought some of they are initialized in Linux. All locks
- * declared by CFS_DECL_* should be initialized explicitly.
- */
-
-
-/*
- * spin lock defintions / routines
- */
-
-/*
- * Warning:
- *
- * for spinlock operations, try to grab nesting acquisition of
- * spinlock will cause dead-lock in MP system and current irql
- * overwritten for UP system. (UP system could allow nesting spin
- * acqisition, because it's not spin at all just raising the irql.)
- *
- */
-
-typedef struct spin_lock {
-
- KSPIN_LOCK lock;
- KIRQL irql;
-
-} spinlock_t;
-
-
-#define CFS_DECL_SPIN(name) spinlock_t name;
-#define CFS_DECL_SPIN_EXTERN(name) extern spinlock_t name;
-
-
-static inline void spin_lock_init(spinlock_t *lock)
-{
- KeInitializeSpinLock(&(lock->lock));
-}
-
-
-static inline void spin_lock(spinlock_t *lock)
-{
- KeAcquireSpinLock(&(lock->lock), &(lock->irql));
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
- KIRQL irql = lock->irql;
- KeReleaseSpinLock(&(lock->lock), irql);
-}
-
-
-#define spin_lock_irqsave(lock, flags) do {(flags) = 0; spin_lock(lock);} while(0)
-#define spin_unlock_irqrestore(lock, flags) do {spin_unlock(lock);} while(0)
-
-
-/* There's no corresponding routine in windows kernel.
- We must realize a light one of our own. But there's
- no way to identify the system is MP build or UP build
- on the runtime. We just uses a workaround for it. */
-
-extern int MPSystem;
-
-static int spin_trylock(spinlock_t *lock)
-{
- KIRQL Irql;
- int rc = 0;
-
- ASSERT(lock != NULL);
-
- KeRaiseIrql(DISPATCH_LEVEL, &Irql);
-
- if (MPSystem) {
- if (0 == (ulong_ptr)lock->lock) {
-#if _X86_
- __asm {
- mov edx, dword ptr [ebp + 8]
- lock bts dword ptr[edx], 0
- jb lock_failed
- mov rc, TRUE
- lock_failed:
- }
-#else
- KdBreakPoint();
-#endif
-
- }
- } else {
- rc = TRUE;
- }
-
- if (rc) {
- lock->irql = Irql;
- } else {
- KeLowerIrql(Irql);
- }
-
- return rc;
-}
-
-/* synchronization between cpus: it will disable all DPCs
- kernel task scheduler on the CPU */
-#define spin_lock_bh(x) spin_lock(x)
-#define spin_unlock_bh(x) spin_unlock(x)
-#define spin_lock_bh_init(x) spin_lock_init(x)
-
-/*
- * rw_semaphore (using ERESOURCE)
- */
-
-
-typedef struct rw_semaphore {
- ERESOURCE rwsem;
-} rw_semaphore_t;
-
-
-#define CFS_DECL_RWSEM(name) rw_semaphore_t name
-#define CFS_DECL_RWSEM_EXTERN(name) extern rw_semaphore_t name
-
-
-/*
- * init_rwsem
- * To initialize the the rw_semaphore_t structure
- *
- * Arguments:
- * rwsem: pointer to the rw_semaphore_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void init_rwsem(rw_semaphore_t *s)
-{
- ExInitializeResourceLite(&s->rwsem);
-}
-
-
-/*
- * fini_rwsem
- * To finilize/destroy the the rw_semaphore_t structure
- *
- * Arguments:
- * rwsem: pointer to the rw_semaphore_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * For winnt system, we need this routine to delete the ERESOURCE.
- * Just define it NULL for other systems.
- */
-
-static inline void fini_rwsem(rw_semaphore_t *s)
-{
- ExDeleteResourceLite(&s->rwsem);
-}
-
-/*
- * down_read
- * To acquire read-lock of the rw_semahore
- *
- * Arguments:
- * rwsem: pointer to the rw_semaphore_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void down_read(struct rw_semaphore *s)
-{
- ExAcquireResourceSharedLite(&s->rwsem, TRUE);
-}
-
-
-/*
- * down_read_trylock
- * To acquire read-lock of the rw_semahore without blocking
- *
- * Arguments:
- * rwsem: pointer to the rw_semaphore_t structure
- *
- * Return Value:
- * Zero: failed to acquire the read lock
- * Non-Zero: succeeded to acquire the read lock
- *
- * Notes:
- * This routine will return immediately without waiting.
- */
-
-static inline int down_read_trylock(struct rw_semaphore *s)
-{
- return ExAcquireResourceSharedLite(&s->rwsem, FALSE);
-}
-
-
-/*
- * down_write
- * To acquire write-lock of the rw_semahore
- *
- * Arguments:
- * rwsem: pointer to the rw_semaphore_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void down_write(struct rw_semaphore *s)
-{
- ExAcquireResourceExclusiveLite(&(s->rwsem), TRUE);
-}
-
-
-/*
- * down_write_trylock
- * To acquire write-lock of the rw_semahore without blocking
- *
- * Arguments:
- * rwsem: pointer to the rw_semaphore_t structure
- *
- * Return Value:
- * Zero: failed to acquire the write lock
- * Non-Zero: succeeded to acquire the read lock
- *
- * Notes:
- * This routine will return immediately without waiting.
- */
-
-static inline int down_write_trylock(struct rw_semaphore *s)
-{
- return ExAcquireResourceExclusiveLite(&(s->rwsem), FALSE);
-}
-
-
-/*
- * up_read
- * To release read-lock of the rw_semahore
- *
- * Arguments:
- * rwsem: pointer to the rw_semaphore_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void up_read(struct rw_semaphore *s)
-{
- ExReleaseResourceForThreadLite(
- &(s->rwsem),
- ExGetCurrentResourceThread());
-}
-
-
-/*
- * up_write
- * To release write-lock of the rw_semahore
- *
- * Arguments:
- * rwsem: pointer to the rw_semaphore_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void up_write(struct rw_semaphore *s)
-{
- ExReleaseResourceForThreadLite(
- &(s->rwsem),
- ExGetCurrentResourceThread());
-}
-
-/*
- * rwlock_t (using sempahore)
- *
- * - rwlock_init(x)
- * - read_lock(x)
- * - read_unlock(x)
- * - write_lock(x)
- * - write_unlock(x)
- */
-
-typedef struct {
- spinlock_t guard;
- int count;
-} rwlock_t;
-
-void rwlock_init(rwlock_t * rwlock);
-void rwlock_fini(rwlock_t * rwlock);
-
-void read_lock(rwlock_t * rwlock);
-void read_unlock(rwlock_t * rwlock);
-void write_lock(rwlock_t * rwlock);
-void write_unlock(rwlock_t * rwlock);
-
-#define write_lock_irqsave(l, f) do {f = 0; write_lock(l);} while(0)
-#define write_unlock_irqrestore(l, f) do {write_unlock(l);} while(0)
-#define read_lock_irqsave(l, f) do {f=0; read_lock(l);} while(0)
-#define read_unlock_irqrestore(l, f) do {read_unlock(l);} while(0)
-
-
-/*
- * Semaphore
- *
- * - sema_init(x, v)
- * - __down(x)
- * - __up(x)
- */
-
-typedef struct semaphore {
- KSEMAPHORE sem;
-} mutex_t;
-
-static inline void sema_init(struct semaphore *s, int val)
-{
- KeInitializeSemaphore(&s->sem, val, val);
-}
-
-static inline void __down(struct semaphore *s)
-{
- KeWaitForSingleObject( &(s->sem), Executive,
- KernelMode, FALSE, NULL );
-
-}
-
-static inline void __up(struct semaphore *s)
-{
- KeReleaseSemaphore(&s->sem, 0, 1, FALSE);
-}
-
-/*
- * mutex_t:
- *
- * - init_mutex(x)
- * - init_mutex_locked(x)
- * - mutex_up(x)
- * - mutex_down(x)
- */
-
-
-/*
- * init_mutex
- * To initialize a mutex_t structure
- *
- * Arguments:
- * mutex: pointer to the mutex_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void init_mutex(mutex_t *mutex)
-{
- sema_init(mutex, 1);
-}
-
-
-/*
- * mutex_down
- * To acquire the mutex lock
- *
- * Arguments:
- * mutex: pointer to the mutex_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void mutex_down(mutex_t *mutex)
-{
- __down(mutex);
-}
-
-
-/*
- * mutex_up
- * To release the mutex lock (acquired already)
- *
- * Arguments:
- * mutex: pointer to the mutex_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void mutex_up(mutex_t *mutex)
-{
- __up(mutex);
-}
-
-
-/*
- * init_mutex_locked
- * To initialize the mutex as acquired state
- *
- * Arguments:
- * mutex: pointer to the mutex_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline init_mutex_locked(mutex_t *mutex)
-{
- init_mutex(mutex);
- mutex_down(mutex);
-}
-
-/*
- * completion
- *
- * - init_complition(c)
- * - complete(c)
- * - wait_for_completion(c)
- */
-
-struct completion {
- event_t event;
-};
-
-
-/*
- * init_completion
- * To initialize the completion object
- *
- * Arguments:
- * c: pointer to the completion structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void init_completion(struct completion *c)
-{
- cfs_init_event(&(c->event), 1, FALSE);
-}
-
-
-/*
- * complete
- * To complete/signal the completion object
- *
- * Arguments:
- * c: pointer to the completion structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void complete(struct completion *c)
-{
- cfs_wake_event(&(c->event));
-}
-
-/*
- * wait_for_completion
- * To wait on the completion object. If the event is signaled,
- * this function will return to the call with the event un-singled.
- *
- * Arguments:
- * c: pointer to the completion structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-static inline void wait_for_completion(struct completion *c)
-{
- cfs_wait_event(&(c->event), 0);
-}
-
-/* __KERNEL__ */
-#else
-
-#include "../user-lock.h"
-
-/* __KERNEL__ */
-#endif
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines of memory manipulation routines .
- *
- */
-
-#ifndef __LIBCFS_WINNT_CFS_MEM_H__
-#define __LIBCFS_WINNT_CFS_MEM_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-#ifdef __KERNEL__
-
-#define CFS_PAGE_SIZE PAGE_SIZE
-#define CFS_PAGE_SHIFT PAGE_SHIFT
-#define CFS_PAGE_MASK (~(PAGE_SIZE - 1))
-
-typedef struct cfs_page {
- void * addr;
- atomic_t count;
-} cfs_page_t;
-
-
-cfs_page_t *cfs_alloc_page(int flags);
-void cfs_free_page(cfs_page_t *pg);
-
-static inline void *cfs_page_address(cfs_page_t *page)
-{
- return page->addr;
-}
-
-static inline void *cfs_kmap(cfs_page_t *page)
-{
- return page->addr;
-}
-
-static inline void cfs_kunmap(cfs_page_t *page)
-{
- return;
-}
-
-static inline void cfs_get_page(cfs_page_t *page)
-{
- atomic_inc(&page->count);
-}
-
-static inline void cfs_put_page(cfs_page_t *page)
-{
- atomic_dec(&page->count);
-}
-
-static inline int cfs_page_count(cfs_page_t *page)
-{
- return atomic_read(&page->count);
-}
-
-/*
- * Memory allocator
- */
-
-#define CFS_ALLOC_ATOMIC_TRY (0)
-
-extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
-extern void cfs_free(void *addr);
-
-extern void *cfs_alloc_large(size_t nr_bytes);
-extern void cfs_free_large(void *addr);
-
-/*
- * SLAB allocator
- */
-
-#define SLAB_HWCACHE_ALIGN 0
-
-/* The cache name is limited to 20 chars */
-
-typedef struct cfs_mem_cache {
-
- char name[20];
- ulong_ptr flags;
- NPAGED_LOOKASIDE_LIST npll;
-
-} cfs_mem_cache_t;
-
-
-extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, ulong_ptr);
-extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
-extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
-extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
-
-
-/*
- * Page allocator slabs
- */
-
-extern cfs_mem_cache_t *cfs_page_t_slab;
-extern cfs_mem_cache_t *cfs_page_p_slab;
-
-
-#define CFS_DECL_MMSPACE
-#define CFS_MMSPACE_OPEN do {} while(0)
-#define CFS_MMSPACE_CLOSE do {} while(0)
-
-
-#define mb() do {} while(0)
-#define rmb() mb()
-#define wmb() mb()
-
-
-/* __KERNEL__ */
-#endif
-
-#endif /* __WINNT_CFS_MEM_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef __LIBCFS_WINNT_CFS_PRIM_H__
-#define __LIBCFS_WINNT_CFS_PRIM_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-
-/*
- * libcfs proc device object
- */
-
-
-#define LUSTRE_PROC_DEVICE L"\\Device\\lproc" /* proc fs emulator device object */
-#define LUSTRE_PROC_SYMLNK L"\\DosDevices\\lproc" /* proc fs user-visible device */
-
-
-/*
- * Device IO Control Code Definitions
- */
-
-#define FILE_DEVICE_LIBCFS ('LC')
-
-#define FILE_DEVICE_LIBCFS ('LC')
-
-#define FUNC_LIBCFS_VERSION 0x101 // get version of current libcfs
-#define FUNC_LIBCFS_IOCTL 0x102 // Device i/o control to proc fs
-
-
-#define IOCTL_LIBCFS_VERSION \
- CTL_CODE (FILE_DEVICE_LIBCFS, FUNC_LIBCFS_VERSION, METHOD_BUFFERED, FILE_ANY_ACCESS)
-#define IOCTL_LIBCFS_ENTRY \
- CTL_CODE(FILE_DEVICE_LIBCFS, FUNC_LIBCFS_IOCTL, METHOD_BUFFERED, FILE_ANY_ACCESS)
-
-#pragma pack(4)
-
-typedef struct _CFS_PROC_IOCTL {
-
- ULONG cmd; // ioctl command identifier
- ULONG len; // length of data
-
- // UCHAR data[]; // content of the real ioctl
-
-} CFS_PROC_IOCTL, *PCFS_PROC_IOCTL;
-
-#pragma pack()
-
-#ifdef __KERNEL__
-
-#include <libcfs/list.h>
-
-/*
- * Symbol functions for libcfs
- *
- * OSX has no facility for use to register symbol.
- * So we have to implement it.
- */
-#define CFS_SYMBOL_LEN 64
-
-struct cfs_symbol {
- char name[CFS_SYMBOL_LEN];
- void *value;
- int ref;
- struct list_head sym_list;
-};
-
-extern int cfs_symbol_register(const char *, const void *);
-extern void cfs_symbol_unregister(const char *);
-extern void * cfs_symbol_get(const char *);
-extern void cfs_symbol_put(const char *);
-extern void cfs_symbol_clean();
-
-
-
-typedef struct file_operations cfs_file_operations_t;
-typedef struct file cfs_file_t;
-
-/*
- * Pseudo device register
- */
-
-typedef struct
-{
- int minor;
- const char * name;
- cfs_file_operations_t * fops;
-} cfs_psdev_t;
-
-int cfs_psdev_register(cfs_psdev_t * psdev);
-int cfs_psdev_deregister(cfs_psdev_t * psdev);
-
-
-/*
- * Proc emulator file system APIs
- */
-
-typedef int cfs_read_proc_t(char *page, char **start, off_t off,
- int count, int *eof, void *data);
-typedef int cfs_write_proc_t(struct file *file, const char *buffer,
- ulong_ptr count, void *data);
-
-#define CFS_PROC_ENTRY_MAGIC 'CPEM'
-
-#define CFS_PROC_FLAG_DIRECTORY 0x00000001 // directory node
-#define CFS_PROC_FLAG_ATTACHED 0x00000002 // node is attached to proc
-#define CFS_PROC_FLAG_MISCDEV 0x00000004 // miscellaneous device
-
-typedef struct cfs_proc_entry
-{
- ULONG magic; // Magic
- ULONG flags; // Flags
-
- struct _dir_entry { // proc directory entry
- PRTL_SPLAY_LINKS root;
- };
-
- struct _file_entry { // proc file / leaf entry
- cfs_read_proc_t * read_proc;
- cfs_write_proc_t * write_proc;
- };
-
- mode_t mode;
- unsigned short nlink;
-
-
- struct file_operations * proc_fops;
- void * data;
-
- // proc_dir_entry ended.
-
- RTL_SPLAY_LINKS s_link; // splay link
-
- //
- // Maximum length of proc entry name is 0x20
- //
-
- char name[0x20];
-
-} cfs_proc_entry_t, cfs_proc_dir_entry_t;
-
-typedef cfs_proc_entry_t cfs_proc_dir_entry_t;
-
-#define PROC_BLOCK_SIZE PAGE_SIZE
-
-/*
- * Sysctl register
- */
-
-typedef struct ctl_table cfs_sysctl_table_t;
-typedef struct ctl_table_header cfs_sysctl_table_header_t;
-
-
-typedef int ctl_handler (
- cfs_sysctl_table_t *table,
- int *name, int nlen,
- void *oldval, size_t *oldlenp,
- void *newval, size_t newlen,
- void **context );
-
-typedef int proc_handler (
- cfs_sysctl_table_t *ctl,
- int write, struct file * filp,
- void *buffer, size_t *lenp );
-
-
-int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
- void *buffer, size_t *lenp);
-
-int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp,
- void *buffer, size_t *lenp);
-
-int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen,
- void *oldval, size_t *oldlenp,
- void *newval, size_t newlen, void **context);
-
-
-/*
- * System io control definitions
- */
-
-#define CTL_MAXNAME 10
-
-#define CTL_ANY -1 /* Matches any name */
-#define CTL_NONE 0
-
-enum
-{
- CTL_KERN=1, /* General kernel info and control */
- CTL_VM=2, /* VM management */
- CTL_NET=3, /* Networking */
- CTL_PROC=4, /* Process info */
- CTL_FS=5, /* Filesystems */
- CTL_DEBUG=6, /* Debugging */
- CTL_DEV=7, /* Devices */
- CTL_BUS=8, /* Busses */
- CTL_ABI=9, /* Binary emulation */
- CTL_CPU=10 /* CPU stuff (speed scaling, etc) */
-};
-
-/* sysctl table definitons */
-struct ctl_table
-{
- int ctl_name;
- char *procname;
- void *data;
- int maxlen;
- mode_t mode;
- cfs_sysctl_table_t *child;
- proc_handler *proc_handler; /* text formatting callback */
- ctl_handler *strategy; /* read / write callback functions */
- cfs_proc_entry_t *de; /* proc entry block */
- void *extra1;
- void *extra2;
-};
-
-
-/* the mantaner of the cfs_sysctl_table trees */
-struct ctl_table_header
-{
- cfs_sysctl_table_t * ctl_table;
- struct list_head ctl_entry;
-};
-
-
-cfs_proc_entry_t * create_proc_entry(char *name, mode_t mod,
- cfs_proc_entry_t *parent);
-void proc_free_entry(cfs_proc_entry_t *de);
-void remove_proc_entry(char *name, cfs_proc_entry_t *entry);
-cfs_proc_entry_t * search_proc_entry(char * name,
- cfs_proc_entry_t * root );
-
-#define cfs_create_proc_entry create_proc_entry
-#define cfs_free_proc_entry proc_free_entry
-#define cfs_remove_proc_entry remove_proc_entry
-
-#define register_cfs_sysctl_table(t, a) register_sysctl_table(t, a)
-#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t, a)
-
-
-/*
- * declaration of proc kernel process routines
- */
-
-cfs_file_t *
-lustre_open_file(char * filename);
-
-int
-lustre_close_file(cfs_file_t * fh);
-
-int
-lustre_do_ioctl( cfs_file_t * fh,
- unsigned long cmd,
- ulong_ptr arg );
-
-int
-lustre_ioctl_file( cfs_file_t * fh,
- PCFS_PROC_IOCTL devctl);
-
-size_t
-lustre_read_file( cfs_file_t * fh,
- loff_t off,
- size_t size,
- char * buf
- );
-
-size_t
-lustre_write_file( cfs_file_t * fh,
- loff_t off,
- size_t size,
- char * buf
- );
-
-/*
- * Wait Queue
- */
-
-
-typedef int cfs_task_state_t;
-
-#define CFS_TASK_INTERRUPTIBLE 0x00000001
-#define CFS_TASK_UNINT 0x00000002
-
-
-
-#define CFS_WAITQ_MAGIC 'CWQM'
-#define CFS_WAITLINK_MAGIC 'CWLM'
-
-typedef struct cfs_waitq {
-
- unsigned int magic;
- unsigned int flags;
-
- spinlock_t guard;
- struct list_head waiters;
-
-} cfs_waitq_t;
-
-
-typedef struct cfs_waitlink cfs_waitlink_t;
-
-#define CFS_WAITQ_CHANNELS (2)
-
-#define CFS_WAITQ_CHAN_NORMAL (0)
-#define CFS_WAITQ_CHAN_FORWARD (1)
-
-
-
-typedef struct cfs_waitlink_channel {
- struct list_head link;
- cfs_waitq_t * waitq;
- cfs_waitlink_t * waitl;
-} cfs_waitlink_channel_t;
-
-struct cfs_waitlink {
-
- unsigned int magic;
- int flags;
- event_t * event;
- atomic_t * hits;
-
- cfs_waitlink_channel_t waitq[CFS_WAITQ_CHANNELS];
-};
-
-enum {
- CFS_WAITQ_EXCLUSIVE = 1
-};
-
-#define CFS_DECL_WAITQ(name) cfs_waitq_t name
-
-
-void cfs_waitq_init(struct cfs_waitq *waitq);
-void cfs_waitlink_init(struct cfs_waitlink *link);
-
-void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
-void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
- struct cfs_waitlink *link);
-void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
-void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
-int cfs_waitq_active(struct cfs_waitq *waitq);
-
-void cfs_waitq_signal(struct cfs_waitq *waitq);
-void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
-void cfs_waitq_broadcast(struct cfs_waitq *waitq);
-
-void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state);
-cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
- cfs_task_state_t state, cfs_duration_t timeout);
-
-
-
-/* Kernel thread */
-
-typedef int (*cfs_thread_t) (void *arg);
-
-typedef struct _cfs_thread_context {
- cfs_thread_t func;
- void * arg;
-} cfs_thread_context_t;
-
-int cfs_kernel_thread(int (*func)(void *), void *arg, int flag);
-
-/*
- * thread creation flags from Linux, not used in winnt
- */
-#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
-#define CLONE_VM 0x00000100 /* set if VM shared between processes */
-#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
-#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
-#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
-#define CLONE_PID 0x00001000 /* set if pid shared */
-#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
-#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
-#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
-#define CLONE_THREAD 0x00010000 /* Same thread group? */
-#define CLONE_NEWNS 0x00020000 /* New namespace group? */
-
-#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD)
-
-
-/*
- * sigset ...
- */
-
-typedef sigset_t cfs_sigset_t;
-
-/*
- * Task struct
- */
-
-#define MAX_SCHEDULE_TIMEOUT ((long_ptr)(~0UL>>12))
-
-
-#define NGROUPS 1
-#define CFS_CURPROC_COMM_MAX (16)
-typedef struct task_sruct{
- mode_t umask;
-
- pid_t pid;
- pid_t pgrp;
-
- uid_t uid,euid,suid,fsuid;
- gid_t gid,egid,sgid,fsgid;
-
- int ngroups;
- gid_t groups[NGROUPS];
- cfs_kernel_cap_t cap_effective,
- cap_inheritable,
- cap_permitted;
-
- char comm[CFS_CURPROC_COMM_MAX];
- void * journal_info;
-} cfs_task_t;
-
-
-/*
- * linux task struct emulator ...
- */
-
-#define TASKMAN_MAGIC 'TMAN' /* Task Manager */
-#define TASKSLT_MAGIC 'TSLT' /* Task Slot */
-
-typedef struct _TASK_MAN {
-
- ULONG Magic; /* Magic and Flags */
- ULONG Flags;
-
- spinlock_t Lock; /* Protection lock */
-
- cfs_mem_cache_t * slab; /* Memory slab for task slot */
-
- ULONG NumOfTasks; /* Total tasks (threads) */
- LIST_ENTRY TaskList; /* List of task slots */
-
-} TASK_MAN, *PTASK_MAN;
-
-typedef struct _TASK_SLOT {
-
- ULONG Magic; /* Magic and Flags */
- ULONG Flags;
-
- LIST_ENTRY Link; /* To be linked to TaskMan */
-
- event_t Event; /* Schedule event */
-
- HANDLE Pid; /* Process id */
- HANDLE Tid; /* Thread id */
- PETHREAD Tet; /* Pointer to ethread */
-
- atomic_t count; /* refer count */
- atomic_t hits; /* times of waken event singaled */
-
- KIRQL irql; /* irql for rwlock ... */
-
- cfs_task_t task; /* linux task part */
-
-} TASK_SLOT, *PTASK_SLOT;
-
-
-#define current cfs_current()
-#define set_current_state(s) do {;} while (0)
-
-#define wait_event(wq, condition) \
-do { \
- cfs_waitlink_t __wait; \
- \
- cfs_waitlink_init(&__wait); \
- while (TRUE) { \
- cfs_waitq_add(&wq, &__wait); \
- if (condition) { \
- break; \
- } \
- cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \
- cfs_waitq_del(&wq, &__wait); \
- } \
- cfs_waitq_del(&wq, &__wait); \
-} while(0)
-
-#define wait_event_interruptible(wq, condition, __ret) \
-do { \
- cfs_waitlink_t __wait; \
- \
- __ret = 0; \
- cfs_waitlink_init(&__wait); \
- while (TRUE) { \
- cfs_waitq_add(&wq, &__wait); \
- if (condition) { \
- break; \
- } \
- cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \
- cfs_waitq_del(&wq, &__wait); \
- } \
- cfs_waitq_del(&wq, &__wait); \
-} while(0)
-
-
-int init_task_manager();
-void cleanup_task_manager();
-cfs_task_t * cfs_current();
-int schedule_timeout(int64_t time);
-int schedule();
-int wake_up_process(cfs_task_t * task);
-#define cfs_schedule_timeout(state, time) schedule_timeout(time)
-void sleep_on(cfs_waitq_t *waitq);
-
-#define CFS_DECL_JOURNAL_DATA
-#define CFS_PUSH_JOURNAL do {;} while(0)
-#define CFS_POP_JOURNAL do {;} while(0)
-
-
-/* module related definitions */
-
-#ifndef __exit
-#define __exit
-#endif
-#ifndef __init
-#define __init
-#endif
-
-#define request_module(x) (0)
-
-#define EXPORT_SYMBOL(s)
-#define MODULE_AUTHOR(s)
-#define MODULE_DESCRIPTION(s)
-#define MODULE_LICENSE(s)
-#define MODULE_PARM(a, b)
-#define MODULE_PARM_DESC(a, b)
-
-#define module_init(X) int __init module_##X() {return X();}
-#define module_exit(X) void __exit module_##X() {X();}
-
-#define DECLARE_INIT(X) extern int __init module_##X(void)
-#define DECLARE_EXIT(X) extern void __exit module_##X(void)
-
-#define MODULE_INIT(X) do { int rc = module_##X(); \
- if (rc) goto errorout; \
- } while(0)
-
-#define MODULE_EXIT(X) do { module_##X(); } while(0)
-
-
-/* Module interfaces */
-#define cfs_module(name, version, init, fini) \
-module_init(init); \
-module_exit(fini)
-
-
-/*
- * Linux kernel version definition
- */
-
-#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c)
-#define LINUX_VERSION_CODE (2*100+6*10+7)
-
-
-/*
- * Signal
- */
-#define SIGNAL_MASK_ASSERT()
-
-/*
- * Timer
- */
-
-#define CFS_TIMER_FLAG_INITED 0x00000001 // Initialized already
-#define CFS_TIMER_FLAG_TIMERED 0x00000002 // KeSetTimer is called
-
-typedef struct cfs_timer {
-
- KSPIN_LOCK Lock;
-
- ULONG Flags;
-
- KDPC Dpc;
- KTIMER Timer;
-
- cfs_time_t deadline;
-
- void (*proc)(ulong_ptr);
- void * arg;
-
-} cfs_timer_t;
-
-
-typedef void (*timer_func_t)(ulong_ptr);
-
-#define cfs_init_timer(t)
-
-void cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr), void *arg);
-void cfs_timer_done(cfs_timer_t *t);
-void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline);
-void cfs_timer_disarm(cfs_timer_t *t);
-int cfs_timer_is_armed(cfs_timer_t *t);
-cfs_time_t cfs_timer_deadline(cfs_timer_t *t);
-
-
-/* deschedule for a bit... */
-static inline void cfs_pause(cfs_duration_t ticks)
-{
- cfs_schedule_timeout(TASK_UNINTERRUPTIBLE, ticks);
-}
-
-
-static inline void cfs_enter_debugger(void)
-{
-#if _X86_
- __asm int 3;
-#else
- KdBreakPoint();
-#endif
-}
-
-/*
- * libcfs globals initialization/cleanup
- */
-
-int
-libcfs_arch_init(void);
-
-void
-libcfs_arch_cleanup(void);
-
-/*
- * SMP ...
- */
-
-#define SMP_CACHE_BYTES 128
-#define __cacheline_aligned
-#define NR_CPUS (2)
-#define smp_processor_id() KeGetCurrentProcessorNumber()
-#define smp_num_cpus NR_CPUS
-#define num_online_cpus() smp_num_cpus
-#define smp_call_function(f, a, n, w) do {} while(0)
-
-/*
- * Irp related
- */
-
-#define NR_IRQS 512
-#define in_interrupt() (0)
-
-/*
- * printk flags
- */
-
-#define KERN_EMERG "<0>" /* system is unusable */
-#define KERN_ALERT "<1>" /* action must be taken immediately */
-#define KERN_CRIT "<2>" /* critical conditions */
-#define KERN_ERR "<3>" /* error conditions */
-#define KERN_WARNING "<4>" /* warning conditions */
-#define KERN_NOTICE "<5>" /* normal but significant condition */
-#define KERN_INFO "<6>" /* informational */
-#define KERN_DEBUG "<7>" /* debug-level messages */
-
-/*
- * Misc
- */
-
-
-#define inter_module_get(n) cfs_symbol_get(n)
-#define inter_module_put(n) cfs_symbol_put(n)
-
-#ifndef likely
-#define likely(exp) (exp)
-#endif
-#ifndef unlikely
-#define unlikely(exp) (exp)
-#endif
-
-#define lock_kernel() do {} while(0)
-#define unlock_kernel() do {} while(0)
-
-#define CAP_SYS_ADMIN 0
-#define CAP_SYS_ROOT 1
-
-#define capable(a) (TRUE)
-
-#define USERMODEHELPER(path, argv, envp) (0)
-
-
-#define local_irq_save(x)
-#define local_irq_restore(x)
-
-#define cfs_assert ASSERT
-
-#define THREAD_NAME
-
-#else /* !__KERNEL__ */
-
-#define PAGE_CACHE_SIZE PAGE_SIZE
-#define PAGE_CACHE_MASK PAGE_MASK
-
-#define getpagesize() (PAGE_SIZE)
-
-
-typedef struct {
- int foo;
-} pthread_mutex_t;
-
-typedef struct {
- int foo;
-} pthread_cond_t;
-
-#define pthread_mutex_init(x, y) do {} while(0)
-#define pthread_cond_init(x, y) do {} while(0)
-
-#define pthread_mutex_lock(x) do {} while(0)
-#define pthread_mutex_unlock(x) do {} while(0)
-
-#define pthread_cond_wait(x,y) do {} while(0)
-#define pthread_cond_broadcast(x) do {} while(0)
-
-typedef struct file {
- int foo;
-} cfs_file_t;
-
-typedef struct cfs_proc_dir_entry{
- void *data;
-}cfs_proc_dir_entry_t;
-
-
-
-#include "../user-prim.h"
-
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#define strcasecmp strcmp
-#define strncasecmp strncmp
-#define snprintf _snprintf
-#define getpid() (0)
-
-
-#define getpwuid(x) (NULL)
-#define getgrgid(x) (NULL)
-
-int cfs_proc_mknod(const char *path, mode_t mode, dev_t dev);
-
-int gethostname(char * name, int namelen);
-
-#define setlinebuf(x) do {} while(0)
-
-
-NTSYSAPI VOID NTAPI DebugBreak();
-
-
-static inline void cfs_enter_debugger(void)
-{
-#if _X86_
- __asm int 3;
-#else
- DebugBreak();
-#endif
-}
-
-/* Maximum EA Information Length */
-#define EA_MAX_LENGTH (sizeof(FILE_FULL_EA_INFORMATION) + 15)
-
-
-/*
- * proc user mode routines
- */
-
-HANDLE cfs_proc_open (char * filename, int oflag);
-int cfs_proc_close(HANDLE handle);
-int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count);
-int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count);
-int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer);
-
-
-/*
- * Native API definitions
- */
-
-//
-// Disk I/O Routines
-//
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtReadFile(HANDLE FileHandle,
- HANDLE Event OPTIONAL,
- PIO_APC_ROUTINE ApcRoutine OPTIONAL,
- PVOID ApcContext OPTIONAL,
- PIO_STATUS_BLOCK IoStatusBlock,
- PVOID Buffer,
- ULONG Length,
- PLARGE_INTEGER ByteOffset OPTIONAL,
- PULONG Key OPTIONAL);
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtWriteFile(HANDLE FileHandle,
- HANDLE Event OPTIONAL,
- PIO_APC_ROUTINE ApcRoutine OPTIONAL,
- PVOID ApcContext OPTIONAL,
- PIO_STATUS_BLOCK IoStatusBlock,
- PVOID Buffer,
- ULONG Length,
- PLARGE_INTEGER ByteOffset OPTIONAL,
- PULONG Key OPTIONAL);
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtClose(HANDLE Handle);
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtCreateFile(PHANDLE FileHandle,
- ACCESS_MASK DesiredAccess,
- POBJECT_ATTRIBUTES ObjectAttributes,
- PIO_STATUS_BLOCK IoStatusBlock,
- PLARGE_INTEGER AllocationSize OPTIONAL,
- ULONG FileAttributes,
- ULONG ShareAccess,
- ULONG CreateDisposition,
- ULONG CreateOptions,
- PVOID EaBuffer OPTIONAL,
- ULONG EaLength);
-
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtDeviceIoControlFile(
- IN HANDLE FileHandle,
- IN HANDLE Event,
- IN PIO_APC_ROUTINE ApcRoutine,
- IN PVOID ApcContext,
- OUT PIO_STATUS_BLOCK IoStatusBlock,
- IN ULONG IoControlCode,
- IN PVOID InputBuffer,
- IN ULONG InputBufferLength,
- OUT PVOID OutputBuffer,
- OUT ULONG OutputBufferLength
- );
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtFsControlFile(
- IN HANDLE FileHandle,
- IN HANDLE Event OPTIONAL,
- IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
- IN PVOID ApcContext OPTIONAL,
- OUT PIO_STATUS_BLOCK IoStatusBlock,
- IN ULONG FsControlCode,
- IN PVOID InputBuffer OPTIONAL,
- IN ULONG InputBufferLength,
- OUT PVOID OutputBuffer OPTIONAL,
- IN ULONG OutputBufferLength
-);
-
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtQueryInformationFile(
- IN HANDLE FileHandle,
- OUT PIO_STATUS_BLOCK IoStatusBlock,
- OUT PVOID FileInformation,
- IN ULONG Length,
- IN FILE_INFORMATION_CLASS FileInformationClass
- );
-
-//
-// Random routines ...
-//
-
-NTSYSAPI
-ULONG
-NTAPI
-RtlRandom(
- IN OUT PULONG Seed
- );
-
-#endif /* __KERNEL__ */
-
-
-//
-// Inode flags (Linux uses octad number, but why ? strange!!!)
-//
-
-#undef S_IFMT
-#undef S_IFDIR
-#undef S_IFCHR
-#undef S_IFREG
-#undef S_IREAD
-#undef S_IWRITE
-#undef S_IEXEC
-
-#define S_IFMT 0x0F000 /* 017 0000 */
-#define S_IFSOCK 0x0C000 /* 014 0000 */
-#define S_IFLNK 0x0A000 /* 012 0000 */
-#define S_IFREG 0x08000 /* 010 0000 */
-#define S_IFBLK 0x06000 /* 006 0000 */
-#define S_IFDIR 0x04000 /* 004 0000 */
-#define S_IFCHR 0x02000 /* 002 0000 */
-#define S_IFIFO 0x01000 /* 001 0000 */
-#define S_ISUID 0x00800 /* 000 4000 */
-#define S_ISGID 0x00400 /* 000 2000 */
-#define S_ISVTX 0x00200 /* 000 1000 */
-
-#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
-#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
-#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
-#define S_ISFIL(m) (((m) & S_IFMT) == S_IFFIL)
-#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
-#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
-#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
-#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
-
-#define S_IPERMISSION_MASK 0x1FF /* */
-
-#define S_IRWXU 0x1C0 /* 0 0700 */
-#define S_IRUSR 0x100 /* 0 0400 */
-#define S_IWUSR 0x080 /* 0 0200 */
-#define S_IXUSR 0x040 /* 0 0100 */
-
-#define S_IRWXG 0x038 /* 0 0070 */
-#define S_IRGRP 0x020 /* 0 0040 */
-#define S_IWGRP 0x010 /* 0 0020 */
-#define S_IXGRP 0x008 /* 0 0010 */
-
-#define S_IRWXO 0x007 /* 0 0007 */
-#define S_IROTH 0x004 /* 0 0004 */
-#define S_IWOTH 0x002 /* 0 0002 */
-#define S_IXOTH 0x001 /* 0 0001 */
-
-#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO)
-#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
-#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH)
-#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH)
-#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
-
-/*
- * linux ioctl coding definitions
- */
-
-#define _IOC_NRBITS 8
-#define _IOC_TYPEBITS 8
-#define _IOC_SIZEBITS 14
-#define _IOC_DIRBITS 2
-
-#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
-#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
-#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
-#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)
-
-#define _IOC_NRSHIFT 0
-#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
-#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
-#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS)
-
-/*
- * Direction bits.
- */
-#define _IOC_NONE 0U
-#define _IOC_WRITE 1U
-#define _IOC_READ 2U
-
-#define _IOC(dir,type,nr,size) \
- (((dir) << _IOC_DIRSHIFT) | \
- ((type) << _IOC_TYPESHIFT) | \
- ((nr) << _IOC_NRSHIFT) | \
- ((size) << _IOC_SIZESHIFT))
-
-/* used to create numbers */
-#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
-#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))
-#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
-#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
-
-/* used to decode ioctl numbers.. */
-#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
-#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
-#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
-#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
-
-/*
- * Io vector ...
- */
-
-struct iovec
-{
- void *iov_base;
- size_t iov_len;
-};
-
-
-#define ULONG_LONG_MAX ((__u64)(0xFFFFFFFFFFFFFFFF))
-/*
- * Convert a string to an unsigned long long integer.
- *
- * Ignores `locale' stuff. Assumes that the upper and lower case
- * alphabets and digits are each contiguous.
- */
-static inline __u64
-strtoull(
- char *nptr,
- char **endptr,
- int base)
-{
- char *s = nptr;
- __u64 acc, cutoff;
- int c, neg = 0, any, cutlim;
-
- /*
- * See strtol for comments as to the logic used.
- */
- do {
- c = *s++;
- } while (isspace(c));
- if (c == '-') {
- neg = 1;
- c = *s++;
- } else if (c == '+')
- c = *s++;
- if ((base == 0 || base == 16) &&
- c == '0' && (*s == 'x' || *s == 'X')) {
- c = s[1];
- s += 2;
- base = 16;
- }
- if (base == 0)
- base = c == '0' ? 8 : 10;
- cutoff = (__u64)ULONG_LONG_MAX / (__u64)base;
- cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base);
- for (acc = 0, any = 0;; c = *s++) {
- if (isdigit(c))
- c -= '0';
- else if (isalpha(c))
- c -= isupper(c) ? 'A' - 10 : 'a' - 10;
- else
- break;
- if (c >= base)
- break;
- if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
- any = -1;
- else {
- any = 1;
- acc *= base;
- acc += c;
- }
- }
- if (any < 0) {
- acc = ULONG_LONG_MAX;
- } else if (neg)
- acc = 0 - acc;
- if (endptr != 0)
- *endptr = (char *) (any ? s - 1 : nptr);
- return (acc);
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable time API for Winnt (kernel and user-level).
- *
- */
-
-#ifndef __LIBCFS_WINNT_TCPIP_H__
-#define __LIBCFS_WINNT_TCPIP_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-
-#ifdef __KERNEL__
-
-//
-// ks definitions
-//
-
-// iovec is defined in libcfs: winnt_prim.h
-// lnetkiov_t is defined in lnet/types.h
-
-typedef struct socket ksock_tconn_t;
-typedef struct socket cfs_socket_t;
-
-// completion notification callback routine
-
-typedef VOID (*ksock_schedule_cb)(struct socket*, int, void *, ulong_ptr);
-
-/* completion routine to update tx structure for async sending */
-typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr);
-
-//
-// tdinal definitions
-//
-
-
-#if TDI_LIBCFS_DBG
-#define KsPrint(X) KsPrintf X
-#else
-#define KsPrint(X)
-#endif
-
-
-//
-// Socket Addresses Related ...
-//
-
-#define INADDR_ANY (ULONG)0x00000000
-#define INADDR_LOOPBACK (ULONG)0x7f000001
-#define INADDR_BROADCAST (ULONG)0xffffffff
-#define INADDR_NONE (ULONG)0xffffffff
-
-/*
- * TCP / IP options
- */
-
-#define SOL_TCP 6
-#define SOL_UDP 17
-
-
-#define TL_INSTANCE 0
-
-#define TCP_SOCKET_NODELAY 1 // disabling "Nagle"
-#define TCP_SOCKET_KEEPALIVE 2
-#define TCP_SOCKET_OOBINLINE 3
-#define TCP_SOCKET_BSDURGENT 4
-#define TCP_SOCKET_ATMARK 5
-#define TCP_SOCKET_WINDOW 6
-
-
-/* Flags we can use with send/ and recv.
- Added those for 1003.1g not all are supported yet
- */
-
-#define MSG_OOB 1
-#define MSG_PEEK 2
-#define MSG_DONTROUTE 4
-#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */
-#define MSG_CTRUNC 8
-#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */
-#define MSG_TRUNC 0x20
-#define MSG_DONTWAIT 0x40 /* Nonblocking io */
-#define MSG_EOR 0x80 /* End of record */
-#define MSG_WAITALL 0x100 /* Wait for a full request */
-#define MSG_FIN 0x200
-#define MSG_SYN 0x400
-#define MSG_CONFIRM 0x800 /* Confirm path validity */
-#define MSG_RST 0x1000
-#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */
-#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */
-#define MSG_MORE 0x8000 /* Sender will send more */
-
-#define MSG_EOF MSG_FIN
-
-
-//
-// Maximum TRANSPORT_ADDRESS Length
-//
-// it must >= FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address)
-// + TDI_ADDRESS_LENGTH_IP
-//
-// I define it a little large and 16 bytes aligned to avoid possible overflow.
-//
-
-#define MAX_ADDRESS_LENGTH (0x30)
-
-
-//
-// Maximum Listers Children Sockets
-//
-
-#define MAX_CHILD_LISTENERS (4)
-
-//
-// Maximum EA Information Length
-//
-
-#define EA_MAX_LENGTH ( sizeof(FILE_FULL_EA_INFORMATION) - 1 + \
- TDI_TRANSPORT_ADDRESS_LENGTH + 1 + \
- MAX_ADDRESS_LENGTH )
-
-
-#define UDP_DEVICE_NAME L"\\Device\\Udp"
-#define TCP_DEVICE_NAME L"\\Device\\Tcp"
-
-
-/*
- * TSDU definitions
- */
-
-#define TDINAL_TSDU_DEFAULT_SIZE (0x10000)
-
-#define KS_TSDU_MAGIC 'KSTD'
-
-#define KS_TSDU_ATTACHED 0x00000001 // Attached to the socket receive tsdu list
-
-typedef struct _KS_TSDU {
-
- ULONG Magic;
- ULONG Flags;
-
- struct list_head Link;
-
- ULONG TotalLength; // Total size of KS_TSDU
-
- ULONG StartOffset; // Start offset of the first Tsdu unit
- ULONG LastOffset; // End offset of the last Tsdu unit
-
-/*
- union {
- KS_TSDU_DAT[];
- KS_TSDU_BUF[];
- KS_TSDU_MDL[];
- }
-*/
-
-} KS_TSDU, *PKS_TSDU;
-
-#define TSDU_TYPE_BUF ((USHORT)0x5401)
-#define TSDU_TYPE_DAT ((USHORT)0x5402)
-#define TSDU_TYPE_MDL ((USHORT)0x5403)
-
-#define KS_TSDU_BUF_RECEIVING 0x0001
-typedef struct _KS_TSDU_BUF {
-
- USHORT TsduType;
- USHORT TsduFlags;
-
- ULONG DataLength;
- ULONG StartOffset;
-
- PVOID UserBuffer;
-
-} KS_TSDU_BUF, *PKS_TSDU_BUF;
-
-#define KS_TSDU_DAT_RECEIVING 0x0001
-
-typedef struct _KS_TSDU_DAT {
-
- USHORT TsduType;
- USHORT TsduFlags;
-
- ULONG DataLength;
- ULONG StartOffset;
-
- ULONG TotalLength;
-
- UCHAR Data[1];
-
-} KS_TSDU_DAT, *PKS_TSDU_DAT;
-
-#define KS_DWORD_ALIGN(x) (((x) + 0x03) & (~(0x03)))
-#define KS_TSDU_STRU_SIZE(Len) (KS_DWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data)))
-
-typedef struct _KS_TSDU_MDL {
-
- USHORT TsduType;
- USHORT TsduFlags;
-
- ULONG DataLength;
- ULONG StartOffset;
-
- PMDL Mdl;
- PVOID Descriptor;
-
-} KS_TSDU_MDL, *PKS_TSDU_MDL;
-
-
-typedef struct _KS_TSDUMGR {
-
- struct list_head TsduList;
- ULONG NumOfTsdu;
- ULONG TotalBytes;
- KEVENT Event;
-
-} KS_TSDUMGR, *PKS_TSDUMGR;
-
-
-typedef struct _KS_CHAIN {
-
- KS_TSDUMGR Normal;
- KS_TSDUMGR Expedited;
-
-} KS_CHAIN, *PKS_CHAIN;
-
-
-#define TDINAL_SCHED_FACTOR (1)
-#define CAN_BE_SCHED(Len, Limit) (Len >= ((Limit) >> TDINAL_SCHED_FACTOR))
-
-//
-// Handler Settings Indictor
-//
-
-#define TDI_EVENT_MAXIMUM_HANDLER (TDI_EVENT_ERROR_EX + 1)
-
-
-typedef struct _KS_EVENT_HANDLERS {
- BOOLEAN IsActive[TDI_EVENT_MAXIMUM_HANDLER];
- PVOID Handler [TDI_EVENT_MAXIMUM_HANDLER];
-} KS_EVENT_HANDLERS, *PKS_EVENT_HANDLERS;
-
-#define SetEventHandler(ha, ht, hr) do { \
- ha.IsActive[ht] = TRUE; \
- ha.Handler[ht] = (PVOID) (hr); \
- } while(0)
-
-//
-// KSock Internal Structures
-//
-
-typedef struct _KS_ADDRESS {
-
- union {
- TRANSPORT_ADDRESS Tdi;
- UCHAR Pading[MAX_ADDRESS_LENGTH];
- };
-
- HANDLE Handle;
- PFILE_OBJECT FileObject;
-
-} KS_ADDRESS, *PKS_ADDRESS;
-
-//
-// Structures for Disconnect Workitem
-//
-
-typedef struct _KS_DISCONNECT_WORKITEM {
-
- WORK_QUEUE_ITEM WorkItem; // Workitem to perform disconnection
- ksock_tconn_t * tconn; // tdi connecton
- ULONG Flags; // connection broken/discnnection flags
- KEVENT Event; // sync event
-
-} KS_DISCONNECT_WORKITEM, *PKS_DISCONNECT_WORKITEM;
-
-
-typedef struct _KS_CONNECTION {
-
- HANDLE Handle; // Handle of the tdi connection
- PFILE_OBJECT FileObject; // FileObject if the conn object
-
- PTRANSPORT_ADDRESS Remote; // the ConnectionInfo of this connection
- PTDI_CONNECTION_INFORMATION ConnectionInfo;
-
- ULONG nagle; // Tcp options
-
-} KS_CONNECTION, *PKS_CONNECTION;
-
-
-//
-// type definitions
-//
-
-typedef MDL ksock_mdl_t;
-typedef UNICODE_STRING ksock_unicode_name_t;
-typedef WORK_QUEUE_ITEM ksock_workitem_t;
-
-
-typedef KS_CHAIN ksock_chain_t;
-typedef KS_ADDRESS ksock_tdi_addr_t;
-typedef KS_CONNECTION ksock_tconn_info_t;
-typedef KS_DISCONNECT_WORKITEM ksock_disconnect_workitem_t;
-
-
-//
-// Structures for transmission done Workitem
-//
-
-typedef struct _KS_TCPX_FINILIZE {
- ksock_workitem_t item;
- void * tx;
-} ksock_tcpx_fini_t;
-
-
-typedef struct ksock_backlogs {
-
- struct list_head list; /* list to link the backlog connections */
- int num; /* number of backlogs in the list */
-
-} ksock_backlogs_t;
-
-
-typedef struct ksock_daemon {
-
- ksock_tconn_t * tconn; /* the listener connection object */
- unsigned short nbacklogs; /* number of listening backlog conns */
- unsigned short port; /* listening port number */
- int shutdown; /* daemon threads is to exit */
- struct list_head list; /* to be attached into ksock_nal_data_t*/
-
-} ksock_daemon_t ;
-
-
-typedef enum {
-
- kstt_sender = 0, // normal sending connection type, it's active connection, while
- // child tconn is for passive connection.
-
- kstt_listener, // listener daemon type, it just acts as a daemon, and it does
- // not have real connection. It manages children tcons to accept
- // or refuse the connecting request from remote peers.
-
- kstt_child, // accepted child connection type, it's parent must be Listener
- kstt_lasttype
-} ksock_tconn_type;
-
-typedef enum {
-
- ksts_uninited = 0, // tconn is just allocated (zero values), not initialized yet
-
- ksts_inited, // tconn structure initialized: so it now can be identified as
- // a sender, listener or a child
-
- ksts_bind, // tconn is bound: the local address object (ip/port) is created.
- // after being bound, we must call ksocknal_put_tconn to release
- // the tconn objects, it's not safe just to free the memory of tconn.
-
- ksts_associated, // the connection object is created and associated with the address
- // object. so it's ready for connection. only for child and sender.
-
- ksts_connecting, // only used by child tconn: in the ConnectEvent handler routine,
- // it indicts the child tconn is busy to be connected to the peer.
-
- ksts_connected, // the connection is built already: for sender and child
-
- ksts_listening, // listener daemon is working, only for listener tconn
-
- ksts_disconnected, // disconnected by user
- ksts_aborted, // un-exptected broken status
-
- ksts_last // total number of tconn statuses
-} ksock_tconn_state;
-
-#define KS_TCONN_MAGIC 'KSTM'
-
-#define KS_TCONN_HANDLERS_SET 0x00000001 // Conection handlers are set.
-#define KS_TCONN_DISCONNECT_BUSY 0x00010000 // Disconnect Workitem is queued ...
-#define KS_TCONN_DESTROY_BUSY 0x00020000 // Destory Workitem is queued ...
-
-#define KS_TCONN_DAEMON_STARTED 0x00100000 // indict the daemon is started,
- // only valid for listener
-
-struct socket {
-
- ulong_ptr kstc_magic; /* Magic & Flags */
- ulong_ptr kstc_flags;
-
- spinlock_t kstc_lock; /* serialise lock*/
- void * kstc_conn; /* ksock_conn_t */
-
- ksock_tconn_type kstc_type; /* tdi connection Type */
- ksock_tconn_state kstc_state; /* tdi connection state flag */
-
- ksock_unicode_name_t kstc_dev; /* tcp transport device name */
-
- ksock_tdi_addr_t kstc_addr; /* local address handlers / Objects */
-
- atomic_t kstc_refcount; /* reference count of ksock_tconn */
-
- struct list_head kstc_list; /* linked to global ksocknal_data */
-
- union {
-
- struct {
- int nbacklog; /* total number of backlog tdi connections */
- ksock_backlogs_t kstc_listening; /* listeing backlog child connections */
- ksock_backlogs_t kstc_accepted; /* connected backlog child connections */
- event_t kstc_accept_event; /* Signaled by AcceptedHander,
- ksocknal_wait_accpeted_conns waits on */
- event_t kstc_destroy_event; /* Signaled when accepted child is released */
- } listener;
-
- struct {
- ksock_tconn_info_t kstc_info; /* Connection Info if Connected */
- ksock_chain_t kstc_recv; /* tsdu engine for data receiving */
- ksock_chain_t kstc_send; /* tsdu engine for data sending */
-
- int kstc_queued; /* Attached to Parent->ChildList ... */
- int kstc_queueno; /* 0: Attached to Listening list
- 1: Attached to Accepted list */
-
- int kstc_busy; /* referred by ConnectEventCallback ? */
- int kstc_accepted; /* the connection is built ready ? */
-
- struct list_head kstc_link; /* linked to parent tdi connection */
- ksock_tconn_t * kstc_parent; /* pointers to it's listener parent */
- } child;
-
- struct {
- ksock_tconn_info_t kstc_info; /* Connection Info if Connected */
- ksock_chain_t kstc_recv; /* tsdu engine for data receiving */
- ksock_chain_t kstc_send; /* tsdu engine for data sending */
- } sender;
- };
-
- ulong_ptr kstc_snd_wnd; /* Sending window size */
- ulong_ptr kstc_rcv_wnd; /* Recving window size */
-
- ksock_workitem_t kstc_destroy; /* tconn destruction workitem */
- ksock_disconnect_workitem_t kstc_disconnect; /* connection disconnect workitem */
-
- ksock_schedule_cb kstc_sched_cb; /* notification callback routine of completion */
- ksock_update_tx kstc_update_tx; /* aync sending callback to update tx */
-};
-
-#define SOCK_WMEM_QUEUED(sock) (0)
-
-#define TDINAL_WINDOW_DEFAULT_SIZE (0x100000)
-
-
-struct _KS_UDP_COMPLETION_CONTEXT;
-struct _KS_TCP_COMPLETION_CONTEXT;
-
-
-typedef
-NTSTATUS
-(*PKS_UDP_COMPLETION_ROUTINE) (
- IN PIRP Irp,
- IN struct _KS_UDP_COMPLETION_CONTEXT
- *UdpContext
- );
-
-
-typedef
-NTSTATUS
-(*PKS_TCP_COMPLETION_ROUTINE) (
- IN PIRP Irp,
- IN struct _KS_TCP_COMPLETION_CONTEXT
- *TcpContext
- );
-
-//
-// Udp Irp Completion Context
-//
-
-typedef struct _KS_UDP_COMPLETION_CONTEXT {
-
- PKEVENT Event;
- union {
- PFILE_OBJECT AddressObject;
- ksock_tconn_t * tconn;
- };
-
- PKS_UDP_COMPLETION_ROUTINE CompletionRoutine;
- PVOID CompletionContext;
-
-} KS_UDP_COMPLETION_CONTEXT, *PKS_UDP_COMPLETION_CONTEXT;
-
-
-//
-// Tcp Irp Completion Context (used by tcp data recv/send)
-//
-
-typedef struct _KS_TCP_COMPLETION_CONTEXT {
-
- PKEVENT Event; // Event to be waited on by Irp caller ...
-
- ksock_tconn_t * tconn; // the tdi connection
-
- PKS_TCP_COMPLETION_ROUTINE CompletionRoutine;
- PVOID CompletionContext;
- PVOID CompletionContext2;
-
- PKS_TSDUMGR KsTsduMgr; // Tsdu buffer manager
-
- //
- // These tow new members are for NON_BLOCKING transmission
- //
-
- BOOLEAN bCounted; // To indict needing refcount to
- // execute CompetionRoutine
- ULONG ReferCount; // Refer count of this structure
-
-} KS_TCP_COMPLETION_CONTEXT, *PKS_TCP_COMPLETION_CONTEXT;
-
-typedef KS_TCP_COMPLETION_CONTEXT ksock_tdi_tx_t, ksock_tdi_rx_t;
-
-
-/*
- * tdi extensions
- */
-
-#define IOCTL_TCP_QUERY_INFORMATION_EX \
- CTL_CODE(FILE_DEVICE_NETWORK, 0, METHOD_NEITHER, FILE_ANY_ACCESS)
-#define IOCTL_TCP_SET_INFORMATION_EX \
- CTL_CODE(FILE_DEVICE_NETWORK, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS)
-
-
-#define TcpBuildSetInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, Buffer, BufferLen)\
- { \
- PIO_STACK_LOCATION _IRPSP; \
- if ( CompRoutine != NULL) { \
- IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\
- } else { \
- IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \
- } \
- _IRPSP = IoGetNextIrpStackLocation (Irp); \
- _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL; \
- _IRPSP->DeviceObject = DevObj; \
- _IRPSP->FileObject = FileObj; \
- _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = 0; \
- _IRPSP->Parameters.DeviceIoControl.InputBufferLength = BufferLen; \
- _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_SET_INFORMATION_EX; \
- Irp->AssociatedIrp.SystemBuffer = Buffer; \
- }
-
-
-#define TcpBuildQueryInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, InBuffer, InLength, OutBuffer, OutLength)\
- { \
- PIO_STACK_LOCATION _IRPSP; \
- if ( CompRoutine != NULL) { \
- IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\
- } else { \
- IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \
- } \
- _IRPSP = IoGetNextIrpStackLocation (Irp); \
- _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL; \
- _IRPSP->DeviceObject = DevObj; \
- _IRPSP->FileObject = FileObj; \
- _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = OutLength; \
- _IRPSP->Parameters.DeviceIoControl.InputBufferLength = InLength; \
- _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_QUERY_INFORMATION_EX; \
- _IRPSP->Parameters.DeviceIoControl.Type3InputBuffer = InBuffer; \
- Irp->UserBuffer = OutBuffer; \
- }
-
-
-typedef struct ks_addr_slot {
- LIST_ENTRY link;
- int up;
- char iface[40];
- __u32 ip_addr;
- __u32 netmask;
- UNICODE_STRING devname;
- WCHAR buffer[1];
-} ks_addr_slot_t;
-
-typedef struct {
-
- /*
- * Tdi client information
- */
-
- UNICODE_STRING ksnd_client_name; /* tdi client module name */
- HANDLE ksnd_pnp_handle; /* the handle for pnp changes */
-
- spinlock_t ksnd_addrs_lock; /* serialize ip address list access */
- LIST_ENTRY ksnd_addrs_list; /* list of the ip addresses */
- int ksnd_naddrs; /* number of the ip addresses */
-
- /*
- * Tdilnd internal defintions
- */
-
- int ksnd_init; /* initialisation state */
-
- TDI_PROVIDER_INFO ksnd_provider; /* tdi tcp/ip provider's information */
-
- spinlock_t ksnd_tconn_lock; /* tdi connections access serialise */
-
- int ksnd_ntconns; /* number of tconns attached in list */
- struct list_head ksnd_tconns; /* tdi connections list */
- cfs_mem_cache_t * ksnd_tconn_slab; /* slabs for ksock_tconn_t allocations */
- event_t ksnd_tconn_exit; /* exit event to be signaled by the last tconn */
-
- spinlock_t ksnd_tsdu_lock; /* tsdu access serialise */
-
- int ksnd_ntsdus; /* number of tsdu buffers allocated */
- ulong_ptr ksnd_tsdu_size; /* the size of a signel tsdu buffer */
- cfs_mem_cache_t * ksnd_tsdu_slab; /* slab cache for tsdu buffer allocation */
-
- int ksnd_nfreetsdus; /* number of tsdu buffers in the freed list */
- struct list_head ksnd_freetsdus; /* List of the freed Tsdu buffer. */
-
- spinlock_t ksnd_daemon_lock; /* stabilize daemon ops */
- int ksnd_ndaemons; /* number of listening daemons */
- struct list_head ksnd_daemons; /* listening daemon list */
- event_t ksnd_daemon_exit; /* the last daemon quiting should singal it */
-
-} ks_data_t;
-
-int
-ks_init_tdi_data();
-
-void
-ks_fini_tdi_data();
-
-
-#endif /* __KERNEL__ */
-#endif /* __LIBCFS_WINNT_TCPIP_H__ */
-
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable time API for Winnt (kernel and user-level).
- *
- */
-
-#ifndef __LIBCFS_WINNT_LINUX_TIME_H__
-#define __LIBCFS_WINNT_LINUX_TIME_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-/* Portable time API */
-
-/*
- * Platform provides three opaque data-types:
- *
- * cfs_time_t represents point in time. This is internal kernel
- * time rather than "wall clock". This time bears no
- * relation to gettimeofday().
- *
- * cfs_duration_t represents time interval with resolution of internal
- * platform clock
- *
- * cfs_fs_time_t represents instance in world-visible time. This is
- * used in file-system time-stamps
- *
- * cfs_time_t cfs_time_current(void);
- * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
- * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
- * int cfs_time_before (cfs_time_t, cfs_time_t);
- * int cfs_time_beforeq(cfs_time_t, cfs_time_t);
- *
- * cfs_duration_t cfs_duration_build(int64_t);
- *
- * time_t cfs_duration_sec (cfs_duration_t);
- * void cfs_duration_usec(cfs_duration_t, struct timeval *);
- * void cfs_duration_nsec(cfs_duration_t, struct timespec *);
- *
- * void cfs_fs_time_current(cfs_fs_time_t *);
- * time_t cfs_fs_time_sec (cfs_fs_time_t *);
- * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
- * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
- * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
- * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
- *
- * CFS_TIME_FORMAT
- * CFS_DURATION_FORMAT
- *
- */
-
-#define ONE_BILLION ((u_int64_t)1000000000)
-#define ONE_MILLION ((u_int64_t) 1000000)
-
-#define HZ (100)
-
-struct timeval {
- time_t tv_sec; /* seconds */
- suseconds_t tv_usec; /* microseconds */
-};
-
-struct timespec {
- ulong_ptr tv_sec;
- ulong_ptr tv_nsec;
-};
-
-#ifdef __KERNEL__
-
-#include <libcfs/winnt/portals_compat25.h>
-
-/*
- * Generic kernel stuff
- */
-
-typedef struct timeval cfs_fs_time_t;
-
-typedef u_int64_t cfs_time_t;
-typedef int64_t cfs_duration_t;
-
-static inline void do_gettimeofday(struct timeval *tv)
-{
- LARGE_INTEGER Time;
-
- KeQuerySystemTime(&Time);
-
- tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000);
- tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10;
-}
-
-static inline cfs_time_t JIFFIES()
-{
- LARGE_INTEGER Tick;
- LARGE_INTEGER Elapse;
-
- KeQueryTickCount(&Tick);
-
- Elapse.QuadPart = Tick.QuadPart * KeQueryTimeIncrement();
- Elapse.QuadPart /= (10000000 / HZ);
-
- return Elapse.QuadPart;
-}
-
-static inline cfs_time_t cfs_time_current(void)
-{
- return JIFFIES();
-}
-
-static inline cfs_time_t cfs_time_current_sec(void)
-{
- return (JIFFIES() / HZ);
-}
-
-static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
-{
- return (t + d);
-}
-
-static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
-{
- return (t1 - t2);
-}
-
-static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
-{
- return ((int64_t)t1 - (int64_t)t2) < 0;
-}
-
-static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
-{
- return ((int64_t)t1 - (int64_t)t2) <= 0;
-}
-
-static inline void cfs_fs_time_current(cfs_fs_time_t *t)
-{
- ULONG Linux;
- LARGE_INTEGER Sys;
-
- KeQuerySystemTime(&Sys);
-
- RtlTimeToSecondsSince1970(&Sys, &Linux);
-
- t->tv_sec = Linux;
- t->tv_usec = (Sys.LowPart % 10000000) / 10;
-}
-
-static inline cfs_time_t cfs_fs_time_sec(cfs_fs_time_t *t)
-{
- return t->tv_sec;
-}
-
-static inline u_int64_t __cfs_fs_time_flat(cfs_fs_time_t *t)
-{
- return ((u_int64_t)t->tv_sec) * ONE_MILLION + t->tv_usec;
-}
-
-static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
-{
- return (__cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2));
-}
-
-static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
-{
- return (__cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2));
-}
-
-static inline cfs_duration_t cfs_time_seconds(int seconds)
-{
- return (cfs_duration_t)seconds * HZ;
-}
-
-static inline cfs_time_t cfs_duration_sec(cfs_duration_t d)
-{
- return d / HZ;
-}
-
-static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
-{
- s->tv_sec = (suseconds_t) (d / HZ);
- s->tv_usec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) *
- ONE_MILLION / HZ);
-}
-
-static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
-{
- s->tv_sec = (suseconds_t) (d / HZ);
- s->tv_nsec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) *
- ONE_BILLION / HZ);
-}
-
-static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
-{
- *v = *t;
-}
-
-static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
-{
- s->tv_sec = t->tv_sec;
- s->tv_nsec = t->tv_usec * 1000;
-}
-
-#define cfs_time_current_64 cfs_time_current
-#define cfs_time_add_64 cfs_time_add
-#define cfs_time_shift_64 cfs_time_shift
-#define cfs_time_before_64 cfs_time_before
-#define cfs_time_beforeq_64 cfs_time_beforeq
-
-/*
- * One jiffy
- */
-#define CFS_TICK (1)
-
-#define LTIME_S(t) (t)
-
-#define CFS_TIME_T "%I64u"
-#define CFS_DURATION_T "%I64d"
-
-#else /* !__KERNEL__ */
-
-/*
- * Liblustre. time(2) based implementation.
- */
-#include <libcfs/user-time.h>
-
-
-//
-// Time routines ...
-//
-
-NTSYSAPI
-CCHAR
-NTAPI
-NtQuerySystemTime(
- OUT PLARGE_INTEGER CurrentTime
- );
-
-
-NTSYSAPI
-BOOLEAN
-NTAPI
-RtlTimeToSecondsSince1970(
- IN PLARGE_INTEGER Time,
- OUT PULONG ElapsedSeconds
- );
-
-
-NTSYSAPI
-VOID
-NTAPI
-RtlSecondsSince1970ToTime(
- IN ULONG ElapsedSeconds,
- OUT PLARGE_INTEGER Time
- );
-
-NTSYSAPI
-VOID
-NTAPI
-Sleep(
- DWORD dwMilliseconds // sleep time in milliseconds
-);
-
-
-static inline void sleep(int time)
-{
- DWORD Time = 1000 * time;
- Sleep(Time);
-}
-
-
-static inline void do_gettimeofday(struct timeval *tv)
-{
- LARGE_INTEGER Time;
-
- NtQuerySystemTime(&Time);
-
- tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000);
- tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10;
-}
-
-static inline int gettimeofday(struct timeval *tv, void * tz)
-{
- do_gettimeofday(tv);
- return 0;
-}
-
-#endif /* __KERNEL__ */
-
-/* __LIBCFS_LINUX_LINUX_TIME_H__ */
-#endif
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic types definitions
- *
- */
-
-#ifndef _WINNT_TYPE_H
-#define _WINNT_TYPE_H
-
-#ifdef __KERNEL__
-
-#include <ntifs.h>
-#include <windef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-
-#include <tdi.h>
-#include <tdikrnl.h>
-#include <tdiinfo.h>
-
-#else
-
-#include <ntddk.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <time.h>
-#include <io.h>
-#include <string.h>
-#include <assert.h>
-
-#endif
-
-
-#define __LITTLE_ENDIAN
-
-#define inline __inline
-#define __inline__ __inline
-
-typedef unsigned __int8 __u8;
-typedef signed __int8 __s8;
-
-typedef signed __int64 __s64;
-typedef unsigned __int64 __u64;
-
-typedef signed __int16 __s16;
-typedef unsigned __int16 __u16;
-
-typedef signed __int32 __s32;
-typedef unsigned __int32 __u32;
-
-typedef signed __int64 __s64;
-typedef unsigned __int64 __u64;
-
-typedef unsigned long ULONG;
-
-
-#if defined(_WIN64)
- #define long_ptr __int64
- #define ulong_ptr unsigned __int64
- #define BITS_PER_LONG (64)
-#else
- #define long_ptr long
- #define ulong_ptr unsigned long
- #define BITS_PER_LONG (32)
-
-#endif
-
-/* bsd */
-typedef unsigned char u_char;
-typedef unsigned short u_short;
-typedef unsigned int u_int;
-typedef unsigned long u_long;
-
-/* sysv */
-typedef unsigned char unchar;
-typedef unsigned short ushort;
-typedef unsigned int uint;
-typedef unsigned long ulong;
-
-#ifndef __BIT_TYPES_DEFINED__
-#define __BIT_TYPES_DEFINED__
-
-typedef __u8 u_int8_t;
-typedef __s8 int8_t;
-typedef __u16 u_int16_t;
-typedef __s16 int16_t;
-typedef __u32 u_int32_t;
-typedef __s32 int32_t;
-
-#endif /* !(__BIT_TYPES_DEFINED__) */
-
-typedef __u8 uint8_t;
-typedef __u16 uint16_t;
-typedef __u32 uint32_t;
-
-typedef __u64 uint64_t;
-typedef __u64 u_int64_t;
-typedef __s64 int64_t;
-
-typedef long ssize_t;
-
-typedef __u32 suseconds_t;
-
-typedef __u32 pid_t, tid_t;
-
-typedef __u16 uid_t, gid_t;
-
-typedef __u16 mode_t;
-typedef __u16 umode_t;
-
-typedef ulong_ptr sigset_t;
-
-typedef uint64_t loff_t;
-typedef HANDLE cfs_handle_t;
-typedef uint64_t cycles_t;
-
-#ifndef INVALID_HANDLE_VALUE
-#define INVALID_HANDLE_VALUE ((HANDLE)-1)
-#endif
-
-
-#ifdef __KERNEL__ /* kernel */
-
-typedef __u32 off_t;
-typedef __u32 time_t;
-
-typedef unsigned short kdev_t;
-
-#else /* !__KERNEL__ */
-
-typedef int BOOL;
-typedef __u8 BYTE;
-typedef __u16 WORD;
-typedef __u32 DWORD;
-
-#endif /* __KERNEL__ */
-
-/*
- * Conastants suffix
- */
-
-#define ULL i64
-#define ull i64
-
-/*
- * Winnt kernel has no capabilities.
- */
-
-typedef __u32 cfs_kernel_cap_t;
-
-#define INT_MAX ((int)(~0U>>1))
-#define INT_MIN (-INT_MAX - 1)
-#define UINT_MAX (~0U)
-
-#endif /* _WINNT_TYPES_H */
-
-
-/*
- * Bytes order
- */
-
-//
-// Byte order swapping routines
-//
-
-
-#define ___swab16(x) RtlUshortByteSwap(x)
-#define ___swab32(x) RtlUlongByteSwap(x)
-#define ___swab64(x) RtlUlonglongByteSwap(x)
-
-#define ___constant_swab16(x) \
- ((__u16)( \
- (((__u16)(x) & (__u16)0x00ffU) << 8) | \
- (((__u16)(x) & (__u16)0xff00U) >> 8) ))
-
-#define ___constant_swab32(x) \
- ((__u32)( \
- (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
- (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \
- (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \
- (((__u32)(x) & (__u32)0xff000000UL) >> 24) ))
-
-#define ___constant_swab64(x) \
- ((__u64)( \
- (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUL) << 56) | \
- (__u64)(((__u64)(x) & (__u64)0x000000000000ff00UL) << 40) | \
- (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000UL) << 24) | \
- (__u64)(((__u64)(x) & (__u64)0x00000000ff000000UL) << 8) | \
- (__u64)(((__u64)(x) & (__u64)0x000000ff00000000UL) >> 8) | \
- (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000UL) >> 24) | \
- (__u64)(((__u64)(x) & (__u64)0x00ff000000000000UL) >> 40) | \
- (__u64)(((__u64)(x) & (__u64)0xff00000000000000UL) >> 56) ))
-
-
-#define __swab16(x) ___constant_swab16(x)
-#define __swab32(x) ___constant_swab32(x)
-#define __swab64(x) ___constant_swab64(x)
-
-#define __swab16s(x) do { *(x) = __swab16((USHORT)(*(x)));} while(0)
-#define __swab32s(x) do { *(x) = __swab32((ULONG)(*(x)));} while(0)
-#define __swab64s(x) do { *(x) = __swab64((ULONGLONG)(*(x)));} while(0)
-
-#define __constant_htonl(x) ___constant_swab32((x))
-#define __constant_ntohl(x) ___constant_swab32((x))
-#define __constant_htons(x) ___constant_swab16((x))
-#define __constant_ntohs(x) ___constant_swab16((x))
-#define __constant_cpu_to_le64(x) ((__u64)(x))
-#define __constant_le64_to_cpu(x) ((__u64)(x))
-#define __constant_cpu_to_le32(x) ((__u32)(x))
-#define __constant_le32_to_cpu(x) ((__u32)(x))
-#define __constant_cpu_to_le16(x) ((__u16)(x))
-#define __constant_le16_to_cpu(x) ((__u16)(x))
-#define __constant_cpu_to_be64(x) ___constant_swab64((x))
-#define __constant_be64_to_cpu(x) ___constant_swab64((x))
-#define __constant_cpu_to_be32(x) ___constant_swab32((x))
-#define __constant_be32_to_cpu(x) ___constant_swab32((x))
-#define __constant_cpu_to_be16(x) ___constant_swab16((x))
-#define __constant_be16_to_cpu(x) ___constant_swab16((x))
-#define __cpu_to_le64(x) ((__u64)(x))
-#define __le64_to_cpu(x) ((__u64)(x))
-#define __cpu_to_le32(x) ((__u32)(x))
-#define __le32_to_cpu(x) ((__u32)(x))
-#define __cpu_to_le16(x) ((__u16)(x))
-#define __le16_to_cpu(x) ((__u16)(x))
-#define __cpu_to_be64(x) __swab64((x))
-#define __be64_to_cpu(x) __swab64((x))
-#define __cpu_to_be32(x) __swab32((x))
-#define __be32_to_cpu(x) __swab32((x))
-#define __cpu_to_be16(x) __swab16((x))
-#define __be16_to_cpu(x) __swab16((x))
-#define __cpu_to_le64p(x) (*(__u64*)(x))
-#define __le64_to_cpup(x) (*(__u64*)(x))
-#define __cpu_to_le32p(x) (*(__u32*)(x))
-#define __le32_to_cpup(x) (*(__u32*)(x))
-#define __cpu_to_le16p(x) (*(__u16*)(x))
-#define __le16_to_cpup(x) (*(__u16*)(x))
-#define __cpu_to_be64p(x) __swab64p((x))
-#define __be64_to_cpup(x) __swab64p((x))
-#define __cpu_to_be32p(x) __swab32p((x))
-#define __be32_to_cpup(x) __swab32p((x))
-#define __cpu_to_be16p(x) __swab16p((x))
-#define __be16_to_cpup(x) __swab16p((x))
-#define __cpu_to_le64s(x) do {} while (0)
-#define __le64_to_cpus(x) do {} while (0)
-#define __cpu_to_le32s(x) do {} while (0)
-#define __le32_to_cpus(x) do {} while (0)
-#define __cpu_to_le16s(x) do {} while (0)
-#define __le16_to_cpus(x) do {} while (0)
-#define __cpu_to_be64s(x) __swab64s((x))
-#define __be64_to_cpus(x) __swab64s((x))
-#define __cpu_to_be32s(x) __swab32s((x))
-#define __be32_to_cpus(x) __swab32s((x))
-#define __cpu_to_be16s(x) __swab16s((x))
-#define __be16_to_cpus(x) __swab16s((x))
-
-#ifndef cpu_to_le64
-#define cpu_to_le64 __cpu_to_le64
-#define le64_to_cpu __le64_to_cpu
-#define cpu_to_le32 __cpu_to_le32
-#define le32_to_cpu __le32_to_cpu
-#define cpu_to_le16 __cpu_to_le16
-#define le16_to_cpu __le16_to_cpu
-#endif
-
-#define cpu_to_be64 __cpu_to_be64
-#define be64_to_cpu __be64_to_cpu
-#define cpu_to_be32 __cpu_to_be32
-#define be32_to_cpu __be32_to_cpu
-#define cpu_to_be16 __cpu_to_be16
-#define be16_to_cpu __be16_to_cpu
-#define cpu_to_le64p __cpu_to_le64p
-#define le64_to_cpup __le64_to_cpup
-#define cpu_to_le32p __cpu_to_le32p
-#define le32_to_cpup __le32_to_cpup
-#define cpu_to_le16p __cpu_to_le16p
-#define le16_to_cpup __le16_to_cpup
-#define cpu_to_be64p __cpu_to_be64p
-#define be64_to_cpup __be64_to_cpup
-#define cpu_to_be32p __cpu_to_be32p
-#define be32_to_cpup __be32_to_cpup
-#define cpu_to_be16p __cpu_to_be16p
-#define be16_to_cpup __be16_to_cpup
-#define cpu_to_le64s __cpu_to_le64s
-#define le64_to_cpus __le64_to_cpus
-#define cpu_to_le32s __cpu_to_le32s
-#define le32_to_cpus __le32_to_cpus
-#define cpu_to_le16s __cpu_to_le16s
-#define le16_to_cpus __le16_to_cpus
-#define cpu_to_be64s __cpu_to_be64s
-#define be64_to_cpus __be64_to_cpus
-#define cpu_to_be32s __cpu_to_be32s
-#define be32_to_cpus __be32_to_cpus
-#define cpu_to_be16s __cpu_to_be16s
-#define be16_to_cpus __be16_to_cpus
-
-
-//
-// Network to host byte swap functions
-//
-
-#define ntohl(x) ( ( ( ( x ) & 0x000000ff ) << 24 ) | \
- ( ( ( x ) & 0x0000ff00 ) << 8 ) | \
- ( ( ( x ) & 0x00ff0000 ) >> 8 ) | \
- ( ( ( x ) & 0xff000000 ) >> 24 ) )
-
-#define ntohs(x) ( ( ( ( x ) & 0xff00 ) >> 8 ) | \
- ( ( ( x ) & 0x00ff ) << 8 ) )
-
-
-#define htonl(x) ntohl(x)
-#define htons(x) ntohs(x)
-
-
-
-#ifndef _I386_ERRNO_H
-#define _I386_ERRNO_H
-
-#define EPERM 1 /* Operation not permitted */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* Interrupted system call */
-#define EIO 5 /* I/O error */
-#define ENXIO 6 /* No such device or address */
-#define E2BIG 7 /* Arg list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file number */
-#define ECHILD 10 /* No child processes */
-#define EAGAIN 11 /* Try again */
-#define ENOMEM 12 /* Out of memory */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#define ENOTBLK 15 /* Block device required */
-#define EBUSY 16 /* Device or resource busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* No such device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* File table overflow */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Not a typewriter */
-#define ETXTBSY 26 /* Text file busy */
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read-only file system */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-#define EDOM 33 /* Math argument out of domain of func */
-#define ERANGE 34 /* Math result not representable */
-#undef EDEADLK
-#define EDEADLK 35 /* Resource deadlock would occur */
-#undef ENAMETOOLONG
-#define ENAMETOOLONG 36 /* File name too long */
-#undef ENOLCK
-#define ENOLCK 37 /* No record locks available */
-#undef ENOSYS
-#define ENOSYS 38 /* Function not implemented */
-#undef ENOTEMPTY
-#define ENOTEMPTY 39 /* Directory not empty */
-#define ELOOP 40 /* Too many symbolic links encountered */
-#define EWOULDBLOCK EAGAIN /* Operation would block */
-#define ENOMSG 42 /* No message of desired type */
-#define EIDRM 43 /* Identifier removed */
-#define ECHRNG 44 /* Channel number out of range */
-#define EL2NSYNC 45 /* Level 2 not synchronized */
-#define EL3HLT 46 /* Level 3 halted */
-#define EL3RST 47 /* Level 3 reset */
-#define ELNRNG 48 /* Link number out of range */
-#define EUNATCH 49 /* Protocol driver not attached */
-#define ENOCSI 50 /* No CSI structure available */
-#define EL2HLT 51 /* Level 2 halted */
-#define EBADE 52 /* Invalid exchange */
-#define EBADR 53 /* Invalid request descriptor */
-#define EXFULL 54 /* Exchange full */
-#define ENOANO 55 /* No anode */
-#define EBADRQC 56 /* Invalid request code */
-#define EBADSLT 57 /* Invalid slot */
-
-#define EDEADLOCK EDEADLK
-
-#define EBFONT 59 /* Bad font file format */
-#define ENOSTR 60 /* Device not a stream */
-#define ENODATA 61 /* No data available */
-#define ETIME 62 /* Timer expired */
-#define ENOSR 63 /* Out of streams resources */
-#define ENONET 64 /* Machine is not on the network */
-#define ENOPKG 65 /* Package not installed */
-#define EREMOTE 66 /* Object is remote */
-#define ENOLINK 67 /* Link has been severed */
-#define EADV 68 /* Advertise error */
-#define ESRMNT 69 /* Srmount error */
-#define ECOMM 70 /* Communication error on send */
-#define EPROTO 71 /* Protocol error */
-#define EMULTIHOP 72 /* Multihop attempted */
-#define EDOTDOT 73 /* RFS specific error */
-#define EBADMSG 74 /* Not a data message */
-#define EOVERFLOW 75 /* Value too large for defined data type */
-#define ENOTUNIQ 76 /* Name not unique on network */
-#define EBADFD 77 /* File descriptor in bad state */
-#define EREMCHG 78 /* Remote address changed */
-#define ELIBACC 79 /* Can not access a needed shared library */
-#define ELIBBAD 80 /* Accessing a corrupted shared library */
-#define ELIBSCN 81 /* .lib section in a.out corrupted */
-#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
-#define ELIBEXEC 83 /* Cannot exec a shared library directly */
-#undef EILSEQ
-#define EILSEQ 84 /* Illegal byte sequence */
-#define ERESTART 85 /* Interrupted system call should be restarted */
-#define ESTRPIPE 86 /* Streams pipe error */
-#define EUSERS 87 /* Too many users */
-#define ENOTSOCK 88 /* Socket operation on non-socket */
-#define EDESTADDRREQ 89 /* Destination address required */
-#define EMSGSIZE 90 /* Message too long */
-#define EPROTOTYPE 91 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 92 /* Protocol not available */
-#define EPROTONOSUPPORT 93 /* Protocol not supported */
-#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
-#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
-#define EPFNOSUPPORT 96 /* Protocol family not supported */
-#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
-#define EADDRINUSE 98 /* Address already in use */
-#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
-#define ENETDOWN 100 /* Network is down */
-#define ENETUNREACH 101 /* Network is unreachable */
-#define ENETRESET 102 /* Network dropped connection because of reset */
-#define ECONNABORTED 103 /* Software caused connection abort */
-#define ECONNRESET 104 /* Connection reset by peer */
-#define ENOBUFS 105 /* No buffer space available */
-#define EISCONN 106 /* Transport endpoint is already connected */
-#define ENOTCONN 107 /* Transport endpoint is not connected */
-#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
-#define ETOOMANYREFS 109 /* Too many references: cannot splice */
-#define ETIMEDOUT 110 /* Connection timed out */
-#define ECONNREFUSED 111 /* Connection refused */
-#define EHOSTDOWN 112 /* Host is down */
-#define EHOSTUNREACH 113 /* No route to host */
-#define EALREADY 114 /* Operation already in progress */
-#define EINPROGRESS 115 /* Operation now in progress */
-#define ESTALE 116 /* Stale NFS file handle */
-#define EUCLEAN 117 /* Structure needs cleaning */
-#define ENOTNAM 118 /* Not a XENIX named type file */
-#define ENAVAIL 119 /* No XENIX semaphores available */
-#define EISNAM 120 /* Is a named type file */
-#define EREMOTEIO 121 /* Remote I/O error */
-#define EDQUOT 122 /* Quota exceeded */
-
-#define ENOMEDIUM 123 /* No medium found */
-#define EMEDIUMTYPE 124 /* Wrong medium type */
-
-/* Should never be seen by user programs */
-#define ERESTARTSYS 512
-#define ERESTARTNOINTR 513
-#define ERESTARTNOHAND 514 /* restart if no handler.. */
-#define ENOIOCTLCMD 515 /* No ioctl command */
-
-/* Defined for the NFSv3 protocol */
-#define EBADHANDLE 521 /* Illegal NFS file handle */
-#define ENOTSYNC 522 /* Update synchronization mismatch */
-#define EBADCOOKIE 523 /* Cookie is stale */
-#define ENOTSUPP 524 /* Operation is not supported */
-#define ETOOSMALL 525 /* Buffer or request is too small */
-#define ESERVERFAULT 526 /* An untranslatable error occurred */
-#define EBADTYPE 527 /* Type not supported by server */
-#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
-
-
-
-/* open/fcntl - O_SYNC is only implemented on blocks devices and on files
- located on an ext2 file system */
-#define O_ACCMODE 0003
-#define O_RDONLY 00
-#define O_WRONLY 01
-#define O_RDWR 02
-#define O_CREAT 0100 /* not fcntl */
-#define O_EXCL 0200 /* not fcntl */
-#define O_NOCTTY 0400 /* not fcntl */
-#define O_TRUNC 01000 /* not fcntl */
-#define O_APPEND 02000
-#define O_NONBLOCK 04000
-#define O_NDELAY O_NONBLOCK
-#define O_SYNC 010000
-#define FASYNC 020000 /* fcntl, for BSD compatibility */
-#define O_DIRECT 040000 /* direct disk access hint */
-#define O_LARGEFILE 0100000
-#define O_DIRECTORY 0200000 /* must be a directory */
-#define O_NOFOLLOW 0400000 /* don't follow links */
-
-#define F_DUPFD 0 /* dup */
-#define F_GETFD 1 /* get close_on_exec */
-#define F_SETFD 2 /* set/clear close_on_exec */
-#define F_GETFL 3 /* get file->f_flags */
-#define F_SETFL 4 /* set file->f_flags */
-#define F_GETLK 5
-#define F_SETLK 6
-#define F_SETLKW 7
-
-#define F_SETOWN 8 /* for sockets. */
-#define F_GETOWN 9 /* for sockets. */
-#define F_SETSIG 10 /* for sockets. */
-#define F_GETSIG 11 /* for sockets. */
-
-#define F_GETLK64 12 /* using 'struct flock64' */
-#define F_SETLK64 13
-#define F_SETLKW64 14
-
-/* for F_[GET|SET]FL */
-#define FD_CLOEXEC 1 /* actually anything with low bit set goes */
-
-/* for posix fcntl() and lockf() */
-#define F_RDLCK 0
-#define F_WRLCK 1
-#define F_UNLCK 2
-
-/* for old implementation of bsd flock () */
-#define F_EXLCK 4 /* or 3 */
-#define F_SHLCK 8 /* or 4 */
-
-/* for leases */
-#define F_INPROGRESS 16
-
-/* operations for bsd flock(), also used by the kernel implementation */
-#define LOCK_SH 1 /* shared lock */
-#define LOCK_EX 2 /* exclusive lock */
-#define LOCK_NB 4 /* or'd with one of the above to prevent
- blocking */
-#define LOCK_UN 8 /* remove lock */
-
-#define LOCK_MAND 32 /* This is a mandatory flock */
-#define LOCK_READ 64 /* ... Which allows concurrent read operations */
-#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */
-#define LOCK_RW 192 /* ... Which allows concurrent read & write ops */
-
-#endif
-
-
-#ifndef LIBCFS_SIGNAL_H
-#define LIBCFS_SIGNAL_H
-
-/*
- * signal values ...
- */
-
-#define SIGHUP 1
-#define SIGINT 2
-#define SIGQUIT 3
-#define SIGILL 4
-#define SIGTRAP 5
-#define SIGABRT 6
-#define SIGIOT 6
-#define SIGBUS 7
-#define SIGFPE 8
-#define SIGKILL 9
-#define SIGUSR1 10
-#define SIGSEGV 11
-#define SIGUSR2 12
-#define SIGPIPE 13
-#define SIGALRM 14
-#define SIGTERM 15
-#define SIGSTKFLT 16
-#define SIGCHLD 17
-#define SIGCONT 18
-#define SIGSTOP 19
-#define SIGTSTP 20
-#define SIGTTIN 21
-#define SIGTTOU 22
-#define SIGURG 23
-#define SIGXCPU 24
-#define SIGXFSZ 25
-#define SIGVTALRM 26
-#define SIGPROF 27
-#define SIGWINCH 28
-#define SIGIO 29
-#define SIGPOLL SIGIO
-/*
-#define SIGLOST 29
-*/
-#define SIGPWR 30
-#define SIGSYS 31
-#define SIGUNUSED 31
-
-/* These should not be considered constants from userland. */
-#define SIGRTMIN 32
-#define SIGRTMAX (_NSIG-1)
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP 0x00000001
-#define SA_NOCLDWAIT 0x00000002 /* not supported yet */
-#define SA_SIGINFO 0x00000004
-#define SA_ONSTACK 0x08000000
-#define SA_RESTART 0x10000000
-#define SA_NODEFER 0x40000000
-#define SA_RESETHAND 0x80000000
-
-#define SA_NOMASK SA_NODEFER
-#define SA_ONESHOT SA_RESETHAND
-#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */
-
-#define SA_RESTORER 0x04000000
-
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK 1
-#define SS_DISABLE 2
-
-#define MINSIGSTKSZ 2048
-#define SIGSTKSZ 8192
-
-
-#define sigmask(sig) ((__u32)1 << ((sig) - 1))
-
-#endif // LIBCFS_SIGNAL_H
+++ /dev/null
-Makefile
-Makefile.in
+++ /dev/null
-lnetdir=$(includedir)/lnet
-
-SUBDIRS := linux
-if DARWIN
-SUBDIRS += darwin
-endif
-DIST_SUBDIRS := $(SUBDIRS)
-
-EXTRA_DIST = api.h api-support.h \
- lib-lnet.h lib-types.h lnet.h lnetctl.h types.h \
- socklnd.h ptllnd.h ptllnd_wire.h lnetst.h
+++ /dev/null
-#ifndef __LNET_API_SUPPORT_H__
-#define __LNET_API_SUPPORT_H__
-
-#if defined(__linux__)
-#include <lnet/linux/api-support.h>
-#elif defined(__APPLE__)
-#include <lnet/darwin/api-support.h>
-#elif defined(__WINNT__)
-#include <lnet/winnt/api-support.h>
-#else
-#error Unsupported Operating System
-#endif
-
-#include <lnet/types.h>
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-
-#endif
+++ /dev/null
-#ifndef __LNET_API_H__
-#define __LNET_API_H__
-
-#include <lnet/types.h>
-
-int LNetInit(void);
-void LNetFini(void);
-
-int LNetNIInit(lnet_pid_t requested_pid);
-int LNetNIFini(void);
-
-int LNetGetId(unsigned int index, lnet_process_id_t *id);
-int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order);
-int LNetCtl(unsigned int cmd, void *arg);
-void LNetSnprintHandle (char *str, int str_len, lnet_handle_any_t handle);
-
-/*
- * Portals
- */
-int LNetSetLazyPortal(int portal);
-int LNetClearLazyPortal(int portal);
-
-/*
- * Match entries
- */
-int LNetMEAttach(unsigned int portal,
- lnet_process_id_t match_id_in,
- __u64 match_bits_in,
- __u64 ignore_bits_in,
- lnet_unlink_t unlink_in,
- lnet_ins_pos_t pos_in,
- lnet_handle_me_t *handle_out);
-
-int LNetMEInsert(lnet_handle_me_t current_in,
- lnet_process_id_t match_id_in,
- __u64 match_bits_in,
- __u64 ignore_bits_in,
- lnet_unlink_t unlink_in,
- lnet_ins_pos_t position_in,
- lnet_handle_me_t *handle_out);
-
-int LNetMEUnlink(lnet_handle_me_t current_in);
-
-/*
- * Memory descriptors
- */
-int LNetMDAttach(lnet_handle_me_t current_in,
- lnet_md_t md_in,
- lnet_unlink_t unlink_in,
- lnet_handle_md_t *handle_out);
-
-int LNetMDBind(lnet_md_t md_in,
- lnet_unlink_t unlink_in,
- lnet_handle_md_t *handle_out);
-
-int LNetMDUnlink(lnet_handle_md_t md_in);
-
-/*
- * Event queues
- */
-int LNetEQAlloc(unsigned int count_in,
- lnet_eq_handler_t handler,
- lnet_handle_eq_t *handle_out);
-
-int LNetEQFree(lnet_handle_eq_t eventq_in);
-
-int LNetEQGet(lnet_handle_eq_t eventq_in,
- lnet_event_t *event_out);
-
-
-int LNetEQWait(lnet_handle_eq_t eventq_in,
- lnet_event_t *event_out);
-
-int LNetEQPoll(lnet_handle_eq_t *eventqs_in,
- int neq_in,
- int timeout_ms,
- lnet_event_t *event_out,
- int *which_eq_out);
-
-/*
- * Data movement
- */
-int LNetPut(lnet_nid_t self,
- lnet_handle_md_t md_in,
- lnet_ack_req_t ack_req_in,
- lnet_process_id_t target_in,
- unsigned int portal_in,
- __u64 match_bits_in,
- unsigned int offset_in,
- __u64 hdr_data_in);
-
-int LNetGet(lnet_nid_t self,
- lnet_handle_md_t md_in,
- lnet_process_id_t target_in,
- unsigned int portal_in,
- __u64 match_bits_in,
- unsigned int offset_in);
-
-
-int LNetSetAsync(lnet_process_id_t id, int nasync);
-
-#ifndef __KERNEL__
-/* Temporary workaround to allow uOSS and test programs force server
- * mode in userspace. See comments near ln_server_mode_flag in
- * lnet/lib-types.h */
-
-void lnet_server_mode();
-#endif
-
-#endif
+++ /dev/null
-Makefile
-Makefile.in
+++ /dev/null
-EXTRA_DIST := lib-lnet.h lib-types.h lnet.h api-support.h
+++ /dev/null
-#ifndef __DARWIN_API_SUPPORT_H__
-#define __DARWIN_API_SUPPORT_H__
-
-#ifndef __LNET_API_SUPPORT_H__
-#error Do not #include this file directly. #include <portals/api-support.h> instead
-#endif
-
-#ifndef __KERNEL__
-# include <stdio.h>
-# include <stdlib.h>
-# include <unistd.h>
-# include <time.h>
-
-/* Lots of POSIX dependencies to support PtlEQWait_timeout */
-# include <signal.h>
-# include <setjmp.h>
-# include <time.h>
-
-# ifdef HAVE_LIBREADLINE
-# include <readline/readline.h>
-typedef VFunction rl_vintfunc_t;
-typedef VFunction rl_voidfunc_t;
-# endif
-#endif
-
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_DARWIN_LIB_LNET_H__
-#define __LNET_DARWIN_LIB_LNET_H__
-
-#ifndef __LNET_LIB_LNET_H__
-#error Do not #include this file directly. #include <lnet/lib-lnet.h> instead
-#endif
-
-#include <string.h>
-#include <libcfs/libcfs.h>
-
-#undef LNET_ROUTER
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_DARWIN_LIB_TYPES_H__
-#define __LNET_DARWIN_LIB_TYPES_H__
-
-#ifndef __LNET_LIB_TYPES_H__
-#error Do not #include this file directly. #include <lnet/lib-types.h> instead
-#endif
-
-#include <sys/types.h>
-#include <libcfs/libcfs.h>
-#include <libcfs/list.h>
-
-/*
- * XXX Liang:
- *
- * Temporary fix, because lnet_me_free()->cfs_free->FREE() can be blocked in xnu,
- * at then same time we've taken LNET_LOCK(), which is a spinlock.
- * by using LNET_USE_LIB_FREELIST, we can avoid calling of FREE().
- *
- * A better solution is moving lnet_me_free() out from LNET_LOCK, it's not hard
- * but need to be very careful and take some time.
- */
-#define LNET_USE_LIB_FREELIST
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_DARWIN_LNET_H__
-#define __LNET_DARWIN_LNET_H__
-
-#ifndef __LNET_H__
-#error Do not #include this file directly. #include <lnet/lnet.h> instead
-#endif
-
-/*
- * lnet.h
- *
- * User application interface file
- */
-
-#include <sys/types.h>
-#include <sys/uio.h>
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib-lnet.h
- *
- * Top level include for library side routines
- */
-
-#ifndef __LNET_LIB_LNET_H__
-#define __LNET_LIB_LNET_H__
-
-#if defined(__linux__)
-#include <lnet/linux/lib-lnet.h>
-#elif defined(__APPLE__)
-#include <lnet/darwin/lib-lnet.h>
-#elif defined(__WINNT__)
-#include <lnet/winnt/lib-lnet.h>
-#else
-#error Unsupported Operating System
-#endif
-
-#include <lnet/types.h>
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-types.h>
-
-extern lnet_t the_lnet; /* THE network */
-
-static inline int lnet_is_wire_handle_none (lnet_handle_wire_t *wh)
-{
- return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_NONE.wh_interface_cookie &&
- wh->wh_object_cookie == LNET_WIRE_HANDLE_NONE.wh_object_cookie);
-}
-
-static inline int lnet_md_exhausted (lnet_libmd_t *md)
-{
- return (md->md_threshold == 0 ||
- ((md->md_options & LNET_MD_MAX_SIZE) != 0 &&
- md->md_offset + md->md_max_size > md->md_length));
-}
-
-static inline int lnet_md_unlinkable (lnet_libmd_t *md)
-{
- /* Should unlink md when its refcount is 0 and either:
- * - md has been flagged for deletion (by auto unlink or LNetM[DE]Unlink,
- * in the latter case md may not be exhausted).
- * - auto unlink is on and md is exhausted.
- */
- if (md->md_refcount != 0)
- return 0;
-
- if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) != 0)
- return 1;
-
- return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 &&
- lnet_md_exhausted(md));
-}
-
-#ifdef __KERNEL__
-#define LNET_LOCK() spin_lock(&the_lnet.ln_lock)
-#define LNET_UNLOCK() spin_unlock(&the_lnet.ln_lock)
-#define LNET_MUTEX_DOWN(m) mutex_down(m)
-#define LNET_MUTEX_UP(m) mutex_up(m)
-#else
-# ifndef HAVE_LIBPTHREAD
-#define LNET_SINGLE_THREADED_LOCK(l) \
-do { \
- LASSERT ((l) == 0); \
- (l) = 1; \
-} while (0)
-
-#define LNET_SINGLE_THREADED_UNLOCK(l) \
-do { \
- LASSERT ((l) == 1); \
- (l) = 0; \
-} while (0)
-
-#define LNET_LOCK() LNET_SINGLE_THREADED_LOCK(the_lnet.ln_lock)
-#define LNET_UNLOCK() LNET_SINGLE_THREADED_UNLOCK(the_lnet.ln_lock)
-#define LNET_MUTEX_DOWN(m) LNET_SINGLE_THREADED_LOCK(*(m))
-#define LNET_MUTEX_UP(m) LNET_SINGLE_THREADED_UNLOCK(*(m))
-# else
-#define LNET_LOCK() pthread_mutex_lock(&the_lnet.ln_lock)
-#define LNET_UNLOCK() pthread_mutex_unlock(&the_lnet.ln_lock)
-#define LNET_MUTEX_DOWN(m) pthread_mutex_lock(m)
-#define LNET_MUTEX_UP(m) pthread_mutex_unlock(m)
-# endif
-#endif
-
-#define MAX_PORTALS 64
-
-#ifdef LNET_USE_LIB_FREELIST
-
-#define MAX_MES 2048
-#define MAX_MDS 2048
-#define MAX_MSGS 2048 /* Outstanding messages */
-#define MAX_EQS 512
-
-static inline void *
-lnet_freelist_alloc (lnet_freelist_t *fl)
-{
- /* ALWAYS called with liblock held */
- lnet_freeobj_t *o;
-
- if (list_empty (&fl->fl_list))
- return (NULL);
-
- o = list_entry (fl->fl_list.next, lnet_freeobj_t, fo_list);
- list_del (&o->fo_list);
- return ((void *)&o->fo_contents);
-}
-
-static inline void
-lnet_freelist_free (lnet_freelist_t *fl, void *obj)
-{
- /* ALWAYS called with liblock held */
- lnet_freeobj_t *o = list_entry (obj, lnet_freeobj_t, fo_contents);
-
- list_add (&o->fo_list, &fl->fl_list);
-}
-
-
-static inline lnet_eq_t *
-lnet_eq_alloc (void)
-{
- /* NEVER called with liblock held */
- lnet_eq_t *eq;
-
- LNET_LOCK();
- eq = (lnet_eq_t *)lnet_freelist_alloc(&the_lnet.ln_free_eqs);
- LNET_UNLOCK();
-
- return (eq);
-}
-
-static inline void
-lnet_eq_free (lnet_eq_t *eq)
-{
- /* ALWAYS called with liblock held */
- lnet_freelist_free(&the_lnet.ln_free_eqs, eq);
-}
-
-static inline lnet_libmd_t *
-lnet_md_alloc (lnet_md_t *umd)
-{
- /* NEVER called with liblock held */
- lnet_libmd_t *md;
-
- LNET_LOCK();
- md = (lnet_libmd_t *)lnet_freelist_alloc(&the_lnet.ln_free_mds);
- LNET_UNLOCK();
-
- return (md);
-}
-
-static inline void
-lnet_md_free (lnet_libmd_t *md)
-{
- /* ALWAYS called with liblock held */
- lnet_freelist_free (&the_lnet.ln_free_mds, md);
-}
-
-static inline lnet_me_t *
-lnet_me_alloc (void)
-{
- /* NEVER called with liblock held */
- lnet_me_t *me;
-
- LNET_LOCK();
- me = (lnet_me_t *)lnet_freelist_alloc(&the_lnet.ln_free_mes);
- LNET_UNLOCK();
-
- return (me);
-}
-
-static inline void
-lnet_me_free (lnet_me_t *me)
-{
- /* ALWAYS called with liblock held */
- lnet_freelist_free (&the_lnet.ln_free_mes, me);
-}
-
-static inline lnet_msg_t *
-lnet_msg_alloc (void)
-{
- /* NEVER called with liblock held */
- lnet_msg_t *msg;
-
- LNET_LOCK();
- msg = (lnet_msg_t *)lnet_freelist_alloc(&the_lnet.ln_free_msgs);
- LNET_UNLOCK();
-
- if (msg != NULL) {
- /* NULL pointers, clear flags etc */
- memset (msg, 0, sizeof (*msg));
-#ifdef CRAY_XT3
- msg->msg_ev.uid = LNET_UID_ANY;
-#endif
- }
- return(msg);
-}
-
-static inline void
-lnet_msg_free (lnet_msg_t *msg)
-{
- /* ALWAYS called with liblock held */
- LASSERT (!msg->msg_onactivelist);
- lnet_freelist_free(&the_lnet.ln_free_msgs, msg);
-}
-
-#else
-
-static inline lnet_eq_t *
-lnet_eq_alloc (void)
-{
- /* NEVER called with liblock held */
- lnet_eq_t *eq;
-
- LIBCFS_ALLOC(eq, sizeof(*eq));
- return (eq);
-}
-
-static inline void
-lnet_eq_free (lnet_eq_t *eq)
-{
- /* ALWAYS called with liblock held */
- LIBCFS_FREE(eq, sizeof(*eq));
-}
-
-static inline lnet_libmd_t *
-lnet_md_alloc (lnet_md_t *umd)
-{
- /* NEVER called with liblock held */
- lnet_libmd_t *md;
- int size;
- unsigned int niov;
-
- if ((umd->options & LNET_MD_KIOV) != 0) {
- niov = umd->length;
- size = offsetof(lnet_libmd_t, md_iov.kiov[niov]);
- } else {
- niov = ((umd->options & LNET_MD_IOVEC) != 0) ?
- umd->length : 1;
- size = offsetof(lnet_libmd_t, md_iov.iov[niov]);
- }
-
- LIBCFS_ALLOC(md, size);
-
- if (md != NULL) {
- /* Set here in case of early free */
- md->md_options = umd->options;
- md->md_niov = niov;
- }
-
- return (md);
-}
-
-static inline void
-lnet_md_free (lnet_libmd_t *md)
-{
- /* ALWAYS called with liblock held */
- int size;
-
- if ((md->md_options & LNET_MD_KIOV) != 0)
- size = offsetof(lnet_libmd_t, md_iov.kiov[md->md_niov]);
- else
- size = offsetof(lnet_libmd_t, md_iov.iov[md->md_niov]);
-
- LIBCFS_FREE(md, size);
-}
-
-static inline lnet_me_t *
-lnet_me_alloc (void)
-{
- /* NEVER called with liblock held */
- lnet_me_t *me;
-
- LIBCFS_ALLOC(me, sizeof(*me));
- return (me);
-}
-
-static inline void
-lnet_me_free(lnet_me_t *me)
-{
- /* ALWAYS called with liblock held */
- LIBCFS_FREE(me, sizeof(*me));
-}
-
-static inline lnet_msg_t *
-lnet_msg_alloc(void)
-{
- /* NEVER called with liblock held */
- lnet_msg_t *msg;
-
- LIBCFS_ALLOC(msg, sizeof(*msg));
-
- if (msg != NULL) {
- /* NULL pointers, clear flags etc */
- memset (msg, 0, sizeof (*msg));
-#ifdef CRAY_XT3
- msg->msg_ev.uid = LNET_UID_ANY;
-#endif
- }
- return (msg);
-}
-
-static inline void
-lnet_msg_free(lnet_msg_t *msg)
-{
- /* ALWAYS called with liblock held */
- LASSERT (!msg->msg_onactivelist);
- LIBCFS_FREE(msg, sizeof(*msg));
-}
-#endif
-
-extern lnet_libhandle_t *lnet_lookup_cookie (__u64 cookie, int type);
-extern void lnet_initialise_handle (lnet_libhandle_t *lh, int type);
-extern void lnet_invalidate_handle (lnet_libhandle_t *lh);
-
-static inline void
-lnet_eq2handle (lnet_handle_eq_t *handle, lnet_eq_t *eq)
-{
- if (eq == NULL) {
- *handle = LNET_EQ_NONE;
- return;
- }
-
- handle->cookie = eq->eq_lh.lh_cookie;
-}
-
-static inline lnet_eq_t *
-lnet_handle2eq (lnet_handle_eq_t *handle)
-{
- /* ALWAYS called with liblock held */
- lnet_libhandle_t *lh = lnet_lookup_cookie(handle->cookie,
- LNET_COOKIE_TYPE_EQ);
- if (lh == NULL)
- return (NULL);
-
- return (lh_entry (lh, lnet_eq_t, eq_lh));
-}
-
-static inline void
-lnet_md2handle (lnet_handle_md_t *handle, lnet_libmd_t *md)
-{
- handle->cookie = md->md_lh.lh_cookie;
-}
-
-static inline lnet_libmd_t *
-lnet_handle2md (lnet_handle_md_t *handle)
-{
- /* ALWAYS called with liblock held */
- lnet_libhandle_t *lh = lnet_lookup_cookie(handle->cookie,
- LNET_COOKIE_TYPE_MD);
- if (lh == NULL)
- return (NULL);
-
- return (lh_entry (lh, lnet_libmd_t, md_lh));
-}
-
-static inline lnet_libmd_t *
-lnet_wire_handle2md (lnet_handle_wire_t *wh)
-{
- /* ALWAYS called with liblock held */
- lnet_libhandle_t *lh;
-
- if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie)
- return (NULL);
-
- lh = lnet_lookup_cookie(wh->wh_object_cookie,
- LNET_COOKIE_TYPE_MD);
- if (lh == NULL)
- return (NULL);
-
- return (lh_entry (lh, lnet_libmd_t, md_lh));
-}
-
-static inline void
-lnet_me2handle (lnet_handle_me_t *handle, lnet_me_t *me)
-{
- handle->cookie = me->me_lh.lh_cookie;
-}
-
-static inline lnet_me_t *
-lnet_handle2me (lnet_handle_me_t *handle)
-{
- /* ALWAYS called with liblock held */
- lnet_libhandle_t *lh = lnet_lookup_cookie(handle->cookie,
- LNET_COOKIE_TYPE_ME);
- if (lh == NULL)
- return (NULL);
-
- return (lh_entry (lh, lnet_me_t, me_lh));
-}
-
-static inline void
-lnet_peer_addref_locked(lnet_peer_t *lp)
-{
- LASSERT (lp->lp_refcount > 0);
- lp->lp_refcount++;
-}
-
-extern void lnet_destroy_peer_locked(lnet_peer_t *lp);
-
-static inline void
-lnet_peer_decref_locked(lnet_peer_t *lp)
-{
- LASSERT (lp->lp_refcount > 0);
- lp->lp_refcount--;
- if (lp->lp_refcount == 0)
- lnet_destroy_peer_locked(lp);
-}
-
-static inline int
-lnet_isrouter(lnet_peer_t *lp)
-{
- return lp->lp_rtr_refcount != 0;
-}
-
-static inline void
-lnet_ni_addref_locked(lnet_ni_t *ni)
-{
- LASSERT (ni->ni_refcount > 0);
- ni->ni_refcount++;
-}
-
-static inline void
-lnet_ni_addref(lnet_ni_t *ni)
-{
- LNET_LOCK();
- lnet_ni_addref_locked(ni);
- LNET_UNLOCK();
-}
-
-static inline void
-lnet_ni_decref_locked(lnet_ni_t *ni)
-{
- LASSERT (ni->ni_refcount > 0);
- ni->ni_refcount--;
- if (ni->ni_refcount == 0)
- list_add_tail(&ni->ni_list, &the_lnet.ln_zombie_nis);
-}
-
-static inline void
-lnet_ni_decref(lnet_ni_t *ni)
-{
- LNET_LOCK();
- lnet_ni_decref_locked(ni);
- LNET_UNLOCK();
-}
-
-static inline lnet_nid_t
-lnet_ptlcompat_srcnid(lnet_nid_t src, lnet_nid_t dst)
-{
- /* Give myself a portals srcnid if I'm sending to portals */
- if (the_lnet.ln_ptlcompat > 0 &&
- LNET_NIDNET(dst) == 0)
- return LNET_MKNID(0, LNET_NIDADDR(src));
-
- return src;
-}
-
-static inline int
-lnet_ptlcompat_matchnid(lnet_nid_t lnet_nid, lnet_nid_t ptl_nid)
-{
- return ((ptl_nid == lnet_nid) ||
- (the_lnet.ln_ptlcompat > 0 &&
- LNET_NIDNET(ptl_nid) == 0 &&
- LNET_NETTYP(LNET_NIDNET(lnet_nid)) != LOLND &&
- LNET_NIDADDR(ptl_nid) == LNET_NIDADDR(lnet_nid)));
-}
-
-static inline int
-lnet_ptlcompat_matchnet(__u32 lnet_net, __u32 ptl_net)
-{
- return ((ptl_net == lnet_net) ||
- (the_lnet.ln_ptlcompat > 0 &&
- ptl_net == 0 &&
- LNET_NETTYP(lnet_net) != LOLND));
-}
-
-static inline struct list_head *
-lnet_nid2peerhash (lnet_nid_t nid)
-{
- unsigned int idx = LNET_NIDADDR(nid) % LNET_PEER_HASHSIZE;
-
- return &the_lnet.ln_peer_hash[idx];
-}
-
-extern lnd_t the_lolnd;
-
-#ifndef __KERNEL__
-/* unconditional registration */
-#define LNET_REGISTER_ULND(lnd) \
-do { \
- extern lnd_t lnd; \
- \
- lnet_register_lnd(&(lnd)); \
-} while (0)
-
-/* conditional registration */
-#define LNET_REGISTER_ULND_IF_PRESENT(lnd) \
-do { \
- extern lnd_t lnd __attribute__ ((weak, alias("the_lolnd"))); \
- \
- if (&(lnd) != &the_lolnd) \
- lnet_register_lnd(&(lnd)); \
-} while (0)
-#endif
-
-#ifdef CRAY_XT3
-inline static void
-lnet_set_msg_uid(lnet_ni_t *ni, lnet_msg_t *msg, lnet_uid_t uid)
-{
- LASSERT (msg->msg_ev.uid == LNET_UID_ANY);
- msg->msg_ev.uid = uid;
-}
-#endif
-
-extern lnet_ni_t *lnet_nid2ni_locked (lnet_nid_t nid);
-extern lnet_ni_t *lnet_net2ni_locked (__u32 net);
-static inline lnet_ni_t *
-lnet_net2ni (__u32 net)
-{
- lnet_ni_t *ni;
-
- LNET_LOCK();
- ni = lnet_net2ni_locked(net);
- LNET_UNLOCK();
-
- return ni;
-}
-
-int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, time_t when);
-int lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway_nid);
-int lnet_check_routes(void);
-int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
-void lnet_destroy_routes(void);
-int lnet_get_route(int idx, __u32 *net, __u32 *hops,
- lnet_nid_t *gateway, __u32 *alive);
-void lnet_proc_init(void);
-void lnet_proc_fini(void);
-void lnet_init_rtrpools(void);
-int lnet_alloc_rtrpools(int im_a_router);
-void lnet_free_rtrpools(void);
-lnet_remotenet_t *lnet_find_net_locked (__u32 net);
-
-int lnet_islocalnid(lnet_nid_t nid);
-int lnet_islocalnet(__u32 net);
-
-void lnet_build_unlink_event(lnet_libmd_t *md, lnet_event_t *ev);
-void lnet_enq_event_locked(lnet_eq_t *eq, lnet_event_t *ev);
-void lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target,
- unsigned int offset, unsigned int len);
-int lnet_send(lnet_nid_t nid, lnet_msg_t *msg);
-void lnet_return_credits_locked (lnet_msg_t *msg);
-void lnet_match_blocked_msg(lnet_libmd_t *md);
-int lnet_parse (lnet_ni_t *ni, lnet_hdr_t *hdr,
- lnet_nid_t fromnid, void *private, int rdma_req);
-void lnet_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-lnet_msg_t *lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *get_msg);
-void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *msg, unsigned int len);
-void lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int rc);
-
-char *lnet_msgtyp2str (int type);
-void lnet_print_hdr (lnet_hdr_t * hdr);
-int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold);
-
-unsigned int lnet_iov_nob (unsigned int niov, struct iovec *iov);
-int lnet_extract_iov (int dst_niov, struct iovec *dst,
- int src_niov, struct iovec *src,
- unsigned int offset, unsigned int len);
-
-unsigned int lnet_kiov_nob (unsigned int niov, lnet_kiov_t *iov);
-int lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst,
- int src_niov, lnet_kiov_t *src,
- unsigned int offset, unsigned int len);
-
-void lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov,
- unsigned int doffset,
- unsigned int nsiov, struct iovec *siov,
- unsigned int soffset, unsigned int nob);
-void lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov,
- unsigned int iovoffset,
- unsigned int nkiov, lnet_kiov_t *kiov,
- unsigned int kiovoffset, unsigned int nob);
-void lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov,
- unsigned int kiovoffset,
- unsigned int niov, struct iovec *iov,
- unsigned int iovoffset, unsigned int nob);
-void lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov,
- unsigned int doffset,
- unsigned int nskiov, lnet_kiov_t *skiov,
- unsigned int soffset, unsigned int nob);
-
-static inline void
-lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset,
- unsigned int nsiov, struct iovec *siov, unsigned int soffset,
- unsigned int nob)
-{
- struct iovec diov = {/*.iov_base = */ dest, /*.iov_len = */ dlen};
-
- lnet_copy_iov2iov(1, &diov, doffset,
- nsiov, siov, soffset, nob);
-}
-
-static inline void
-lnet_copy_kiov2flat(int dlen, void *dest, unsigned int doffset,
- unsigned int nsiov, lnet_kiov_t *skiov, unsigned int soffset,
- unsigned int nob)
-{
- struct iovec diov = {/* .iov_base = */ dest, /* .iov_len = */ dlen};
-
- lnet_copy_kiov2iov(1, &diov, doffset,
- nsiov, skiov, soffset, nob);
-}
-
-static inline void
-lnet_copy_flat2iov(unsigned int ndiov, struct iovec *diov, unsigned int doffset,
- int slen, void *src, unsigned int soffset, unsigned int nob)
-{
- struct iovec siov = {/*.iov_base = */ src, /*.iov_len = */slen};
- lnet_copy_iov2iov(ndiov, diov, doffset,
- 1, &siov, soffset, nob);
-}
-
-static inline void
-lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov, unsigned int doffset,
- int slen, void *src, unsigned int soffset, unsigned int nob)
-{
- struct iovec siov = {/* .iov_base = */ src, /* .iov_len = */ slen};
- lnet_copy_iov2kiov(ndiov, dkiov, doffset,
- 1, &siov, soffset, nob);
-}
-
-void lnet_me_unlink(lnet_me_t *me);
-
-void lnet_md_unlink(lnet_libmd_t *md);
-void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd);
-
-void lnet_register_lnd(lnd_t *lnd);
-void lnet_unregister_lnd(lnd_t *lnd);
-int lnet_set_ip_niaddr (lnet_ni_t *ni);
-
-#ifdef __KERNEL__
-int lnet_connect(cfs_socket_t **sockp, lnet_nid_t peer_nid,
- __u32 local_ip, __u32 peer_ip, int peer_port);
-void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
- __u32 peer_ip, int port);
-int lnet_count_acceptor_nis(lnet_ni_t **first_ni);
-int lnet_accept(lnet_ni_t *blind_ni, cfs_socket_t *sock, __u32 magic);
-int lnet_acceptor_timeout(void);
-int lnet_acceptor_port(void);
-#endif
-
-#ifdef HAVE_LIBPTHREAD
-int lnet_count_acceptor_nis(lnet_ni_t **first_ni);
-int lnet_acceptor_port(void);
-#endif
-
-int lnet_acceptor_start(void);
-void lnet_acceptor_stop(void);
-
-int lnet_peers_start_down(void);
-int lnet_router_checker_start(void);
-void lnet_router_checker_stop(void);
-
-int lnet_ping_target_init(void);
-void lnet_ping_target_fini(void);
-int lnet_ping(lnet_process_id_t id, int timeout_ms,
- lnet_process_id_t *ids, int n_ids);
-
-int lnet_parse_ip2nets (char **networksp, char *ip2nets);
-int lnet_parse_routes (char *route_str, int *im_a_router);
-int lnet_parse_networks (struct list_head *nilist, char *networks);
-
-int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid);
-lnet_peer_t *lnet_find_peer_locked (lnet_nid_t nid);
-void lnet_clear_peer_table(void);
-void lnet_destroy_peer_table(void);
-int lnet_create_peer_table(void);
-void lnet_debug_peer(lnet_nid_t nid);
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * p30/lib-types.h
- *
- * Types used by the library side routines that do not need to be
- * exposed to the user application
- */
-
-#ifndef __LNET_LIB_TYPES_H__
-#define __LNET_LIB_TYPES_H__
-
-#if defined(__linux__)
-#include <lnet/linux/lib-types.h>
-#elif defined(__APPLE__)
-#include <lnet/darwin/lib-types.h>
-#elif defined(__WINNT__)
-#include <lnet/winnt/lib-types.h>
-#else
-#error Unsupported Operating System
-#endif
-
-#include <libcfs/libcfs.h>
-#include <libcfs/list.h>
-#include <lnet/types.h>
-
-#define WIRE_ATTR __attribute__((packed))
-
-/* The wire handle's interface cookie only matches one network interface in
- * one epoch (i.e. new cookie when the interface restarts or the node
- * reboots). The object cookie only matches one object on that interface
- * during that object's lifetime (i.e. no cookie re-use). */
-typedef struct {
- __u64 wh_interface_cookie;
- __u64 wh_object_cookie;
-} WIRE_ATTR lnet_handle_wire_t;
-
-/* byte-flip insensitive! */
-#define LNET_WIRE_HANDLE_NONE \
-((const lnet_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1})
-
-typedef enum {
- LNET_MSG_ACK = 0,
- LNET_MSG_PUT,
- LNET_MSG_GET,
- LNET_MSG_REPLY,
- LNET_MSG_HELLO,
-} lnet_msg_type_t;
-
-/* The variant fields of the portals message header are aligned on an 8
- * byte boundary in the message header. Note that all types used in these
- * wire structs MUST be fixed size and the smaller types are placed at the
- * end. */
-typedef struct lnet_ack {
- lnet_handle_wire_t dst_wmd;
- __u64 match_bits;
- __u32 mlength;
-} WIRE_ATTR lnet_ack_t;
-
-typedef struct lnet_put {
- lnet_handle_wire_t ack_wmd;
- __u64 match_bits;
- __u64 hdr_data;
- __u32 ptl_index;
- __u32 offset;
-} WIRE_ATTR lnet_put_t;
-
-typedef struct lnet_get {
- lnet_handle_wire_t return_wmd;
- __u64 match_bits;
- __u32 ptl_index;
- __u32 src_offset;
- __u32 sink_length;
-} WIRE_ATTR lnet_get_t;
-
-typedef struct lnet_reply {
- lnet_handle_wire_t dst_wmd;
-} WIRE_ATTR lnet_reply_t;
-
-typedef struct lnet_hello {
- __u64 incarnation;
- __u32 type;
-} WIRE_ATTR lnet_hello_t;
-
-typedef struct {
- lnet_nid_t dest_nid;
- lnet_nid_t src_nid;
- lnet_pid_t dest_pid;
- lnet_pid_t src_pid;
- __u32 type; /* lnet_msg_type_t */
- __u32 payload_length; /* payload data to follow */
- /*<------__u64 aligned------->*/
- union {
- lnet_ack_t ack;
- lnet_put_t put;
- lnet_get_t get;
- lnet_reply_t reply;
- lnet_hello_t hello;
- } msg;
-} WIRE_ATTR lnet_hdr_t;
-
-/* A HELLO message contains a magic number and protocol version
- * code in the header's dest_nid, the peer's NID in the src_nid, and
- * LNET_MSG_HELLO in the type field. All other common fields are zero
- * (including payload_size; i.e. no payload).
- * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
- * running the same protocol and to find out its NID. These LNDs should
- * exchange HELLO messages when a connection is first established. Individual
- * LNDs can put whatever else they fancy in lnet_hdr_t::msg.
- */
-typedef struct {
- __u32 magic; /* LNET_PROTO_TCP_MAGIC */
- __u16 version_major; /* increment on incompatible change */
- __u16 version_minor; /* increment on compatible change */
-} WIRE_ATTR lnet_magicversion_t;
-
-/* PROTO MAGIC for LNDs */
-#define LNET_PROTO_IB_MAGIC 0x0be91b91
-#define LNET_PROTO_OPENIB_MAGIC LNET_PROTO_IB_MAGIC
-#define LNET_PROTO_IIB_MAGIC LNET_PROTO_IB_MAGIC
-#define LNET_PROTO_VIB_MAGIC LNET_PROTO_IB_MAGIC
-#define LNET_PROTO_RA_MAGIC 0x0be91b92
-#define LNET_PROTO_QSW_MAGIC 0x0be91b93
-#define LNET_PROTO_TCP_MAGIC 0xeebc0ded
-#define LNET_PROTO_PTL_MAGIC 0x50746C4E /* 'PtlN' unique magic */
-#define LNET_PROTO_GM_MAGIC 0x6d797269 /* 'myri'! */
-#define LNET_PROTO_MX_MAGIC 0x4d583130 /* 'MX10'! */
-#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100
-#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */
-
-/* Placeholder for a future "unified" protocol across all LNDs */
-/* Current LNDs that receive a request with this magic will respond with a
- * "stub" reply using their current protocol */
-#define LNET_PROTO_MAGIC 0x45726963 /* ! */
-
-
-#define LNET_PROTO_TCP_VERSION_MAJOR 1
-#define LNET_PROTO_TCP_VERSION_MINOR 0
-
-/* Acceptor connection request */
-typedef struct {
- __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */
- __u32 acr_version; /* protocol version */
- __u64 acr_nid; /* target NID */
-} WIRE_ATTR lnet_acceptor_connreq_t;
-
-#define LNET_PROTO_ACCEPTOR_VERSION 1
-
-/* forward refs */
-struct lnet_libmd;
-
-typedef struct lnet_msg {
- struct list_head msg_activelist;
- struct list_head msg_list; /* Q for credits/MD */
-
- lnet_process_id_t msg_target;
- __u32 msg_type;
-
- unsigned int msg_target_is_router:1; /* sending to a router */
- unsigned int msg_routing:1; /* being forwarded */
- unsigned int msg_ack:1; /* ack on finalize (PUT) */
- unsigned int msg_sending:1; /* outgoing message */
- unsigned int msg_receiving:1; /* being received */
- unsigned int msg_delayed:1; /* had to Q for buffer or tx credit */
- unsigned int msg_txcredit:1; /* taken an NI send credit */
- unsigned int msg_peertxcredit:1; /* taken a peer send credit */
- unsigned int msg_rtrcredit:1; /* taken a globel router credit */
- unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */
- unsigned int msg_onactivelist:1; /* on the activelist */
-
- struct lnet_peer *msg_txpeer; /* peer I'm sending to */
- struct lnet_peer *msg_rxpeer; /* peer I received from */
-
- void *msg_private;
- struct lnet_libmd *msg_md;
-
- unsigned int msg_len;
- unsigned int msg_wanted;
- unsigned int msg_offset;
- unsigned int msg_niov;
- struct iovec *msg_iov;
- lnet_kiov_t *msg_kiov;
-
- lnet_event_t msg_ev;
- lnet_hdr_t msg_hdr;
-} lnet_msg_t;
-
-
-typedef struct lnet_libhandle {
- struct list_head lh_hash_chain;
- __u64 lh_cookie;
-} lnet_libhandle_t;
-
-#define lh_entry(ptr, type, member) \
- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-typedef struct lnet_eq {
- struct list_head eq_list;
- lnet_libhandle_t eq_lh;
- lnet_seq_t eq_enq_seq;
- lnet_seq_t eq_deq_seq;
- unsigned int eq_size;
- lnet_event_t *eq_events;
- int eq_refcount;
- lnet_eq_handler_t eq_callback;
-} lnet_eq_t;
-
-typedef struct lnet_me {
- struct list_head me_list;
- lnet_libhandle_t me_lh;
- lnet_process_id_t me_match_id;
- unsigned int me_portal;
- __u64 me_match_bits;
- __u64 me_ignore_bits;
- lnet_unlink_t me_unlink;
- struct lnet_libmd *me_md;
-} lnet_me_t;
-
-typedef struct lnet_libmd {
- struct list_head md_list;
- lnet_libhandle_t md_lh;
- lnet_me_t *md_me;
- char *md_start;
- unsigned int md_offset;
- unsigned int md_length;
- unsigned int md_max_size;
- int md_threshold;
- int md_refcount;
- unsigned int md_options;
- unsigned int md_flags;
- void *md_user_ptr;
- lnet_eq_t *md_eq;
- void *md_addrkey;
- unsigned int md_niov; /* # frags */
- union {
- struct iovec iov[LNET_MAX_IOV];
- lnet_kiov_t kiov[LNET_MAX_IOV];
- } md_iov;
-} lnet_libmd_t;
-
-#define LNET_MD_FLAG_ZOMBIE (1 << 0)
-#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1)
-
-#ifdef LNET_USE_LIB_FREELIST
-typedef struct
-{
- void *fl_objs; /* single contiguous array of objects */
- int fl_nobjs; /* the number of them */
- int fl_objsize; /* the size (including overhead) of each of them */
- struct list_head fl_list; /* where they are enqueued */
-} lnet_freelist_t;
-
-typedef struct
-{
- struct list_head fo_list; /* enqueue on fl_list */
- void *fo_contents; /* aligned contents */
-} lnet_freeobj_t;
-#endif
-
-typedef struct {
- /* info about peers we are trying to fail */
- struct list_head tp_list; /* ln_test_peers */
- lnet_nid_t tp_nid; /* matching nid */
- unsigned int tp_threshold; /* # failures to simulate */
-} lnet_test_peer_t;
-
-#define LNET_COOKIE_TYPE_MD 1
-#define LNET_COOKIE_TYPE_ME 2
-#define LNET_COOKIE_TYPE_EQ 3
-#define LNET_COOKIE_TYPES 4
-/* LNET_COOKIE_TYPES must be a power of 2, so the cookie type can be
- * extracted by masking with (LNET_COOKIE_TYPES - 1) */
-
-struct lnet_ni; /* forward ref */
-
-typedef struct lnet_lnd
-{
- /* fields managed by portals */
- struct list_head lnd_list; /* stash in the LND table */
- int lnd_refcount; /* # active instances */
-
- /* fields initialised by the LND */
- unsigned int lnd_type;
-
- int (*lnd_startup) (struct lnet_ni *ni);
- void (*lnd_shutdown) (struct lnet_ni *ni);
- int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg);
-
- /* In data movement APIs below, payload buffers are described as a set
- * of 'niov' fragments which are...
- * EITHER
- * in virtual memory (struct iovec *iov != NULL)
- * OR
- * in pages (kernel only: plt_kiov_t *kiov != NULL).
- * The LND may NOT overwrite these fragment descriptors.
- * An 'offset' and may specify a byte offset within the set of
- * fragments to start from
- */
-
- /* Start sending a preformatted message. 'private' is NULL for PUT and
- * GET messages; otherwise this is a response to an incoming message
- * and 'private' is the 'private' passed to lnet_parse(). Return
- * non-zero for immediate failure, otherwise complete later with
- * lnet_finalize() */
- int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg);
-
- /* Start receiving 'mlen' bytes of payload data, skipping the following
- * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to
- * lnet_parse(). Return non-zero for immedaite failure, otherwise
- * complete later with lnet_finalize(). This also gives back a receive
- * credit if the LND does flow control. */
- int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-
- /* lnet_parse() has had to delay processing of this message
- * (e.g. waiting for a forwarding buffer or send credits). Give the
- * LND a chance to free urgently needed resources. If called, return 0
- * for success and do NOT give back a receive credit; that has to wait
- * until lnd_recv() gets called. On failure return < 0 and
- * release resources; lnd_recv() will not be called. */
- int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
- void **new_privatep);
-
- /* notification of peer health */
- void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
-
-#ifdef __KERNEL__
- /* accept a new connection */
- int (*lnd_accept)(struct lnet_ni *ni, cfs_socket_t *sock);
-#else
- /* wait for something to happen */
- void (*lnd_wait)(struct lnet_ni *ni, int milliseconds);
-
- /* ensure non-RDMA messages can be received outside liblustre */
- int (*lnd_setasync)(struct lnet_ni *ni, lnet_process_id_t id, int nasync);
-
-#ifdef HAVE_LIBPTHREAD
- int (*lnd_accept)(struct lnet_ni *ni, int sock);
-#endif
-#endif
-} lnd_t;
-
-#define LNET_MAX_INTERFACES 16
-
-typedef struct lnet_ni {
- struct list_head ni_list; /* chain on ln_nis */
- struct list_head ni_txq; /* messages waiting for tx credits */
- int ni_maxtxcredits; /* # tx credits */
- int ni_txcredits; /* # tx credits free */
- int ni_mintxcredits; /* lowest it's been */
- int ni_peertxcredits; /* # per-peer send credits */
- lnet_nid_t ni_nid; /* interface's NID */
- void *ni_data; /* instance-specific data */
- lnd_t *ni_lnd; /* procedural interface */
- int ni_refcount; /* reference count */
- char *ni_interfaces[LNET_MAX_INTERFACES]; /* equivalent interfaces to use */
-} lnet_ni_t;
-
-typedef struct lnet_peer {
- struct list_head lp_hashlist; /* chain on peer hash */
- struct list_head lp_txq; /* messages blocking for tx credits */
- struct list_head lp_rtrq; /* messages blocking for router credits */
- struct list_head lp_rtr_list; /* chain on router list */
- int lp_txcredits; /* # tx credits available */
- int lp_mintxcredits; /* low water mark */
- int lp_rtrcredits; /* # router credits */
- int lp_minrtrcredits; /* low water mark */
- unsigned int lp_alive:1; /* alive/dead? */
- unsigned int lp_notify:1; /* notification outstanding? */
- unsigned int lp_notifylnd:1; /* outstanding notification for LND? */
- unsigned int lp_notifying:1; /* some thread is handling notification */
- unsigned int lp_ping_notsent; /* SEND event outstanding from ping */
- int lp_alive_count; /* # times router went dead<->alive */
- long lp_txqnob; /* bytes queued for sending */
- time_t lp_timestamp; /* time of last aliveness news */
- time_t lp_ping_timestamp; /* time of last ping attempt */
- time_t lp_ping_deadline; /* != 0 if ping reply expected */
- lnet_ni_t *lp_ni; /* interface peer is on */
- lnet_nid_t lp_nid; /* peer's NID */
- int lp_refcount; /* # refs */
- int lp_rtr_refcount; /* # refs from lnet_route_t::lr_gateway */
-} lnet_peer_t;
-
-typedef struct {
- struct list_head lr_list; /* chain on net */
- lnet_peer_t *lr_gateway; /* router node */
-} lnet_route_t;
-
-typedef struct {
- struct list_head lrn_list; /* chain on ln_remote_nets */
- struct list_head lrn_routes; /* routes to me */
- __u32 lrn_net; /* my net number */
- unsigned int lrn_hops; /* how far I am */
-} lnet_remotenet_t;
-
-typedef struct {
- struct list_head rbp_bufs; /* my free buffer pool */
- struct list_head rbp_msgs; /* messages blocking for a buffer */
- int rbp_npages; /* # pages in each buffer */
- int rbp_nbuffers; /* # buffers */
- int rbp_credits; /* # free buffers / blocked messages */
- int rbp_mincredits; /* low water mark */
-} lnet_rtrbufpool_t;
-
-typedef struct {
- struct list_head rb_list; /* chain on rbp_bufs */
- lnet_rtrbufpool_t *rb_pool; /* owning pool */
- lnet_kiov_t rb_kiov[0]; /* the buffer space */
-} lnet_rtrbuf_t;
-
-typedef struct {
- __u32 msgs_alloc;
- __u32 msgs_max;
- __u32 errors;
- __u32 send_count;
- __u32 recv_count;
- __u32 route_count;
- __u32 drop_count;
- __u64 send_length;
- __u64 recv_length;
- __u64 route_length;
- __u64 drop_length;
-} lnet_counters_t;
-
-#define LNET_PEER_HASHSIZE 503 /* prime! */
-
-#define LNET_NRBPOOLS 3 /* # different router buffer pools */
-
-#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL
-#define LNET_PROTO_PING_VERSION 1
-typedef struct {
- __u32 pi_magic;
- __u32 pi_version;
- lnet_pid_t pi_pid;
- __u32 pi_nnids;
- lnet_nid_t pi_nid[0];
-} WIRE_ATTR lnet_ping_info_t;
-
-/* Options for lnet_portal_t::ptl_options */
-#define LNET_PTL_LAZY (1 << 0)
-typedef struct {
- struct list_head ptl_ml; /* match list */
- struct list_head ptl_msgq; /* messages blocking for MD */
- __u64 ptl_msgq_version; /* validity stamp */
- unsigned int ptl_options;
-} lnet_portal_t;
-
-/* Router Checker */
-/* < 0 == startup error */
-#define LNET_RC_STATE_SHUTDOWN 0 /* not started */
-#define LNET_RC_STATE_RUNNING 1 /* started up OK */
-#define LNET_RC_STATE_STOPTHREAD 2 /* telling thread to stop */
-#define LNET_RC_STATE_UNLINKING 3 /* unlinking RC MD */
-#define LNET_RC_STATE_UNLINKED 4 /* RC's MD has been unlinked */
-
-typedef struct
-{
- /* Stuff initialised at LNetInit() */
- int ln_init; /* LNetInit() called? */
- int ln_refcount; /* LNetNIInit/LNetNIFini counter */
- int ln_niinit_self; /* Have I called LNetNIInit myself? */
-
- int ln_ptlcompat; /* do I support talking to portals? */
-
- struct list_head ln_lnds; /* registered LNDs */
-
-#ifdef __KERNEL__
- spinlock_t ln_lock;
- cfs_waitq_t ln_waitq;
- struct semaphore ln_api_mutex;
- struct semaphore ln_lnd_mutex;
-#else
-# ifndef HAVE_LIBPTHREAD
- int ln_lock;
- int ln_api_mutex;
- int ln_lnd_mutex;
-# else
- pthread_cond_t ln_cond;
- pthread_mutex_t ln_lock;
- pthread_mutex_t ln_api_mutex;
- pthread_mutex_t ln_lnd_mutex;
-# endif
-#endif
-
- /* Stuff initialised at LNetNIInit() */
-
- int ln_shutdown; /* shutdown in progress */
- int ln_nportals; /* # portals */
- lnet_portal_t *ln_portals; /* the vector of portals */
-
- lnet_pid_t ln_pid; /* requested pid */
-
- struct list_head ln_nis; /* LND instances */
- lnet_ni_t *ln_loni; /* the loopback NI */
- lnet_ni_t *ln_eqwaitni; /* NI to wait for events in */
- struct list_head ln_zombie_nis; /* dying LND instances */
- int ln_nzombie_nis; /* # of NIs to wait for */
-
- struct list_head ln_remote_nets; /* remote networks with routes to them */
- __u64 ln_remote_nets_version; /* validity stamp */
-
- struct list_head ln_routers; /* list of all known routers */
- __u64 ln_routers_version; /* validity stamp */
-
- struct list_head *ln_peer_hash; /* NID->peer hash */
- int ln_npeers; /* # peers extant */
- int ln_peertable_version; /* /proc validity stamp */
-
- int ln_routing; /* am I a router? */
- lnet_rtrbufpool_t ln_rtrpools[LNET_NRBPOOLS]; /* router buffer pools */
-
- int ln_lh_hash_size; /* size of lib handle hash table */
- struct list_head *ln_lh_hash_table; /* all extant lib handles, this interface */
- __u64 ln_next_object_cookie; /* cookie generator */
- __u64 ln_interface_cookie; /* uniquely identifies this ni in this epoch */
-
- char *ln_network_tokens; /* space for network names */
- int ln_network_tokens_nob;
-
- int ln_testprotocompat; /* test protocol compatibility flags */
-
- struct list_head ln_finalizeq; /* msgs waiting to complete finalizing */
-#ifdef __KERNEL__
- void **ln_finalizers; /* threads doing finalization */
- int ln_nfinalizers; /* max # threads finalizing */
-#else
- int ln_finalizing;
-#endif
- struct list_head ln_test_peers; /* failure simulation */
-
- lnet_handle_md_t ln_ping_target_md;
- lnet_handle_eq_t ln_ping_target_eq;
- lnet_ping_info_t *ln_ping_info;
-
-#ifdef __KERNEL__
- int ln_rc_state; /* router checker startup/shutdown state */
- struct semaphore ln_rc_signal; /* serialise startup/shutdown */
- lnet_handle_eq_t ln_rc_eqh; /* router checker's event queue */
-#endif
-
-#ifdef LNET_USE_LIB_FREELIST
- lnet_freelist_t ln_free_mes;
- lnet_freelist_t ln_free_msgs;
- lnet_freelist_t ln_free_mds;
- lnet_freelist_t ln_free_eqs;
-#endif
- struct list_head ln_active_msgs;
- struct list_head ln_active_mds;
- struct list_head ln_active_eqs;
-
- lnet_counters_t ln_counters;
-
-#ifndef __KERNEL__
- /* Temporary workaround to allow uOSS and test programs force
- * server mode in userspace. The only place where we use it is
- * lnet_prepare(). The only way to turn this flag on is to
- * call lnet_server_mode() */
-
- int ln_server_mode_flag;
-#endif
-} lnet_t;
-
-#endif
+++ /dev/null
-Makefile.in
-Makefile
+++ /dev/null
-EXTRA_DIST := lib-lnet.h lib-types.h lnet.h api-support.h
+++ /dev/null
-#ifndef __LINUX_API_SUPPORT_H__
-#define __LINUX_API_SUPPORT_H__
-
-#ifndef __LNET_API_SUPPORT_H__
-#error Do not #include this file directly. #include <lnet /api-support.h> instead
-#endif
-
-#ifndef __KERNEL__
-# include <stdio.h>
-# include <stdlib.h>
-# include <unistd.h>
-# include <time.h>
-
-/* Lots of POSIX dependencies to support PtlEQWait_timeout */
-# include <signal.h>
-# include <setjmp.h>
-# include <time.h>
-
-#ifdef HAVE_LIBREADLINE
-#define READLINE_LIBRARY
-#include <readline/readline.h>
-
-/* readline.h pulls in a #define that conflicts with one in libcfs.h */
-#undef RETURN
-
-/* completion_matches() is #if 0-ed out in modern glibc */
-#ifndef completion_matches
-# define completion_matches rl_completion_matches
-#endif
-
-#endif /* HAVE_LIBREADLINE */
-
-extern void using_history(void);
-extern void stifle_history(int);
-extern void add_history(char *);
-
-#endif /* !__KERNEL__ */
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_LINUX_LIB_LNET_H__
-#define __LNET_LINUX_LIB_LNET_H__
-
-#ifndef __LNET_LIB_LNET_H__
-#error Do not #include this file directly. #include <lnet/lib-lnet.h> instead
-#endif
-
-#ifdef __KERNEL__
-# include <asm/page.h>
-# include <linux/string.h>
-# include <asm/io.h>
-# include <libcfs/kp30.h>
-
-static inline __u64
-lnet_page2phys (struct page *p)
-{
- /* compiler optimizer will elide unused branches */
-
- switch (sizeof(typeof(page_to_phys(p)))) {
- case 4:
- /* page_to_phys returns a 32 bit physical address. This must
- * be a 32 bit machine with <= 4G memory and we must ensure we
- * don't sign extend when converting to 64 bits. */
- return (unsigned long)page_to_phys(p);
-
- case 8:
- /* page_to_phys returns a 64 bit physical address :) */
- return page_to_phys(p);
-
- default:
- LBUG();
- return 0;
- }
-}
-
-#else /* __KERNEL__ */
-# include <libcfs/list.h>
-# include <string.h>
-# ifdef HAVE_LIBPTHREAD
-# include <pthread.h>
-# endif
-#endif
-
-#define LNET_ROUTER
-
-#endif /* __LNET_LINUX_LIB_LNET_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_LINUX_LIB_TYPES_H__
-#define __LNET_LINUX_LIB_TYPES_H__
-
-#ifndef __LNET_LIB_TYPES_H__
-#error Do not #include this file directly. #include <lnet/lib-types.h> instead
-#endif
-
-#ifdef __KERNEL__
-# include <linux/uio.h>
-# include <linux/smp_lock.h>
-# include <linux/types.h>
-#else
-# define LNET_USE_LIB_FREELIST
-# include <sys/types.h>
-#endif
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_LINUX_LNET_H__
-#define __LNET_LINUX_LNET_H__
-
-#ifndef __LNET_H__
-#error Do not #include this file directly. #include <lnet/lnet.h> instead
-#endif
-
-/*
- * lnet.h
- *
- * User application interface file
- */
-
-#if defined (__KERNEL__)
-#include <linux/uio.h>
-#include <linux/types.h>
-#else
-#include <sys/types.h>
-#include <sys/uio.h>
-#endif
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_H__
-#define __LNET_H__
-
-/*
- * lnet.h
- *
- * User application interface file
- */
-#if defined(__linux__)
-#include <lnet/linux/lnet.h>
-#elif defined(__APPLE__)
-#include <lnet/darwin/lnet.h>
-#elif defined(__WINNT__)
-#include <lnet/winnt/lnet.h>
-#else
-#error Unsupported Operating System
-#endif
-
-#include <lnet/types.h>
-#include <lnet/api.h>
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * header for libptlctl.a
- */
-#ifndef _PTLCTL_H_
-#define _PTLCTL_H_
-
-#include <lnet/types.h>
-#include <libcfs/kp30.h>
-#include <libcfs/libcfs.h>
-
-#define LNET_DEV_ID 0
-#define LNET_DEV_PATH "/dev/lnet"
-#define LNET_DEV_MAJOR 10
-#define LNET_DEV_MINOR 240
-#define OBD_DEV_ID 1
-#define OBD_DEV_PATH "/dev/obd"
-#define OBD_DEV_MAJOR 10
-#define OBD_DEV_MINOR 241
-#define SMFS_DEV_ID 2
-#define SMFS_DEV_PATH "/dev/snapdev"
-#define SMFS_DEV_MAJOR 10
-#define SMFS_DEV_MINOR 242
-
-int ptl_initialize(int argc, char **argv);
-int jt_ptl_network(int argc, char **argv);
-int jt_ptl_list_nids(int argc, char **argv);
-int jt_ptl_which_nid(int argc, char **argv);
-int jt_ptl_print_interfaces(int argc, char **argv);
-int jt_ptl_add_interface(int argc, char **argv);
-int jt_ptl_del_interface(int argc, char **argv);
-int jt_ptl_print_peers (int argc, char **argv);
-int jt_ptl_add_peer (int argc, char **argv);
-int jt_ptl_del_peer (int argc, char **argv);
-int jt_ptl_print_connections (int argc, char **argv);
-int jt_ptl_disconnect(int argc, char **argv);
-int jt_ptl_push_connection(int argc, char **argv);
-int jt_ptl_print_active_txs(int argc, char **argv);
-int jt_ptl_ping(int argc, char **argv);
-int jt_ptl_mynid(int argc, char **argv);
-int jt_ptl_add_uuid(int argc, char **argv);
-int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */
-int jt_ptl_close_uuid(int argc, char **argv);
-int jt_ptl_del_uuid(int argc, char **argv);
-int jt_ptl_add_route (int argc, char **argv);
-int jt_ptl_del_route (int argc, char **argv);
-int jt_ptl_notify_router (int argc, char **argv);
-int jt_ptl_print_routes (int argc, char **argv);
-int jt_ptl_fail_nid (int argc, char **argv);
-int jt_ptl_lwt(int argc, char **argv);
-int jt_ptl_testprotocompat(int argc, char **argv);
-int jt_ptl_memhog(int argc, char **argv);
-
-int dbg_initialize(int argc, char **argv);
-int jt_dbg_filter(int argc, char **argv);
-int jt_dbg_show(int argc, char **argv);
-int jt_dbg_list(int argc, char **argv);
-int jt_dbg_debug_kernel(int argc, char **argv);
-int jt_dbg_debug_daemon(int argc, char **argv);
-int jt_dbg_debug_file(int argc, char **argv);
-int jt_dbg_clear_debug_buf(int argc, char **argv);
-int jt_dbg_mark_debug_buf(int argc, char **argv);
-int jt_dbg_modules(int argc, char **argv);
-int jt_dbg_panic(int argc, char **argv);
-
-/* l_ioctl.c */
-typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf);
-void set_ioc_handler(ioc_handler_t *handler);
-int register_ioc_dev(int dev_id, const char * dev_name, int major, int minor);
-void unregister_ioc_dev(int dev_id);
-int set_ioctl_dump(char * file);
-int l_ioctl(int dev_id, unsigned int opc, void *buf);
-int parse_dump(char * dump_file, ioc_handler_t ioc_func);
-int jt_ioc_dump(int argc, char **argv);
-extern char *dump_filename;
-int dump(int dev_id, unsigned int opc, void *buf);
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- */
-
-#ifndef __LNET_ST_H__
-#define __LNET_ST_H__
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-types.h>
-
-#define LST_NAME_SIZE 32 /* max name buffer length */
-
-#define LSTIO_DEBUG 0xC00 /* debug */
-#define LSTIO_SESSION_NEW 0xC01 /* create session */
-#define LSTIO_SESSION_END 0xC02 /* end session */
-#define LSTIO_SESSION_INFO 0xC03 /* query session */
-#define LSTIO_GROUP_ADD 0xC10 /* add group */
-#define LSTIO_GROUP_LIST 0xC11 /* list all groups in session */
-#define LSTIO_GROUP_INFO 0xC12 /* query defailt infomation of specified group */
-#define LSTIO_GROUP_DEL 0xC13 /* delete group */
-#define LSTIO_NODES_ADD 0xC14 /* add nodes to specified group */
-#define LSTIO_GROUP_UPDATE 0xC15 /* update group */
-#define LSTIO_BATCH_ADD 0xC20 /* add batch */
-#define LSTIO_BATCH_START 0xC21 /* start batch */
-#define LSTIO_BATCH_STOP 0xC22 /* stop batch */
-#define LSTIO_BATCH_DEL 0xC23 /* delete batch */
-#define LSTIO_BATCH_LIST 0xC24 /* show all batches in the session */
-#define LSTIO_BATCH_INFO 0xC25 /* show defail of specified batch */
-#define LSTIO_TEST_ADD 0xC26 /* add test (to batch) */
-#define LSTIO_BATCH_QUERY 0xC27 /* query batch status */
-#define LSTIO_STAT_QUERY 0xC30 /* get stats */
-
-typedef struct {
- lnet_nid_t ses_nid; /* nid of console node */
- __u64 ses_stamp; /* time stamp */
-} lst_sid_t; /*** session id */
-
-#define LST_INVALID_SID ((const lst_sid_t){.ses_nid = LNET_NID_ANY,\
- .ses_stamp = -1})
-
-typedef struct {
- __u64 bat_id; /* unique id in session */
-} lst_bid_t; /*** batch id (group of tests) */
-
-/* Status of test node */
-#define LST_NODE_ACTIVE 0x1 /* node in this session */
-#define LST_NODE_BUSY 0x2 /* node is taken by other session */
-#define LST_NODE_DOWN 0x4 /* node is down */
-#define LST_NODE_UNKNOWN 0x8 /* node not in session */
-
-typedef struct {
- lnet_process_id_t nde_id; /* id of node */
- int nde_state; /* state of node */
-} lstcon_node_ent_t; /*** node entry, for list_group command */
-
-typedef struct {
- int nle_nnode; /* # of nodes */
- int nle_nactive; /* # of active nodes */
- int nle_nbusy; /* # of busy nodes */
- int nle_ndown; /* # of down nodes */
- int nle_nunknown; /* # of unknown nodes */
-} lstcon_ndlist_ent_t; /*** node_list entry, for list_batch command */
-
-typedef struct {
- int tse_type; /* test type */
- int tse_loop; /* loop count */
- int tse_concur; /* concurrency of test */
-} lstcon_test_ent_t; /*** test summary entry, for list_batch command */
-
-typedef struct {
- int bae_state; /* batch status */
- int bae_timeout; /* batch timeout */
- int bae_ntest; /* # of tests in the batch */
-} lstcon_batch_ent_t; /*** batch summary entry, for list_batch command */
-
-typedef struct {
- lstcon_ndlist_ent_t tbe_cli_nle; /* client (group) node_list entry */
- lstcon_ndlist_ent_t tbe_srv_nle; /* server (group) node_list entry */
- union {
- lstcon_test_ent_t tbe_test; /* test entry */
- lstcon_batch_ent_t tbe_batch; /* batch entry */
- } u;
-} lstcon_test_batch_ent_t; /*** test/batch verbose information entry,
- *** for list_batch command */
-
-typedef struct {
- struct list_head rpe_link; /* link chain */
- lnet_process_id_t rpe_peer; /* peer's id */
- struct timeval rpe_stamp; /* time stamp of RPC */
- int rpe_state; /* peer's state */
- int rpe_rpc_errno; /* RPC errno */
-
- lst_sid_t rpe_sid; /* peer's session id */
- int rpe_fwk_errno; /* framework errno */
- int rpe_priv[4]; /* private data */
- char rpe_payload[0]; /* private reply payload */
-} lstcon_rpc_ent_t;
-
-typedef struct {
- int trs_rpc_stat[4]; /* RPCs stat (0: total, 1: failed, 2: finished, 4: reserved */
- int trs_rpc_errno; /* RPC errno */
- int trs_fwk_stat[8]; /* framework stat */
- int trs_fwk_errno; /* errno of the first remote error */
- void *trs_fwk_private; /* private framework stat */
-} lstcon_trans_stat_t;
-
-static inline int
-lstcon_rpc_stat_total(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0];
-}
-
-static inline int
-lstcon_rpc_stat_success(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1];
-}
-
-static inline int
-lstcon_rpc_stat_failure(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2];
-}
-
-static inline int
-lstcon_sesop_stat_success(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_sesop_stat_failure(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_sesqry_stat_active(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_sesqry_stat_busy(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_sesqry_stat_unknown(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
-}
-
-static inline int
-lstcon_tsbop_stat_success(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_tsbop_stat_failure(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_tsbqry_stat_idle(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_tsbqry_stat_run(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_tsbqry_stat_failure(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
-}
-
-static inline int
-lstcon_statqry_stat_success(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_statqry_stat_failure(lstcon_trans_stat_t *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-/* create a session */
-typedef struct {
- int lstio_ses_key; /* IN: local key */
- int lstio_ses_timeout; /* IN: session timeout */
- int lstio_ses_force; /* IN: force create ? */
- lst_sid_t *lstio_ses_idp; /* OUT: session id */
- int lstio_ses_nmlen; /* IN: name length */
- char *lstio_ses_namep; /* IN: session name */
-} lstio_session_new_args_t;
-
-/* query current session */
-typedef struct {
- lst_sid_t *lstio_ses_idp; /* OUT: session id */
- int *lstio_ses_keyp; /* OUT: local key */
- lstcon_ndlist_ent_t *lstio_ses_ndinfo; /* OUT: */
- int lstio_ses_nmlen; /* IN: name length */
- char *lstio_ses_namep; /* OUT: session name */
-} lstio_session_info_args_t;
-
-/* delete a session */
-typedef struct {
- int lstio_ses_key; /* IN: session key */
-} lstio_session_end_args_t;
-
-#define LST_OPC_SESSION 1
-#define LST_OPC_GROUP 2
-#define LST_OPC_NODES 3
-#define LST_OPC_BATCHCLI 4
-#define LST_OPC_BATCHSRV 5
-
-typedef struct {
- int lstio_dbg_key; /* IN: session key */
- int lstio_dbg_type; /* IN: debug sessin|batch|group|nodes list */
- int lstio_dbg_flags; /* IN: reserved debug flags */
- int lstio_dbg_timeout; /* IN: timeout of debug */
-
- int lstio_dbg_nmlen; /* IN: len of name */
- char *lstio_dbg_namep; /* IN: name of group|batch */
- int lstio_dbg_count; /* IN: # of test nodes to debug */
- lnet_process_id_t *lstio_dbg_idsp; /* IN: id of test nodes */
- struct list_head *lstio_dbg_resultp; /* OUT: list head of result buffer */
-} lstio_debug_args_t;
-
-typedef struct {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_nmlen; /* IN: name length */
- char *lstio_grp_namep; /* IN: group name */
-} lstio_group_add_args_t;
-
-typedef struct {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_nmlen; /* IN: name length */
- char *lstio_grp_namep; /* IN: group name */
-} lstio_group_del_args_t;
-
-#define LST_GROUP_CLEAN 1 /* remove inactive nodes in the group */
-#define LST_GROUP_REFRESH 2 /* refresh inactive nodes in the group */
-#define LST_GROUP_RMND 3 /* delete nodes from the group */
-
-typedef struct {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_opc; /* IN: OPC */
- int lstio_grp_args; /* IN: arguments */
- int lstio_grp_nmlen; /* IN: name length */
- char *lstio_grp_namep; /* IN: group name */
- int lstio_grp_count; /* IN: # of nodes id */
- lnet_process_id_t *lstio_grp_idsp; /* IN: array of nodes */
- struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */
-} lstio_group_update_args_t;
-
-typedef struct {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_nmlen; /* IN: name length */
- char *lstio_grp_namep; /* IN: group name */
- int lstio_grp_count; /* IN: # of nodes */
- lnet_process_id_t *lstio_grp_idsp; /* IN: nodes */
- struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */
-} lstio_group_nodes_args_t;
-
-typedef struct {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_idx; /* IN: group idx */
- int lstio_grp_nmlen; /* IN: name len */
- char *lstio_grp_namep; /* OUT: name */
-} lstio_group_list_args_t;
-
-typedef struct {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_nmlen; /* IN: name len */
- char *lstio_grp_namep; /* IN: name */
- lstcon_ndlist_ent_t *lstio_grp_entp; /* OUT: description of group */
-
- int *lstio_grp_idxp; /* IN/OUT: node index */
- int *lstio_grp_ndentp; /* IN/OUT: # of nodent */
- lstcon_node_ent_t *lstio_grp_dentsp; /* OUT: nodent array */
-} lstio_group_info_args_t;
-
-#define LST_DEFAULT_BATCH "batch" /* default batch name */
-
-typedef struct {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_nmlen; /* IN: name length */
- char *lstio_bat_namep; /* IN: batch name */
-} lstio_batch_add_args_t;
-
-typedef struct {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_nmlen; /* IN: name length */
- char *lstio_bat_namep; /* IN: batch name */
-} lstio_batch_del_args_t;
-
-typedef struct {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_timeout; /* IN: timeout for the batch */
- int lstio_bat_nmlen; /* IN: name length */
- char *lstio_bat_namep; /* IN: batch name */
- struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */
-} lstio_batch_run_args_t;
-
-typedef struct {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_force; /* IN: abort unfinished test RPC */
- int lstio_bat_nmlen; /* IN: name length */
- char *lstio_bat_namep; /* IN: batch name */
- struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */
-} lstio_batch_stop_args_t;
-
-typedef struct {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_testidx; /* IN: test index */
- int lstio_bat_client; /* IN: is test client? */
- int lstio_bat_timeout; /* IN: timeout for waiting */
- int lstio_bat_nmlen; /* IN: name length */
- char *lstio_bat_namep; /* IN: batch name */
- struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */
-} lstio_batch_query_args_t;
-
-typedef struct {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_idx; /* IN: index */
- int lstio_bat_nmlen; /* IN: name length */
- char *lstio_bat_namep; /* IN: batch name */
-} lstio_batch_list_args_t;
-
-typedef struct {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_nmlen; /* IN: name length */
- char *lstio_bat_namep; /* IN: name */
- int lstio_bat_server; /* IN: query server or not */
- int lstio_bat_testidx; /* IN: test index */
- lstcon_test_batch_ent_t *lstio_bat_entp; /* OUT: batch ent */
-
- int *lstio_bat_idxp; /* IN/OUT: index of node */
- int *lstio_bat_ndentp; /* IN/OUT: # of nodent */
- lstcon_node_ent_t *lstio_bat_dentsp; /* array of nodent */
-} lstio_batch_info_args_t;
-
-/* add stat in session */
-typedef struct {
- int lstio_sta_key; /* IN: session key */
- int lstio_sta_timeout; /* IN: timeout for stat requst */
- int lstio_sta_nmlen; /* IN: group name length */
- char *lstio_sta_namep; /* IN: group name */
- int lstio_sta_count; /* IN: # of pid */
- lnet_process_id_t *lstio_sta_idsp; /* IN: pid */
- struct list_head *lstio_sta_resultp; /* OUT: list head of result buffer */
-} lstio_stat_args_t;
-
-typedef enum {
- LST_TEST_BULK = 1,
- LST_TEST_PING = 2
-} lst_test_type_t;
-
-/* create a test in a batch */
-#define LST_MAX_CONCUR 1024 /* Max concurrency of test */
-
-typedef struct {
- int lstio_tes_key; /* IN: session key */
- int lstio_tes_bat_nmlen; /* IN: batch name len */
- char *lstio_tes_bat_name; /* IN: batch name */
- int lstio_tes_type; /* IN: test type */
- int lstio_tes_oneside; /* IN: one sided test */
- int lstio_tes_loop; /* IN: loop count */
- int lstio_tes_concur; /* IN: concurrency */
-
- int lstio_tes_dist; /* IN: node distribution in destination groups */
- int lstio_tes_span; /* IN: node span in destination groups */
- int lstio_tes_sgrp_nmlen; /* IN: source group name length */
- char *lstio_tes_sgrp_name; /* IN: group name */
- int lstio_tes_dgrp_nmlen; /* IN: destination group name length */
- char *lstio_tes_dgrp_name; /* IN: group name */
-
- int lstio_tes_param_len; /* IN: param buffer len */
- void *lstio_tes_param; /* IN: parameter for specified test:
- lstio_bulk_param_t,
- lstio_ping_param_t,
- ... more */
- int *lstio_tes_retp; /* OUT: private returned value */
- struct list_head *lstio_tes_resultp; /* OUT: list head of result buffer */
-} lstio_test_args_t;
-
-typedef enum {
- LST_BRW_READ = 1,
- LST_BRW_WRITE = 2
-} lst_brw_type_t;
-
-typedef enum {
- LST_BRW_CHECK_NONE = 1,
- LST_BRW_CHECK_SIMPLE = 2,
- LST_BRW_CHECK_FULL = 3
-} lst_brw_flags_t;
-
-typedef struct {
- int blk_opc; /* bulk operation code */
- int blk_size; /* size (bytes) */
- int blk_time; /* time of running the test*/
- int blk_flags; /* reserved flags */
-} lst_test_bulk_param_t;
-
-typedef struct {
- int png_size; /* size of ping message */
- int png_time; /* time */
- int png_loop; /* loop */
- int png_flags; /* reserved flags */
-} lst_test_ping_param_t;
-
-/* more tests */
-
-typedef struct {
- __u32 errors;
- __u32 rpcs_sent;
- __u32 rpcs_rcvd;
- __u32 rpcs_dropped;
- __u32 rpcs_expired;
- __u64 bulk_get;
- __u64 bulk_put;
-} srpc_counters_t;
-
-typedef struct {
- __u32 active_tests;
- __u32 active_batches;
- __u32 zombie_sessions;
- __u32 brw_errors;
- __u32 ping_errors;
-} sfw_counters_t;
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-/*
- * The PTLLND was designed to support Portals with
- * Lustre and non-lustre UNLINK semantics.
- * However for now the two targets are Cray Portals
- * on the XT3 and Lustre Portals (for testing) both
- * have Lustre UNLINK semantics, so this is defined
- * by default.
- */
-#define LUSTRE_PORTALS_UNLINK_SEMANTICS
-
-
-#ifdef _USING_LUSTRE_PORTALS_
-
-/* NIDs are 64-bits on Lustre Portals */
-#define FMT_NID LPU64
-#define FMT_PID "%d"
-
-/* When using Lustre Portals Lustre completion semantics are imlicit*/
-#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0
-
-#else /* _USING_CRAY_PORTALS_ */
-
-/* NIDs are integers on Cray Portals */
-#define FMT_NID "%u"
-#define FMT_PID "%d"
-
-/* When using Cray Portals this is defined in the Cray Portals Header*/
-/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */
-
-/* Can compare handles directly on Cray Portals */
-#define PtlHandleIsEqual(a,b) ((a) == (b))
-
-/* Diffrent error types on Cray Portals*/
-#define ptl_err_t ptl_ni_fail_t
-
-/*
- * The Cray Portals has no maximum number of IOVs. The
- * maximum is limited only my memory and size of the
- * int parameters (2^31-1).
- * Lustre only really require that the underyling
- * implemenation to support at least LNET_MAX_IOV,
- * so for Cray portals we can safely just use that
- * value here.
- *
- */
-#define PTL_MD_MAX_IOV LNET_MAX_IOV
-
-#endif
-
-#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID
-
-/* Align incoming small request messages to an 8 byte boundary if this is
- * supported to avoid alignment issues on some architectures */
-#ifndef PTL_MD_LOCAL_ALIGN8
-# define PTL_MD_LOCAL_ALIGN8 0
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-/* Minimum buffer size that any peer will post to receive ptllnd messages */
-#define PTLLND_MIN_BUFFER_SIZE 256
-
-/************************************************************************
- * Tunable defaults that {u,k}lnds/ptllnd should have in common.
- */
-
-#define PTLLND_PORTAL 9 /* The same portal PTLPRC used when talking to cray portals */
-#define PTLLND_PID 9 /* The Portals PID */
-#define PTLLND_PEERCREDITS 8 /* concurrent sends to 1 peer */
-
-/* Default buffer size for kernel ptllnds (guaranteed eager) */
-#define PTLLND_MAX_KLND_MSG_SIZE 512
-
-/* Default buffer size for catamount ptllnds (not guaranteed eager) - large
- * enough to avoid RDMA for anything sent while control is not in liblustre */
-#define PTLLND_MAX_ULND_MSG_SIZE 512
-
-
-/************************************************************************
- * Portals LND Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- */
-
-#define PTL_RESERVED_MATCHBITS 0x100 /* below this value is reserved
- * above is for bulk data transfer */
-#define LNET_MSG_MATCHBITS 0 /* the value for the message channel */
-
-typedef struct
-{
- lnet_hdr_t kptlim_hdr; /* portals header */
- char kptlim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR kptl_immediate_msg_t;
-
-typedef struct
-{
- lnet_hdr_t kptlrm_hdr; /* portals header */
- __u64 kptlrm_matchbits; /* matchbits */
-} WIRE_ATTR kptl_rdma_msg_t;
-
-typedef struct
-{
- __u64 kptlhm_matchbits; /* matchbits */
- __u32 kptlhm_max_msg_size; /* max message size */
-} WIRE_ATTR kptl_hello_msg_t;
-
-typedef struct
-{
- /* First 2 fields fixed FOR ALL TIME */
- __u32 ptlm_magic; /* I'm a Portals LND message */
- __u16 ptlm_version; /* this is my version number */
- __u8 ptlm_type; /* the message type */
- __u8 ptlm_credits; /* returned credits */
- __u32 ptlm_nob; /* # bytes in whole message */
- __u32 ptlm_cksum; /* checksum (0 == no checksum) */
- __u64 ptlm_srcnid; /* sender's NID */
- __u64 ptlm_srcstamp; /* sender's incarnation */
- __u64 ptlm_dstnid; /* destination's NID */
- __u64 ptlm_dststamp; /* destination's incarnation */
- __u32 ptlm_srcpid; /* sender's PID */
- __u32 ptlm_dstpid; /* destination's PID */
-
- union {
- kptl_immediate_msg_t immediate;
- kptl_rdma_msg_t rdma;
- kptl_hello_msg_t hello;
- } WIRE_ATTR ptlm_u;
-
-} kptl_msg_t;
-
-#define PTLLND_MSG_MAGIC LNET_PROTO_PTL_MAGIC
-#define PTLLND_MSG_VERSION 0x04
-
-#define PTLLND_RDMA_OK 0x00
-#define PTLLND_RDMA_FAIL 0x01
-
-#define PTLLND_MSG_TYPE_INVALID 0x00
-#define PTLLND_MSG_TYPE_PUT 0x01
-#define PTLLND_MSG_TYPE_GET 0x02
-#define PTLLND_MSG_TYPE_IMMEDIATE 0x03 /* No bulk data xfer*/
-#define PTLLND_MSG_TYPE_NOOP 0x04
-#define PTLLND_MSG_TYPE_HELLO 0x05
-#define PTLLND_MSG_TYPE_NAK 0x06
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * <lnet/socklnd.h>
- *
- * #defines shared between socknal implementation and utilities
- */
-#ifndef __LNET_LNET_SOCKLND_H__
-#define __LNET_LNET_SOCKLND_H__
-
-#include <lnet/types.h>
-#include <lnet/lib-types.h>
-
-#define SOCKLND_CONN_NONE (-1)
-#define SOCKLND_CONN_ANY 0
-#define SOCKLND_CONN_CONTROL 1
-#define SOCKLND_CONN_BULK_IN 2
-#define SOCKLND_CONN_BULK_OUT 3
-#define SOCKLND_CONN_NTYPES 4
-
-typedef struct {
- __u32 kshm_magic; /* magic number of socklnd message */
- __u32 kshm_version; /* version of socklnd message */
- lnet_nid_t kshm_src_nid; /* sender's nid */
- lnet_nid_t kshm_dst_nid; /* destination nid */
- lnet_pid_t kshm_src_pid; /* sender's pid */
- lnet_pid_t kshm_dst_pid; /* destination pid */
- __u64 kshm_src_incarnation; /* sender's incarnation */
- __u64 kshm_dst_incarnation; /* destination's incarnation */
- __u32 kshm_ctype; /* connection type */
- __u32 kshm_nips; /* # IP addrs */
- __u32 kshm_ips[0]; /* IP addrs */
-} WIRE_ATTR ksock_hello_msg_t;
-
-typedef struct {
- lnet_hdr_t ksnm_hdr; /* lnet hdr */
- char ksnm_payload[0];/* lnet payload */
-} WIRE_ATTR ksock_lnet_msg_t;
-
-typedef struct {
- __u32 ksm_type; /* type of socklnd message */
- __u32 ksm_csum; /* checksum if != 0 */
- __u64 ksm_zc_req_cookie; /* ack required if != 0 */
- __u64 ksm_zc_ack_cookie; /* ack if != 0 */
- union {
- ksock_lnet_msg_t lnetmsg; /* lnet message, it's empty if it's NOOP */
- } WIRE_ATTR ksm_u;
-} WIRE_ATTR ksock_msg_t;
-
-#define KSOCK_MSG_NOOP 0xc0 /* ksm_u empty */
-#define KSOCK_MSG_LNET 0xc1 /* lnet msg */
-
-/* We need to know this number to parse hello msg from ksocklnd in
- * other LND (usocklnd, for example) */
-#define KSOCK_PROTO_V2 2
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_TYPES_H__
-#define __LNET_TYPES_H__
-
-#include <libcfs/libcfs.h>
-
-#define LNET_RESERVED_PORTAL 0 /* portals reserved for lnet's own use */
-
-typedef __u64 lnet_nid_t;
-typedef __u32 lnet_pid_t;
-
-#define LNET_NID_ANY ((lnet_nid_t) -1)
-#define LNET_PID_ANY ((lnet_pid_t) -1)
-
-#ifdef CRAY_XT3
-typedef __u32 lnet_uid_t;
-#define LNET_UID_ANY ((lnet_uid_t) -1)
-#endif
-
-#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
-#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
-
-#define LNET_TIME_FOREVER (-1)
-
-typedef struct {
- __u64 cookie;
-} lnet_handle_any_t;
-
-typedef lnet_handle_any_t lnet_handle_eq_t;
-typedef lnet_handle_any_t lnet_handle_md_t;
-typedef lnet_handle_any_t lnet_handle_me_t;
-
-#define LNET_INVALID_HANDLE \
- ((const lnet_handle_any_t){.cookie = -1})
-#define LNET_EQ_NONE LNET_INVALID_HANDLE
-
-static inline int LNetHandleIsEqual (lnet_handle_any_t h1, lnet_handle_any_t h2)
-{
- return (h1.cookie == h2.cookie);
-}
-
-typedef struct {
- lnet_nid_t nid;
- lnet_pid_t pid; /* node id / process id */
-} lnet_process_id_t;
-
-typedef enum {
- LNET_RETAIN = 0,
- LNET_UNLINK
-} lnet_unlink_t;
-
-typedef enum {
- LNET_INS_BEFORE,
- LNET_INS_AFTER
-} lnet_ins_pos_t;
-
-typedef struct {
- void *start;
- unsigned int length;
- int threshold;
- int max_size;
- unsigned int options;
- void *user_ptr;
- lnet_handle_eq_t eq_handle;
-} lnet_md_t;
-
-/* Max Transfer Unit (minimum supported everywhere) */
-#define LNET_MTU_BITS 20
-#define LNET_MTU (1<<LNET_MTU_BITS)
-
-/* limit on the number of entries in discontiguous MDs */
-#define LNET_MAX_IOV 256
-
-/* Max payload size */
-#ifndef LNET_MAX_PAYLOAD
-# error "LNET_MAX_PAYLOAD must be defined in config.h"
-#else
-# if (LNET_MAX_PAYLOAD < LNET_MTU)
-# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
-# elif defined(__KERNEL__)
-# if (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
-/* PAGE_SIZE is a constant: check with cpp! */
-# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
-# endif
-# endif
-#endif
-
-/* Options for the MD structure */
-#define LNET_MD_OP_PUT (1 << 0)
-#define LNET_MD_OP_GET (1 << 1)
-#define LNET_MD_MANAGE_REMOTE (1 << 2)
-/* unused (1 << 3) */
-#define LNET_MD_TRUNCATE (1 << 4)
-#define LNET_MD_ACK_DISABLE (1 << 5)
-#define LNET_MD_IOVEC (1 << 6)
-#define LNET_MD_MAX_SIZE (1 << 7)
-#define LNET_MD_KIOV (1 << 8)
-
-/* For compatibility with Cray Portals */
-#define LNET_MD_PHYS 0
-
-#define LNET_MD_THRESH_INF (-1)
-
-/* NB lustre portals uses struct iovec internally! */
-typedef struct iovec lnet_md_iovec_t;
-
-typedef struct {
- cfs_page_t *kiov_page;
- unsigned int kiov_len;
- unsigned int kiov_offset;
-} lnet_kiov_t;
-
-typedef enum {
- LNET_EVENT_GET,
- LNET_EVENT_PUT,
- LNET_EVENT_REPLY,
- LNET_EVENT_ACK,
- LNET_EVENT_SEND,
- LNET_EVENT_UNLINK,
-} lnet_event_kind_t;
-
-#define LNET_SEQ_BASETYPE long
-typedef unsigned LNET_SEQ_BASETYPE lnet_seq_t;
-#define LNET_SEQ_GT(a,b) (((signed LNET_SEQ_BASETYPE)((a) - (b))) > 0)
-
-/* XXX
- * cygwin need the pragma line, not clear if it's needed in other places.
- * checking!!!
- */
-#ifdef __CYGWIN__
-#pragma pack(push, 4)
-#endif
-typedef struct {
- lnet_process_id_t target;
- lnet_process_id_t initiator;
- lnet_nid_t sender;
- lnet_event_kind_t type;
- unsigned int pt_index;
- __u64 match_bits;
- unsigned int rlength;
- unsigned int mlength;
- lnet_handle_md_t md_handle;
- lnet_md_t md;
- __u64 hdr_data;
- int status;
- int unlinked;
- unsigned int offset;
-#ifdef CRAY_XT3
- lnet_uid_t uid;
-#endif
-
- volatile lnet_seq_t sequence;
-} lnet_event_t;
-#ifdef __CYGWIN__
-#pragma pop
-#endif
-
-typedef enum {
- LNET_ACK_REQ,
- LNET_NOACK_REQ
-} lnet_ack_req_t;
-
-typedef void (*lnet_eq_handler_t)(lnet_event_t *event);
-#define LNET_EQ_HANDLER_NONE NULL
-
-#endif
+++ /dev/null
-#ifndef __WINNT_API_SUPPORT_H__
-#define __WINNT_API_SUPPORT_H__
-
-#ifndef __LNET_API_SUPPORT_H__
-#error Do not #include this file directly. #include <lnet/api-support.h> instead
-#endif
-
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_WINNT_LIB_LNET_H__
-#define __LNET_WINNT_LIB_LNET_H__
-
-#ifndef __LNET_LIB_LNET_H__
-#error Do not #include this file directly. #include <lnet/lib-lnet.h> instead
-#endif
-
-#ifdef __KERNEL__
-# include <libcfs/libcfs.h>
-# include <libcfs/kp30.h>
-
-static inline __u64
-lnet_page2phys (struct page *p)
-{
- return 0;
-}
-
-#else /* __KERNEL__ */
-
-#endif
-
-#endif /* __LNET_WINNT_LIB_LNET_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef __LNET_WINNT_LIB_TYPES_H__
-#define __LNET_WINNT_LIB_TYPES_H__
-
-#ifndef __LNET_LIB_TYPES_H__
-#error Do not #include this file directly. #include <lnet/lib-types.h> instead
-#endif
-
-#include <libcfs/libcfs.h>
-
-typedef struct {
- spinlock_t lock;
-} lib_ni_lock_t;
-
-static inline void lib_ni_lock_init(lib_ni_lock_t *l)
-{
- spin_lock_init(&l->lock);
-}
-
-static inline void lib_ni_lock_fini(lib_ni_lock_t *l)
-{}
-
-static inline void lib_ni_lock(lib_ni_lock_t *l)
-{
- int flags;
- spin_lock_irqsave(&l->lock, flags);
-}
-
-static inline void lib_ni_unlock(lib_ni_lock_t *l)
-{
- spin_unlock_irqrestore(&l->lock, 0);
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __LNET_LINUX_LNET_H__
-#define __LNET_LINUX_LNET_H__
-
-#ifndef __LNET_H__
-#error Do not #include this file directly. #include <lnet/lnet.h> instead
-#endif
-
-#ifdef __KERNEL__
-
-#include <libcfs/libcfs.h>
-#include <lnet/lib-lnet.h>
-
-/*
- * tdilnd routines
- */
-
-
-PUCHAR
-KsNtStatusToString (IN NTSTATUS Status);
-
-
-VOID
-KsPrintf(
- IN LONG DebugPrintLevel,
- IN PCHAR DebugMessage,
- IN ...
- );
-
-
-ksock_mdl_t *
-ks_lock_iovs(
- IN struct iovec *iov,
- IN int niov,
- IN int recv,
- IN int * len
- );
-
-ksock_mdl_t *
-ks_lock_kiovs(
- IN lnet_kiov_t * kiov,
- IN int nkiov,
- IN int recv,
- IN int * len
- );
-
-int
-ks_send_mdl(
- ksock_tconn_t * tconn,
- void * tx,
- ksock_mdl_t * mdl,
- int len,
- int flags
- );
-
-int
-ks_query_data(
- ksock_tconn_t * tconn,
- size_t * size,
- int bIsExpedited);
-
-int
-ks_recv_mdl(
- ksock_tconn_t * tconn,
- ksock_mdl_t * mdl,
- int size,
- int flags
- );
-
-int
-ks_get_tcp_option (
- ksock_tconn_t * tconn,
- ULONG ID,
- PVOID OptionValue,
- PULONG Length
- );
-
-NTSTATUS
-ks_set_tcp_option (
- ksock_tconn_t * tconn,
- ULONG ID,
- PVOID OptionValue,
- ULONG Length
- );
-
-int
-ks_bind_tconn (
- ksock_tconn_t * tconn,
- ksock_tconn_t * parent,
- ulong_ptr addr,
- unsigned short port
- );
-
-int
-ks_build_tconn(
- ksock_tconn_t * tconn,
- ulong_ptr addr,
- unsigned short port
- );
-
-int
-ks_disconnect_tconn(
- ksock_tconn_t * tconn,
- ulong_ptr flags
- );
-
-void
-ks_abort_tconn(
- ksock_tconn_t * tconn
- );
-
-int
-ks_query_local_ipaddr(
- ksock_tconn_t * tconn
- );
-
-int
-ks_tconn_write (ksock_tconn_t *tconn, void *buffer, int nob);
-
-int
-ks_tconn_read (ksock_tconn_t * tconn, void *buffer, int nob);
-
-NTSTATUS
-KsTcpCompletionRoutine(
- IN PDEVICE_OBJECT DeviceObject,
- IN PIRP Irp,
- IN PVOID Context
- );
-
-NTSTATUS
-KsDisconectCompletionRoutine (
- IN PDEVICE_OBJECT DeviceObject,
- IN PIRP Irp,
- IN PVOID Context
- );
-
-NTSTATUS
-KsTcpReceiveCompletionRoutine(
- IN PIRP Irp,
- IN PKS_TCP_COMPLETION_CONTEXT Context
- );
-
-NTSTATUS
-KsTcpSendCompletionRoutine(
- IN PIRP Irp,
- IN PKS_TCP_COMPLETION_CONTEXT Context
- );
-
-NTSTATUS
-KsAcceptCompletionRoutine(
- IN PDEVICE_OBJECT DeviceObject,
- IN PIRP Irp,
- IN PVOID Context
- );
-
-
-NTSTATUS
-KsConnectEventHandler(
- IN PVOID TdiEventContext,
- IN LONG RemoteAddressLength,
- IN PVOID RemoteAddress,
- IN LONG UserDataLength,
- IN PVOID UserData,
- IN LONG OptionsLength,
- IN PVOID Options,
- OUT CONNECTION_CONTEXT * ConnectionContext,
- OUT PIRP * AcceptIrp
- );
-
-NTSTATUS
-KsDisconnectEventHandler(
- IN PVOID TdiEventContext,
- IN CONNECTION_CONTEXT ConnectionContext,
- IN LONG DisconnectDataLength,
- IN PVOID DisconnectData,
- IN LONG DisconnectInformationLength,
- IN PVOID DisconnectInformation,
- IN ULONG DisconnectFlags
- );
-
-NTSTATUS
-KsTcpReceiveEventHandler(
- IN PVOID TdiEventContext,
- IN CONNECTION_CONTEXT ConnectionContext,
- IN ULONG ReceiveFlags,
- IN ULONG BytesIndicated,
- IN ULONG BytesAvailable,
- OUT ULONG * BytesTaken,
- IN PVOID Tsdu,
- OUT PIRP * IoRequestPacket
- );
-
-NTSTATUS
-KsTcpReceiveExpeditedEventHandler(
- IN PVOID TdiEventContext,
- IN CONNECTION_CONTEXT ConnectionContext,
- IN ULONG ReceiveFlags,
- IN ULONG BytesIndicated,
- IN ULONG BytesAvailable,
- OUT ULONG * BytesTaken,
- IN PVOID Tsdu,
- OUT PIRP * IoRequestPacket
- );
-
-NTSTATUS
-KsTcpChainedReceiveEventHandler (
- IN PVOID TdiEventContext, // the event context
- IN CONNECTION_CONTEXT ConnectionContext,
- IN ULONG ReceiveFlags,
- IN ULONG ReceiveLength,
- IN ULONG StartingOffset, // offset of start of client data in TSDU
- IN PMDL Tsdu, // TSDU data chain
- IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
- );
-
-NTSTATUS
-KsTcpChainedReceiveExpeditedEventHandler (
- IN PVOID TdiEventContext, // the event context
- IN CONNECTION_CONTEXT ConnectionContext,
- IN ULONG ReceiveFlags,
- IN ULONG ReceiveLength,
- IN ULONG StartingOffset, // offset of start of client data in TSDU
- IN PMDL Tsdu, // TSDU data chain
- IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
- );
-
-
-
-VOID
-KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem);
-
-
-ULONG
-ks_tdi_send_flags(ULONG SockFlags);
-
-PIRP
-KsBuildTdiIrp(
- IN PDEVICE_OBJECT DeviceObject
- );
-
-NTSTATUS
-KsSubmitTdiIrp(
- IN PDEVICE_OBJECT DeviceObject,
- IN PIRP Irp,
- IN BOOLEAN bSynchronous,
- OUT PULONG Information
- );
-
-NTSTATUS
-KsOpenControl(
- IN PUNICODE_STRING DeviceName,
- OUT HANDLE * Handle,
- OUT PFILE_OBJECT * FileObject
- );
-
-NTSTATUS
-KsCloseControl(
- IN HANDLE Handle,
- IN PFILE_OBJECT FileObject
- );
-
-NTSTATUS
-KsOpenAddress(
- IN PUNICODE_STRING DeviceName,
- IN PTRANSPORT_ADDRESS pAddress,
- IN ULONG AddressLength,
- OUT HANDLE * Handle,
- OUT PFILE_OBJECT * FileObject
- );
-
-NTSTATUS
-KsCloseAddress(
- IN HANDLE Handle,
- IN PFILE_OBJECT FileObject
- );
-
-NTSTATUS
-KsOpenConnection(
- IN PUNICODE_STRING DeviceName,
- IN CONNECTION_CONTEXT ConnectionContext,
- OUT HANDLE * Handle,
- OUT PFILE_OBJECT * FileObject
- );
-
-NTSTATUS
-KsCloseConnection(
- IN HANDLE Handle,
- IN PFILE_OBJECT FileObject
- );
-
-NTSTATUS
-KsAssociateAddress(
- IN HANDLE AddressHandle,
- IN PFILE_OBJECT ConnectionObject
- );
-
-
-NTSTATUS
-KsDisassociateAddress(
- IN PFILE_OBJECT ConnectionObject
- );
-
-
-NTSTATUS
-KsSetEventHandlers(
- IN PFILE_OBJECT AddressObject,
- IN PVOID EventContext,
- IN PKS_EVENT_HANDLERS Handlers
- );
-
-
-NTSTATUS
-KsQueryProviderInfo(
- PWSTR TdiDeviceName,
- PTDI_PROVIDER_INFO ProviderInfo
- );
-
-NTSTATUS
-KsQueryAddressInfo(
- IN PFILE_OBJECT FileObject,
- OUT PTDI_ADDRESS_INFO AddressInfo,
- OUT PULONG AddressSize
- );
-
-NTSTATUS
-KsQueryConnectionInfo(
- IN PFILE_OBJECT ConnectionObject,
- OUT PTDI_CONNECTION_INFO ConnectionInfo,
- OUT PULONG ConnectionSize
- );
-
-ULONG
-KsInitializeTdiAddress(
- IN OUT PTA_IP_ADDRESS pTransportAddress,
- IN ULONG IpAddress,
- IN USHORT IpPort
- );
-
-ULONG
-KsQueryMdlsSize (IN PMDL Mdl);
-
-
-ULONG
-KsQueryTdiAddressLength(
- OUT PTRANSPORT_ADDRESS pTransportAddress
- );
-
-NTSTATUS
-KsQueryIpAddress(
- IN PFILE_OBJECT FileObject,
- OUT PVOID TdiAddress,
- OUT ULONG* AddressLength
- );
-
-
-NTSTATUS
-KsErrorEventHandler(
- IN PVOID TdiEventContext,
- IN NTSTATUS Status
- );
-
-int
-ks_set_handlers(
- ksock_tconn_t * tconn
- );
-
-
-VOID
-KsPrintProviderInfo(
- PWSTR DeviceName,
- PTDI_PROVIDER_INFO ProviderInfo
- );
-
-ksock_tconn_t *
-ks_create_tconn();
-
-void
-ks_free_tconn(
- ksock_tconn_t * tconn
- );
-
-void
-ks_init_listener(
- ksock_tconn_t * tconn
- );
-
-void
-ks_init_sender(
- ksock_tconn_t * tconn
- );
-
-void
-ks_init_child(
- ksock_tconn_t * tconn
- );
-
-void
-ks_get_tconn(
- ksock_tconn_t * tconn
- );
-
-void
-ks_put_tconn(
- ksock_tconn_t * tconn
- );
-
-int
-ks_reset_handlers(
- ksock_tconn_t * tconn
- );
-
-void
-ks_destroy_tconn(
- ksock_tconn_t * tconn
- );
-
-
-PKS_TSDU
-KsAllocateKsTsdu();
-
-VOID
-KsPutKsTsdu(
- PKS_TSDU KsTsdu
- );
-
-VOID
-KsFreeKsTsdu(
- PKS_TSDU KsTsdu
- );
-
-VOID
-KsInitializeKsTsdu(
- PKS_TSDU KsTsdu,
- ULONG Length
- );
-
-
-VOID
-KsInitializeKsTsduMgr(
- PKS_TSDUMGR TsduMgr
- );
-
-VOID
-KsInitializeKsChain(
- PKS_CHAIN KsChain
- );
-
-NTSTATUS
-KsCleanupTsduMgr(
- PKS_TSDUMGR KsTsduMgr
- );
-
-NTSTATUS
-KsCleanupKsChain(
- PKS_CHAIN KsChain
- );
-
-NTSTATUS
-KsCleanupTsdu(
- ksock_tconn_t * tconn
- );
-
-NTSTATUS
-KsCopyMdlChainToMdlChain(
- IN PMDL SourceMdlChain,
- IN ULONG SourceOffset,
- IN PMDL DestinationMdlChain,
- IN ULONG DestinationOffset,
- IN ULONG BytesTobecopied,
- OUT PULONG BytesCopied
- );
-
-ULONG
-KsQueryMdlsSize (PMDL Mdl);
-
-NTSTATUS
-KsLockUserBuffer (
- IN PVOID UserBuffer,
- IN BOOLEAN bPaged,
- IN ULONG Length,
- IN LOCK_OPERATION Operation,
- OUT PMDL * pMdl
- );
-
-PVOID
-KsMapMdlBuffer (PMDL Mdl);
-
-VOID
-KsReleaseMdl ( IN PMDL Mdl,
- IN int Paged );
-
-int
-ks_lock_buffer (
- void * buffer,
- int paged,
- int length,
- LOCK_OPERATION access,
- ksock_mdl_t ** kmdl
- );
-
-void *
-ks_map_mdl (ksock_mdl_t * mdl);
-
-void
-ks_release_mdl (ksock_mdl_t *mdl, int paged);
-
-#endif /* __KERNEL__ */
-
-#endif
+++ /dev/null
-Makefile
-autoMakefile
-autoMakefile.in
-.*.cmd
-.depend
+++ /dev/null
-@BUILD_GMLND_TRUE@subdir-m += gmlnd
-@BUILD_MXLND_TRUE@subdir-m += mxlnd
-@BUILD_RALND_TRUE@subdir-m += ralnd
-@BUILD_O2IBLND_TRUE@subdir-m += o2iblnd
-@BUILD_OPENIBLND_TRUE@subdir-m += openiblnd
-@BUILD_CIBLND_TRUE@subdir-m += ciblnd
-@BUILD_IIBLND_TRUE@subdir-m += iiblnd
-@BUILD_VIBLND_TRUE@subdir-m += viblnd
-@BUILD_QSWLND_TRUE@subdir-m += qswlnd
-@BUILD_PTLLND_TRUE@subdir-m += ptllnd
-subdir-m += socklnd
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-SUBDIRS = socklnd qswlnd gmlnd mxlnd openiblnd iiblnd viblnd ralnd ptllnd ciblnd o2iblnd
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-MODULES := kciblnd
-kciblnd-objs := ciblnd.o ciblnd_cb.o ciblnd_modparams.o
-
-default: all
-
-EXTRA_POST_CFLAGS := @CIBCPPFLAGS@ -I@LUSTRE@/../lnet/klnds/openiblnd
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_CIBLND
-modulenet_DATA = kciblnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kciblnd-objs:%.o=%.c)
-
+++ /dev/null
-#include "openiblnd.c"
+++ /dev/null
-#include "openiblnd_cb.c"
+++ /dev/null
-#include "openiblnd_modparams.c"
+++ /dev/null
-.deps
-Makefile
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.cmd
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-MODULES := kgmlnd
-kgmlnd-objs := gmlnd_api.o gmlnd_cb.o gmlnd_comm.o gmlnd_utils.o gmlnd_module.o
-
-EXTRA_PRE_CFLAGS := @GMCPPFLAGS@ -DGM_KERNEL
-
-@INCLUDE_RULES@
+++ /dev/null
-1. This version of the GM nal requires an unreleased extension to the GM API to
- map physical memory: gm_register_memory_ex_phys(). This allows it to avoid
- ENOMEM problems associated with large contiguous buffer allocation.
-
-2. ./configure --with-gm=<path-to-gm-source-tree> \
- [--with-gm-install=<path-to-gm-installation>]
-
- If the sources do not support gm_register_memory_ex_phys(), configure flags
- an error. In this case you should apply the patch and rebuild and re-install
- GM as directed in the error message.
-
- By default GM is installed in /opt/gm. If an alternate path was specified to
- <GM-sources>/binary/GM_INSTALL, you should also specify --with-gm-install
- with the same path.
-
-3. The GM timeout is 300 seconds; i.e. the network may not release resources
- claimed by communications stalled with a crashing node for this time.
- Default gmnal buffer tuning parameters (see (4) below) have been chosen to
- minimize this problem and prevent lustre having to block for resources.
- However in some situations, where all network buffers are busy, the default
- lustre timeout (various, scaled from the base timeout of 100 seconds) may be
- too small and the only solution may be to increase the lustre timeout
- dramatically.
-
-4. The gmnal has the following module parameters...
-
- gmnal_port The GM port that the NAL will use (default 4)
- Change this if it conflicts with site usage.
-
- gmnal_ntx The number of "normal" transmit descriptors (default
- 32). When this pool is exhausted, threads sending
- and receiving on the network block until in-progress
- transmits have completed. Each descriptor consumes 1
- GM_MTU sized buffer.
-
- gmnal_ntx_nblk The number of "reserved" transmit descriptors
- (default 256). This pool is reserved for responses to
- incoming communications that may not block. Increase
- only if console error messages indicates the pool
- has been exhausted (LustreError: Can't get tx for
- msg type...) Each descriptor consumes 1 GM_MTU sized
- buffer.
-
- gmnal_nlarge_tx_bufs The number of 1MByte transmit buffers to reserve at
- startup (default 32). This controls the number of
- concurrent sends larger that GM_MTU. It can be
- reduced to conserve memory, or increased to increase
- large message sending concurrency.
-
- gmnal_nrx_small The number of GM_MTU sized receive buffers posted to
- receive from the network (default 128). Increase if
- congestion is suspected, however note that the total
- number of receives that can be posted at any time is
- limited by the number of GM receive tokens
- available. If there are too few, this, and
- gmnal_nrx_large are scaled back accordingly.
-
- gmnal_nrx_large The number of 1MByte receive buffers posted to
- receive from the network (default 64). Increase if
- the number of OST threads is increased. But note
- that the total number of receives that can be posted
- at any time is limited by the number of GM receive
- tokens available. If there are too few, this, and
- gmnal_nrx_small are scaled back accordingly.
-
-5. Network configuration for GM is done in an lmc script as follows...
-
- GM2NID=${path-to-lustre-tree}/portals/utils/gmnalnid
-
- ${LMC} --node some_server --add net --nettype gm --nid `$GM2NID -n some_server`
-
- ${LMC} --node client --add net --nettype gm --nid '*'
-
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_GMLND
-modulenet_DATA = kgmlnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kgmlnd-objs:%.o=%.c) gmlnd.h
+++ /dev/null
-Index: libgm/gm_register.c
-===================================================================
-RCS file: /repository/gm/libgm/gm_register.c,v
-retrieving revision 1.9.16.3
-diff -u -r1.9.16.3 gm_register.c
---- libgm/gm_register.c 9 Aug 2005 14:37:02 -0000 1.9.16.3
-+++ libgm/gm_register.c 25 Aug 2005 21:35:58 -0000
-@@ -77,20 +77,14 @@
-
- */
-
--GM_ENTRY_POINT
--gm_status_t
--gm_register_memory_ex (gm_port_t *p, void *_ptr, gm_size_t length, void *_pvma)
-+static gm_status_t
-+_gm_register_memory (gm_port_t *p, int is_physical, gm_u64_t ptr, gm_size_t length, gm_up_t pvma)
- {
- gm_status_t status;
-- gm_up_t ptr;
-- gm_up_t pvma;
-
- GM_CALLED_WITH_ARGS (("%p,%p,"GM_U64_TMPL",%p",
- p, _ptr, GM_U64_ARG (length), _pvma));
-
-- ptr = GM_PTR_TO_UP (_ptr);
-- pvma = GM_PTR_TO_UP (_pvma);
--
- #if !GM_KERNEL && !GM_CAN_REGISTER_MEMORY
- GM_PARAMETER_MAY_BE_UNUSED (p);
- GM_PARAMETER_MAY_BE_UNUSED (ptr);
-@@ -160,7 +154,7 @@
- status = gm_add_mapping_to_page_table (ps,
- ptr + offset,
- pvma + offset,
-- GM_INVALID_DMA_PAGE);
-+ is_physical ? ptr + offset : GM_INVALID_DMA_PAGE);
- if (status != GM_SUCCESS)
- {
- status = GM_INVALID_PARAMETER;
-@@ -317,13 +311,31 @@
-
- */
-
-+#if GM_KERNEL && (GM_CPU_x86 || GM_CPU_x86_64 || GM_CPU_ia64)
-+/* only architecture where pci bus addr == physical address can use
-+ such a simple scheme */
-+GM_ENTRY_POINT gm_status_t
-+gm_register_memory_ex_phys (struct gm_port *p,
-+ gm_u64_t phys, gm_size_t length,
-+ gm_up_t pvma)
-+{
-+ return _gm_register_memory(p, 1, phys, length, (gm_size_t)pvma);
-+}
-+#endif
-+
-+GM_ENTRY_POINT gm_status_t
-+gm_register_memory_ex (gm_port_t *p, void *ptr, gm_size_t length, void *pvma)
-+{
-+ return _gm_register_memory(p, 0, (gm_size_t)ptr, length, (gm_size_t)pvma);
-+}
-+
- GM_ENTRY_POINT gm_status_t
- gm_register_memory (gm_port_t *p, void *ptr, gm_size_t length)
- {
- gm_status_t status;
-
- GM_CALLED_WITH_ARGS (("%p,%p,"GM_U64_TMPL, p, ptr, GM_U64_ARG (length)));
-- status = gm_register_memory_ex (p, ptr, length, ptr);
-+ status = _gm_register_memory(p, 0, (gm_size_t)ptr, length, (gm_size_t)ptr);
- GM_RETURN_STATUS (status);
- }
-
-Index: include/gm.h
-===================================================================
-RCS file: /repository/gm/include/gm.h,v
-retrieving revision 1.25.10.11
-diff -u -r1.25.10.11 gm.h
---- include/gm.h 14 Mar 2005 21:42:41 -0000 1.25.10.11
-+++ include/gm.h 25 Aug 2005 21:35:58 -0000
-@@ -2676,6 +2676,10 @@
- GM_ENTRY_POINT gm_status_t gm_register_memory_ex (struct gm_port *p,
- void *ptr, gm_size_t length,
- void *pvma);
-+
-+GM_ENTRY_POINT gm_status_t gm_register_memory_ex_phys (struct gm_port *p,
-+ gm_u64_t phys, gm_size_t length,
-+ gm_up_t pvma);
- #endif /* GM_API_VERSION >= GM_API_VERSION_2_0_6 */
-
- #if GM_API_VERSION >= GM_API_VERSION_2_1_0
-Index: libgm/gm_reference_api.c
-===================================================================
-RCS file: /repository/gm/libgm/gm_reference_api.c,v
-retrieving revision 1.3.14.1
-diff -u -r1.3.14.1 gm_reference_api.c
---- libgm/gm_reference_api.c 23 Apr 2004 20:27:29 -0000 1.3.14.1
-+++ libgm/gm_reference_api.c 25 Aug 2005 22:39:20 -0000
-@@ -154,6 +154,9 @@
- GM_REF (gm_register_buffer);
- GM_REF (gm_register_memory);
- GM_REF (gm_register_memory_ex);
-+#if GM_KERNEL && (GM_CPU_x86 || GM_CPU_x86_64 || GM_CPU_ia64)
-+GM_REF (gm_register_memory_ex_phys);
-+#endif
- GM_REF (gm_resume_sending);
- GM_REF (gm_send);
- GM_REF (gm_send_to_peer);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- *
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-/*
- * Portals GM kernel NAL header file
- * This file makes all declaration and prototypes
- * for the API side and CB side of the NAL
- */
-#ifndef __INCLUDE_GMNAL_H__
-#define __INCLUDE_GMNAL_H__
-
-/* XXX Lustre as of V1.2.2 drop defines VERSION, which causes problems
- * when including <GM>/include/gm_lanai.h which defines a structure field
- * with the name VERSION XXX */
-#ifdef VERSION
-# undef VERSION
-#endif
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include "linux/module.h"
-#include "linux/tty.h"
-#include "linux/kernel.h"
-#include "linux/mm.h"
-#include "linux/string.h"
-#include "linux/stat.h"
-#include "linux/errno.h"
-#include "linux/version.h"
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include "linux/buffer_head.h"
-#include "linux/fs.h"
-#else
-#include "linux/locks.h"
-#endif
-#include "linux/unistd.h"
-#include "linux/init.h"
-#include "linux/sem.h"
-#include "linux/vmalloc.h"
-#include "linux/sysctl.h"
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include "libcfs/kp30.h"
-#include "lnet/lnet.h"
-#include "lnet/lib-lnet.h"
-
-/* undefine these before including the GM headers which clash */
-#undef PACKAGE_BUGREPORT
-#undef PACKAGE_NAME
-#undef PACKAGE_STRING
-#undef PACKAGE_TARNAME
-#undef PACKAGE_VERSION
-
-#define GM_STRONG_TYPES 1
-#ifdef VERSION
-#undef VERSION
-#endif
-#include "gm.h"
-#include "gm_internal.h"
-
-/* Fixed tunables */
-#define GMNAL_RESCHED 100 /* # busy loops to force scheduler to yield */
-#define GMNAL_NETADDR_BASE 0x10000000 /* where we start in network VM */
-#define GMNAL_LARGE_PRIORITY GM_LOW_PRIORITY /* large message GM priority */
-#define GMNAL_SMALL_PRIORITY GM_LOW_PRIORITY /* small message GM priority */
-
-/* Wire protocol */
-typedef struct {
- lnet_hdr_t gmim_hdr; /* portals header */
- char gmim_payload[0]; /* payload */
-} gmnal_immediate_msg_t;
-
-typedef struct {
- /* First 2 fields fixed FOR ALL TIME */
- __u32 gmm_magic; /* I'm a GM message */
- __u16 gmm_version; /* this is my version number */
-
- __u16 gmm_type; /* msg type */
- __u64 gmm_srcnid; /* sender's NID */
- __u64 gmm_dstnid; /* destination's NID */
- union {
- gmnal_immediate_msg_t immediate;
- } gmm_u;
-} WIRE_ATTR gmnal_msg_t;
-
-#define GMNAL_MSG_MAGIC LNET_PROTO_GM_MAGIC
-#define GMNAL_MSG_VERSION 1
-#define GMNAL_MSG_IMMEDIATE 1
-
-typedef struct netbuf {
- __u64 nb_netaddr; /* network VM address */
- lnet_kiov_t nb_kiov[1]; /* the pages (at least 1) */
-} gmnal_netbuf_t;
-
-#define GMNAL_NETBUF_MSG(nb) ((gmnal_msg_t *)page_address((nb)->nb_kiov[0].kiov_page))
-#define GMNAL_NETBUF_LOCAL_NETADDR(nb) ((void *)((unsigned long)(nb)->nb_netaddr))
-
-typedef struct gmnal_txbuf {
- struct list_head txb_list; /* queue on gmni_idle_ltxbs */
- struct gmnal_txbuf *txb_next; /* stash on gmni_ltxs */
- gmnal_netbuf_t txb_buf; /* space */
-} gmnal_txbuf_t;
-
-typedef struct gmnal_tx {
- struct list_head tx_list; /* queue */
- int tx_credit:1; /* consumed a credit? */
- int tx_large_iskiov:1; /* large is in kiovs? */
- struct gmnal_ni *tx_gmni; /* owning NI */
- lnet_nid_t tx_nid; /* destination NID */
- int tx_gmlid; /* destination GM local ID */
- lnet_msg_t *tx_lntmsg; /* lntmsg to finalize on completion */
-
- gmnal_netbuf_t tx_buf; /* small tx buffer */
- gmnal_txbuf_t *tx_ltxb; /* large buffer (to free on completion) */
- int tx_msgnob; /* message size (so far) */
-
- int tx_large_nob; /* # bytes large buffer payload */
- int tx_large_offset; /* offset within frags */
- int tx_large_niov; /* # VM frags */
- union {
- struct iovec *iov; /* mapped frags */
- lnet_kiov_t *kiov; /* page frags */
- } tx_large_frags;
- cfs_time_t tx_launchtime; /* when (in jiffies) the
- * transmit was launched */
- struct gmnal_tx *tx_next; /* stash on gmni_txs */
-} gmnal_tx_t;
-
-typedef struct gmnal_rx {
- struct list_head rx_list; /* enqueue on gmni_rxq for handling */
- int rx_islarge:1; /* large receive buffer? */
- unsigned int rx_recv_nob; /* bytes received */
- __u16 rx_recv_gmid; /* sender */
- __u8 rx_recv_port; /* sender's port */
- __u8 rx_recv_type; /* ?? */
- struct gmnal_rx *rx_next; /* stash on gmni_rxs */
- gmnal_netbuf_t rx_buf; /* the buffer */
-} gmnal_rx_t;
-
-typedef struct gmnal_ni {
- lnet_ni_t *gmni_ni; /* generic NI */
- struct gm_port *gmni_port; /* GM port */
- spinlock_t gmni_gm_lock; /* serialise GM calls */
- int gmni_large_pages; /* # pages in a large message buffer */
- int gmni_large_msgsize; /* nob in large message buffers */
- int gmni_large_gmsize; /* large message GM bucket */
- int gmni_small_msgsize; /* nob in small message buffers */
- int gmni_small_gmsize; /* small message GM bucket */
- __u64 gmni_netaddr_base; /* base of mapped network VM */
- int gmni_netaddr_size; /* # bytes of mapped network VM */
-
- gmnal_tx_t *gmni_txs; /* all txs */
- gmnal_rx_t *gmni_rxs; /* all rx descs */
- gmnal_txbuf_t *gmni_ltxbs; /* all large tx bufs */
-
- atomic_t gmni_nthreads; /* total # threads */
- gm_alarm_t gmni_alarm; /* alarm to wake caretaker */
- int gmni_shutdown; /* tell all threads to exit */
-
- struct list_head gmni_idle_txs; /* idle tx's */
- int gmni_tx_credits; /* # transmits still possible */
- struct list_head gmni_idle_ltxbs; /* idle large tx buffers */
- struct list_head gmni_buf_txq; /* tx's waiting for buffers */
- struct list_head gmni_cred_txq; /* tx's waiting for credits */
- spinlock_t gmni_tx_lock; /* serialise */
-
- struct gm_hash *gmni_rx_hash; /* buffer->rx lookup */
- struct semaphore gmni_rx_mutex; /* serialise blocking on GM */
-} gmnal_ni_t;
-
-typedef struct {
- int *gm_port;
- int *gm_ntx;
- int *gm_credits;
- int *gm_peer_credits;
- int *gm_nlarge_tx_bufs;
- int *gm_nrx_small;
- int *gm_nrx_large;
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *gm_sysctl; /* sysctl interface */
-#endif
-} gmnal_tunables_t;
-
-
-/* gmnal_api.c */
-int gmnal_init(void);
-void gmnal_fini(void);
-int gmnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int gmnal_startup(lnet_ni_t *ni);
-void gmnal_shutdown(lnet_ni_t *ni);
-
-/* gmnal_cb.c */
-int gmnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-int gmnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-
-/* gmnal_util.c */
-void gmnal_free_ltxbufs(gmnal_ni_t *gmni);
-int gmnal_alloc_ltxbufs(gmnal_ni_t *gmni);
-void gmnal_free_txs(gmnal_ni_t *gmni);
-int gmnal_alloc_txs(gmnal_ni_t *gmni);
-void gmnal_free_rxs(gmnal_ni_t *gmni);
-int gmnal_alloc_rxs(gmnal_ni_t *gmni);
-char *gmnal_gmstatus2str(gm_status_t status);
-char *gmnal_rxevent2str(gm_recv_event_t *ev);
-void gmnal_yield(int delay);
-
-/* gmnal_comm.c */
-void gmnal_post_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx);
-gmnal_tx_t *gmnal_get_tx(gmnal_ni_t *gmni);
-void gmnal_tx_done(gmnal_tx_t *tx, int rc);
-void gmnal_pack_msg(gmnal_ni_t *gmni, gmnal_msg_t *msg,
- lnet_nid_t dstnid, int type);
-void gmnal_stop_threads(gmnal_ni_t *gmni);
-int gmnal_start_threads(gmnal_ni_t *gmni);
-void gmnal_check_txqueues_locked (gmnal_ni_t *gmni);
-
-/* Module Parameters */
-extern gmnal_tunables_t gmnal_tunables;
-
-#endif /*__INCLUDE_GMNAL_H__*/
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- *
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * Implements the API NAL functions
- */
-
-#include "gmlnd.h"
-
-lnd_t the_gmlnd =
-{
- .lnd_type = GMLND,
- .lnd_startup = gmnal_startup,
- .lnd_shutdown = gmnal_shutdown,
- .lnd_ctl = gmnal_ctl,
- .lnd_send = gmnal_send,
- .lnd_recv = gmnal_recv,
-};
-
-gmnal_ni_t *the_gmni = NULL;
-
-int
-gmnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
-
- switch (cmd) {
- case IOC_LIBCFS_REGISTER_MYNID:
- if (data->ioc_nid == ni->ni_nid)
- return 0;
-
- LASSERT (LNET_NIDNET(data->ioc_nid) == LNET_NIDNET(ni->ni_nid));
-
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID for %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- return 0;
-
- default:
- return (-EINVAL);
- }
-}
-
-int
-gmnal_set_local_nid (gmnal_ni_t *gmni)
-{
- lnet_ni_t *ni = gmni->gmni_ni;
- __u32 local_gmid;
- __u32 global_gmid;
- gm_status_t gm_status;
-
- /* Called before anything initialised: no need to lock */
- gm_status = gm_get_node_id(gmni->gmni_port, &local_gmid);
- if (gm_status != GM_SUCCESS)
- return 0;
-
- CDEBUG(D_NET, "Local node id is [%u]\n", local_gmid);
-
- gm_status = gm_node_id_to_global_id(gmni->gmni_port,
- local_gmid,
- &global_gmid);
- if (gm_status != GM_SUCCESS)
- return 0;
-
- CDEBUG(D_NET, "Global node id is [%u]\n", global_gmid);
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), global_gmid);
- return 1;
-}
-
-void
-gmnal_shutdown(lnet_ni_t *ni)
-{
- gmnal_ni_t *gmni = ni->ni_data;
-
- CDEBUG(D_TRACE, "gmnal_api_shutdown: gmni [%p]\n", gmni);
-
- LASSERT (gmni == the_gmni);
-
- /* stop processing messages */
- gmnal_stop_threads(gmni);
-
- /* stop all network callbacks */
- gm_close(gmni->gmni_port);
- gmni->gmni_port = NULL;
-
- gm_finalize();
-
- gmnal_free_ltxbufs(gmni);
- gmnal_free_txs(gmni);
- gmnal_free_rxs(gmni);
-
- LIBCFS_FREE(gmni, sizeof(*gmni));
-
- the_gmni = NULL;
- PORTAL_MODULE_UNUSE;
-}
-
-int
-gmnal_startup(lnet_ni_t *ni)
-{
- gmnal_ni_t *gmni = NULL;
- gmnal_rx_t *rx = NULL;
- gm_status_t gm_status;
- int rc;
-
- LASSERT (ni->ni_lnd == &the_gmlnd);
-
- ni->ni_maxtxcredits = *gmnal_tunables.gm_credits;
- ni->ni_peertxcredits = *gmnal_tunables.gm_peer_credits;
-
- if (the_gmni != NULL) {
- CERROR("Only 1 instance supported\n");
- return -EINVAL;
- }
-
- LIBCFS_ALLOC(gmni, sizeof(*gmni));
- if (gmni == NULL) {
- CERROR("can't allocate gmni\n");
- return -ENOMEM;
- }
-
- ni->ni_data = gmni;
-
- memset(gmni, 0, sizeof(*gmni));
- gmni->gmni_ni = ni;
- spin_lock_init(&gmni->gmni_tx_lock);
- spin_lock_init(&gmni->gmni_gm_lock);
- INIT_LIST_HEAD(&gmni->gmni_idle_txs);
- INIT_LIST_HEAD(&gmni->gmni_idle_ltxbs);
- INIT_LIST_HEAD(&gmni->gmni_buf_txq);
- INIT_LIST_HEAD(&gmni->gmni_cred_txq);
- sema_init(&gmni->gmni_rx_mutex, 1);
- PORTAL_MODULE_USE;
-
- /*
- * initialise the interface,
- */
- CDEBUG(D_NET, "Calling gm_init\n");
- if (gm_init() != GM_SUCCESS) {
- CERROR("call to gm_init failed\n");
- goto failed_0;
- }
-
- CDEBUG(D_NET, "Calling gm_open with port [%d], version [%d]\n",
- *gmnal_tunables.gm_port, GM_API_VERSION);
-
- gm_status = gm_open(&gmni->gmni_port, 0, *gmnal_tunables.gm_port,
- "gmnal", GM_API_VERSION);
-
- if (gm_status != GM_SUCCESS) {
- CERROR("Can't open GM port %d: %d (%s)\n",
- *gmnal_tunables.gm_port, gm_status,
- gmnal_gmstatus2str(gm_status));
- goto failed_1;
- }
-
- CDEBUG(D_NET,"gm_open succeeded port[%p]\n",gmni->gmni_port);
-
- if (!gmnal_set_local_nid(gmni))
- goto failed_2;
-
- CDEBUG(D_NET, "portals_nid is %s\n", libcfs_nid2str(ni->ni_nid));
-
- gmni->gmni_large_msgsize =
- offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[LNET_MAX_PAYLOAD]);
- gmni->gmni_large_gmsize =
- gm_min_size_for_length(gmni->gmni_large_msgsize);
- gmni->gmni_large_pages =
- (gmni->gmni_large_msgsize + PAGE_SIZE - 1)/PAGE_SIZE;
-
- gmni->gmni_small_msgsize = MIN(GM_MTU, PAGE_SIZE);
- gmni->gmni_small_gmsize =
- gm_min_size_for_length(gmni->gmni_small_msgsize);
-
- gmni->gmni_netaddr_base = GMNAL_NETADDR_BASE;
- gmni->gmni_netaddr_size = 0;
-
- CDEBUG(D_NET, "Msg size %08x/%08x [%d/%d]\n",
- gmni->gmni_large_msgsize, gmni->gmni_small_msgsize,
- gmni->gmni_large_gmsize, gmni->gmni_small_gmsize);
-
- if (gmnal_alloc_rxs(gmni) != 0) {
- CERROR("Failed to allocate rx descriptors\n");
- goto failed_2;
- }
-
- if (gmnal_alloc_txs(gmni) != 0) {
- CERROR("Failed to allocate tx descriptors\n");
- goto failed_2;
- }
-
- if (gmnal_alloc_ltxbufs(gmni) != 0) {
- CERROR("Failed to allocate large tx buffers\n");
- goto failed_2;
- }
-
- rc = gmnal_start_threads(gmni);
- if (rc != 0) {
- CERROR("Can't start threads: %d\n", rc);
- goto failed_2;
- }
-
- /* Start listening */
- for (rx = gmni->gmni_rxs; rx != NULL; rx = rx->rx_next)
- gmnal_post_rx(gmni, rx);
-
- the_gmni = gmni;
-
- CDEBUG(D_NET, "gmnal_init finished\n");
- return 0;
-
- failed_2:
- gm_close(gmni->gmni_port);
- gmni->gmni_port = NULL;
-
- failed_1:
- gm_finalize();
-
- failed_0:
- /* safe to free descriptors after network has been shut down */
- gmnal_free_ltxbufs(gmni);
- gmnal_free_txs(gmni);
- gmnal_free_rxs(gmni);
-
- LIBCFS_FREE(gmni, sizeof(*gmni));
- PORTAL_MODULE_UNUSE;
-
- return -EIO;
-}
-
-/*
- * Called when module loaded
- */
-int gmnal_init(void)
-{
- lnet_register_lnd(&the_gmlnd);
- return 0;
-}
-
-/*
- * Called when module removed
- */
-void gmnal_fini()
-{
- lnet_unregister_lnd(&the_gmlnd);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- *
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-/*
- * This file implements the nal cb functions
- */
-
-
-#include "gmlnd.h"
-
-int
-gmnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- gmnal_ni_t *gmni = ni->ni_data;
- gmnal_rx_t *rx = (gmnal_rx_t*)private;
- gmnal_msg_t *msg = GMNAL_NETBUF_MSG(&rx->rx_buf);
- int npages = rx->rx_islarge ? gmni->gmni_large_pages : 1;
- int payload_offset = offsetof(gmnal_msg_t,
- gmm_u.immediate.gmim_payload[0]);
- int nob = payload_offset + mlen;
-
- LASSERT (msg->gmm_type == GMNAL_MSG_IMMEDIATE);
- LASSERT (iov == NULL || kiov == NULL);
-
- if (rx->rx_recv_nob < nob) {
- CERROR("Short message from nid %s: got %d, need %d\n",
- libcfs_nid2str(msg->gmm_srcnid), rx->rx_recv_nob, nob);
- gmnal_post_rx(gmni, rx);
- return -EIO;
- }
-
- if (kiov != NULL)
- lnet_copy_kiov2kiov(niov, kiov, offset,
- npages, rx->rx_buf.nb_kiov, payload_offset,
- mlen);
- else
- lnet_copy_kiov2iov(niov, iov, offset,
- npages, rx->rx_buf.nb_kiov, payload_offset,
- mlen);
-
- lnet_finalize(ni, lntmsg, 0);
- gmnal_post_rx(gmni, rx);
- return 0;
-}
-
-int
-gmnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- lnet_hdr_t *hdr= &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int len = lntmsg->msg_len;
- gmnal_ni_t *gmni = ni->ni_data;
- gm_status_t gmrc;
- gmnal_tx_t *tx;
-
- LASSERT (iov == NULL || kiov == NULL);
-
- /* I may not block for a tx if I'm responding to an incoming message */
- tx = gmnal_get_tx(gmni);
- if (tx == NULL) {
- if (!gmni->gmni_shutdown)
- CERROR ("Can't get tx for msg type %d for %s\n",
- type, libcfs_nid2str(target.nid));
- return -EIO;
- }
-
- tx->tx_nid = target.nid;
-
- gmrc = gm_global_id_to_node_id(gmni->gmni_port, LNET_NIDADDR(target.nid),
- &tx->tx_gmlid);
- if (gmrc != GM_SUCCESS) {
- CERROR("Can't map Nid %s to a GM local ID: %d\n",
- libcfs_nid2str(target.nid), gmrc);
- /* NB tx_lntmsg not set => doesn't finalize */
- gmnal_tx_done(tx, -EIO);
- return -EIO;
- }
-
- gmnal_pack_msg(gmni, GMNAL_NETBUF_MSG(&tx->tx_buf),
- target.nid, GMNAL_MSG_IMMEDIATE);
- GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_u.immediate.gmim_hdr = *hdr;
- tx->tx_msgnob = offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0]);
-
- if (the_lnet.ln_testprotocompat != 0) {
- /* single-shot proto test */
- LNET_LOCK();
- if ((the_lnet.ln_testprotocompat & 1) != 0) {
- GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_version++;
- the_lnet.ln_testprotocompat &= ~1;
- }
- if ((the_lnet.ln_testprotocompat & 2) != 0) {
- GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_magic =
- LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~2;
- }
- LNET_UNLOCK();
- }
-
- if (tx->tx_msgnob + len <= gmni->gmni_small_msgsize) {
- /* whole message fits in tx_buf */
- char *buffer = &(GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_u.immediate.gmim_payload[0]);
-
- if (iov != NULL)
- lnet_copy_iov2flat(len, buffer, 0,
- niov, iov, offset, len);
- else
- lnet_copy_kiov2flat(len, buffer, 0,
- niov, kiov, offset, len);
-
- tx->tx_msgnob += len;
- tx->tx_large_nob = 0;
- } else {
- /* stash payload pts to copy later */
- tx->tx_large_nob = len;
- tx->tx_large_iskiov = (kiov != NULL);
- tx->tx_large_niov = niov;
- if (tx->tx_large_iskiov)
- tx->tx_large_frags.kiov = kiov;
- else
- tx->tx_large_frags.iov = iov;
- }
-
- LASSERT(tx->tx_lntmsg == NULL);
- tx->tx_lntmsg = lntmsg;
-
- spin_lock(&gmni->gmni_tx_lock);
-
- list_add_tail(&tx->tx_list, &gmni->gmni_buf_txq);
- gmnal_check_txqueues_locked(gmni);
-
- spin_unlock(&gmni->gmni_tx_lock);
-
- return 0;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- *
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * This file contains all gmnal send and receive functions
- */
-
-#include "gmlnd.h"
-
-void
-gmnal_notify_peer_down(gmnal_tx_t *tx)
-{
- time_t then;
-
- then = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() -
- tx->tx_launchtime);
-
- lnet_notify(tx->tx_gmni->gmni_ni, tx->tx_nid, 0, then);
-}
-
-void
-gmnal_pack_msg(gmnal_ni_t *gmni, gmnal_msg_t *msg,
- lnet_nid_t dstnid, int type)
-{
- /* CAVEAT EMPTOR! this only sets the common message fields. */
- msg->gmm_magic = GMNAL_MSG_MAGIC;
- msg->gmm_version = GMNAL_MSG_VERSION;
- msg->gmm_type = type;
- msg->gmm_srcnid = lnet_ptlcompat_srcnid(gmni->gmni_ni->ni_nid,
- dstnid);
- msg->gmm_dstnid = dstnid;
-}
-
-int
-gmnal_unpack_msg(gmnal_ni_t *gmni, gmnal_rx_t *rx)
-{
- gmnal_msg_t *msg = GMNAL_NETBUF_MSG(&rx->rx_buf);
- const int hdr_size = offsetof(gmnal_msg_t, gmm_u);
- int buffnob = rx->rx_islarge ? gmni->gmni_large_msgsize :
- gmni->gmni_small_msgsize;
- int flip;
-
- /* rc = 0:SUCCESS -ve:failure +ve:version mismatch */
-
- /* GM may not overflow our buffer */
- LASSERT (rx->rx_recv_nob <= buffnob);
-
- /* 6 bytes are enough to have received magic + version */
- if (rx->rx_recv_nob < 6) {
- CERROR("Short message from gmid %u: %d\n",
- rx->rx_recv_gmid, rx->rx_recv_nob);
- return -EPROTO;
- }
-
- if (msg->gmm_magic == GMNAL_MSG_MAGIC) {
- flip = 0;
- } else if (msg->gmm_magic == __swab32(GMNAL_MSG_MAGIC)) {
- flip = 1;
- } else if (msg->gmm_magic == LNET_PROTO_MAGIC ||
- msg->gmm_magic == __swab32(LNET_PROTO_MAGIC)) {
- return EPROTO;
- } else {
- CERROR("Bad magic from gmid %u: %08x\n",
- rx->rx_recv_gmid, msg->gmm_magic);
- return -EPROTO;
- }
-
- if (msg->gmm_version !=
- (flip ? __swab16(GMNAL_MSG_VERSION) : GMNAL_MSG_VERSION)) {
- return EPROTO;
- }
-
- if (rx->rx_recv_nob < hdr_size) {
- CERROR("Short message from %u: %d\n",
- rx->rx_recv_gmid, rx->rx_recv_nob);
- return -EPROTO;
- }
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- __swab16s(&msg->gmm_version);
- __swab16s(&msg->gmm_type);
- __swab64s(&msg->gmm_srcnid);
- __swab64s(&msg->gmm_dstnid);
- }
-
- if (msg->gmm_srcnid == LNET_NID_ANY) {
- CERROR("Bad src nid from %u: %s\n",
- rx->rx_recv_gmid, libcfs_nid2str(msg->gmm_srcnid));
- return -EPROTO;
- }
-
- if (!lnet_ptlcompat_matchnid(gmni->gmni_ni->ni_nid,
- msg->gmm_dstnid)) {
- CERROR("Bad dst nid from %u: %s\n",
- rx->rx_recv_gmid, libcfs_nid2str(msg->gmm_dstnid));
- return -EPROTO;
- }
-
- switch (msg->gmm_type) {
- default:
- CERROR("Unknown message type from %u: %x\n",
- rx->rx_recv_gmid, msg->gmm_type);
- return -EPROTO;
-
- case GMNAL_MSG_IMMEDIATE:
- if (rx->rx_recv_nob < offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0])) {
- CERROR("Short IMMEDIATE from %u: %d("LPSZ")\n",
- rx->rx_recv_gmid, rx->rx_recv_nob,
- offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0]));
- return -EPROTO;
- }
- break;
- }
- return 0;
-}
-
-gmnal_tx_t *
-gmnal_get_tx(gmnal_ni_t *gmni)
-{
- gmnal_tx_t *tx = NULL;
-
- spin_lock(&gmni->gmni_tx_lock);
-
- if (gmni->gmni_shutdown ||
- list_empty(&gmni->gmni_idle_txs)) {
- spin_unlock(&gmni->gmni_tx_lock);
- return NULL;
- }
-
- tx = list_entry(gmni->gmni_idle_txs.next, gmnal_tx_t, tx_list);
- list_del(&tx->tx_list);
-
- spin_unlock(&gmni->gmni_tx_lock);
-
- LASSERT (tx->tx_lntmsg == NULL);
- LASSERT (tx->tx_ltxb == NULL);
- LASSERT (!tx->tx_credit);
-
- return tx;
-}
-
-void
-gmnal_tx_done(gmnal_tx_t *tx, int rc)
-{
- gmnal_ni_t *gmni = tx->tx_gmni;
- int wake_sched = 0;
- lnet_msg_t *lnetmsg = tx->tx_lntmsg;
-
- tx->tx_lntmsg = NULL;
-
- spin_lock(&gmni->gmni_tx_lock);
-
- if (tx->tx_ltxb != NULL) {
- wake_sched = 1;
- list_add_tail(&tx->tx_ltxb->txb_list, &gmni->gmni_idle_ltxbs);
- tx->tx_ltxb = NULL;
- }
-
- if (tx->tx_credit) {
- wake_sched = 1;
- gmni->gmni_tx_credits++;
- tx->tx_credit = 0;
- }
-
- list_add_tail(&tx->tx_list, &gmni->gmni_idle_txs);
-
- if (wake_sched)
- gmnal_check_txqueues_locked(gmni);
-
- spin_unlock(&gmni->gmni_tx_lock);
-
- /* Delay finalize until tx is free */
- if (lnetmsg != NULL)
- lnet_finalize(gmni->gmni_ni, lnetmsg, rc);
-}
-
-void
-gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
- gm_status_t status)
-{
- gmnal_tx_t *tx = (gmnal_tx_t*)context;
-
- LASSERT(!in_interrupt());
-
- CDEBUG(D_NET, "status for tx [%p] is [%d][%s], nid %s\n",
- tx, status, gmnal_gmstatus2str(status),
- libcfs_nid2str(tx->tx_nid));
-
- gmnal_tx_done(tx, -EIO);
-}
-
-void
-gmnal_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
-{
- gmnal_tx_t *tx = (gmnal_tx_t*)context;
- gmnal_ni_t *gmni = tx->tx_gmni;
-
- LASSERT(!in_interrupt());
-
- switch(status) {
- case GM_SUCCESS:
- gmnal_tx_done(tx, 0);
- return;
-
- case GM_SEND_DROPPED:
- CDEBUG(D_NETERROR, "Dropped tx %p to %s\n",
- tx, libcfs_nid2str(tx->tx_nid));
- /* Another tx failed and called gm_drop_sends() which made this
- * one complete immediately */
- gmnal_tx_done(tx, -EIO);
- return;
-
- default:
- /* Some error; NB don't complete tx yet; we need its credit for
- * gm_drop_sends() */
- CDEBUG(D_NETERROR, "tx %p error %d(%s), nid %s\n",
- tx, status, gmnal_gmstatus2str(status),
- libcfs_nid2str(tx->tx_nid));
-
- gmnal_notify_peer_down(tx);
-
- spin_lock(&gmni->gmni_gm_lock);
- gm_drop_sends(gmni->gmni_port,
- tx->tx_ltxb != NULL ?
- GMNAL_LARGE_PRIORITY : GMNAL_SMALL_PRIORITY,
- tx->tx_gmlid, *gmnal_tunables.gm_port,
- gmnal_drop_sends_callback, tx);
- spin_unlock(&gmni->gmni_gm_lock);
- return;
- }
-
- /* not reached */
- LBUG();
-}
-
-void
-gmnal_check_txqueues_locked (gmnal_ni_t *gmni)
-{
- gmnal_tx_t *tx;
- gmnal_txbuf_t *ltxb;
- int gmsize;
- int pri;
- void *netaddr;
-
- tx = list_empty(&gmni->gmni_buf_txq) ? NULL :
- list_entry(gmni->gmni_buf_txq.next, gmnal_tx_t, tx_list);
-
- if (tx != NULL &&
- (tx->tx_large_nob == 0 ||
- !list_empty(&gmni->gmni_idle_ltxbs))) {
-
- /* consume tx */
- list_del(&tx->tx_list);
-
- LASSERT (tx->tx_ltxb == NULL);
-
- if (tx->tx_large_nob != 0) {
- ltxb = list_entry(gmni->gmni_idle_ltxbs.next,
- gmnal_txbuf_t, txb_list);
-
- /* consume large buffer */
- list_del(<xb->txb_list);
-
- spin_unlock(&gmni->gmni_tx_lock);
-
- /* Unlocking here allows sends to get re-ordered,
- * but we want to allow other CPUs to progress... */
-
- tx->tx_ltxb = ltxb;
-
- /* marshall message in tx_ltxb...
- * 1. Copy what was marshalled so far (in tx_buf) */
-                        memcpy(GMNAL_NETBUF_MSG(&ltxb->txb_buf),
- GMNAL_NETBUF_MSG(&tx->tx_buf), tx->tx_msgnob);
-
- /* 2. Copy the payload */
- if (tx->tx_large_iskiov)
- lnet_copy_kiov2kiov(
- gmni->gmni_large_pages,
- ltxb->txb_buf.nb_kiov,
- tx->tx_msgnob,
- tx->tx_large_niov,
- tx->tx_large_frags.kiov,
- tx->tx_large_offset,
- tx->tx_large_nob);
- else
- lnet_copy_iov2kiov(
- gmni->gmni_large_pages,
- ltxb->txb_buf.nb_kiov,
- tx->tx_msgnob,
- tx->tx_large_niov,
- tx->tx_large_frags.iov,
- tx->tx_large_offset,
- tx->tx_large_nob);
-
- tx->tx_msgnob += tx->tx_large_nob;
-
- spin_lock(&gmni->gmni_tx_lock);
- }
-
- list_add_tail(&tx->tx_list, &gmni->gmni_cred_txq);
- }
-
- if (!list_empty(&gmni->gmni_cred_txq) &&
- gmni->gmni_tx_credits != 0) {
-
- tx = list_entry(gmni->gmni_cred_txq.next, gmnal_tx_t, tx_list);
-
- /* consume tx and 1 credit */
- list_del(&tx->tx_list);
- gmni->gmni_tx_credits--;
-
- spin_unlock(&gmni->gmni_tx_lock);
-
- /* Unlocking here allows sends to get re-ordered, but we want
- * to allow other CPUs to progress... */
-
- LASSERT(!tx->tx_credit);
- tx->tx_credit = 1;
-
- tx->tx_launchtime = cfs_time_current();
-
- if (tx->tx_msgnob <= gmni->gmni_small_msgsize) {
- LASSERT (tx->tx_ltxb == NULL);
- netaddr = GMNAL_NETBUF_LOCAL_NETADDR(&tx->tx_buf);
- gmsize = gmni->gmni_small_gmsize;
- pri = GMNAL_SMALL_PRIORITY;
- } else {
- LASSERT (tx->tx_ltxb != NULL);
- netaddr = GMNAL_NETBUF_LOCAL_NETADDR(&tx->tx_ltxb->txb_buf);
- gmsize = gmni->gmni_large_gmsize;
- pri = GMNAL_LARGE_PRIORITY;
- }
-
- spin_lock(&gmni->gmni_gm_lock);
-
- gm_send_to_peer_with_callback(gmni->gmni_port,
- netaddr, gmsize,
- tx->tx_msgnob,
- pri,
- tx->tx_gmlid,
- gmnal_tx_callback,
- (void*)tx);
-
- spin_unlock(&gmni->gmni_gm_lock);
- spin_lock(&gmni->gmni_tx_lock);
- }
-}
-
-void
-gmnal_post_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx)
-{
- int gmsize = rx->rx_islarge ? gmni->gmni_large_gmsize :
- gmni->gmni_small_gmsize;
- int pri = rx->rx_islarge ? GMNAL_LARGE_PRIORITY :
- GMNAL_SMALL_PRIORITY;
- void *buffer = GMNAL_NETBUF_LOCAL_NETADDR(&rx->rx_buf);
-
- CDEBUG(D_NET, "posting rx %p buf %p\n", rx, buffer);
-
- spin_lock(&gmni->gmni_gm_lock);
- gm_provide_receive_buffer_with_tag(gmni->gmni_port,
- buffer, gmsize, pri, 0);
- spin_unlock(&gmni->gmni_gm_lock);
-}
-
-void
-gmnal_version_reply (gmnal_ni_t *gmni, gmnal_rx_t *rx)
-{
- /* Future protocol version compatibility support!
- * The next gmlnd-specific protocol rev will first send a message to
- * check version; I reply with a stub message containing my current
- * magic+version... */
- gmnal_msg_t *msg;
- gmnal_tx_t *tx = gmnal_get_tx(gmni);
-
- if (tx == NULL) {
- CERROR("Can't allocate tx to send version info to %u\n",
- rx->rx_recv_gmid);
- return;
- }
-
- LASSERT (tx->tx_lntmsg == NULL); /* no finalize */
-
- tx->tx_nid = LNET_NID_ANY;
- tx->tx_gmlid = rx->rx_recv_gmid;
-
- msg = GMNAL_NETBUF_MSG(&tx->tx_buf);
- msg->gmm_magic = GMNAL_MSG_MAGIC;
- msg->gmm_version = GMNAL_MSG_VERSION;
-
- /* just send magic + version */
- tx->tx_msgnob = offsetof(gmnal_msg_t, gmm_type);
- tx->tx_large_nob = 0;
-
- spin_lock(&gmni->gmni_tx_lock);
-
- list_add_tail(&tx->tx_list, &gmni->gmni_buf_txq);
- gmnal_check_txqueues_locked(gmni);
-
- spin_unlock(&gmni->gmni_tx_lock);
-}
-
-int
-gmnal_rx_thread(void *arg)
-{
- gmnal_ni_t *gmni = arg;
- gm_recv_event_t *rxevent = NULL;
- gm_recv_t *recv = NULL;
- gmnal_rx_t *rx;
- int rc;
-
- cfs_daemonize("gmnal_rxd");
-
- while (!gmni->gmni_shutdown) {
- rc = down_interruptible(&gmni->gmni_rx_mutex);
- LASSERT (rc == 0 || rc == -EINTR);
- if (rc != 0)
- continue;
-
- spin_lock(&gmni->gmni_gm_lock);
- rxevent = gm_blocking_receive_no_spin(gmni->gmni_port);
- spin_unlock(&gmni->gmni_gm_lock);
-
- switch (GM_RECV_EVENT_TYPE(rxevent)) {
- default:
- gm_unknown(gmni->gmni_port, rxevent);
- up(&gmni->gmni_rx_mutex);
- continue;
-
- case GM_FAST_RECV_EVENT:
- case GM_FAST_PEER_RECV_EVENT:
- case GM_PEER_RECV_EVENT:
- case GM_FAST_HIGH_RECV_EVENT:
- case GM_FAST_HIGH_PEER_RECV_EVENT:
- case GM_HIGH_PEER_RECV_EVENT:
- case GM_RECV_EVENT:
- case GM_HIGH_RECV_EVENT:
- break;
- }
-
- recv = &rxevent->recv;
- rx = gm_hash_find(gmni->gmni_rx_hash,
- gm_ntohp(recv->buffer));
- LASSERT (rx != NULL);
-
- rx->rx_recv_nob = gm_ntoh_u32(recv->length);
- rx->rx_recv_gmid = gm_ntoh_u16(recv->sender_node_id);
- rx->rx_recv_port = gm_ntoh_u8(recv->sender_port_id);
- rx->rx_recv_type = gm_ntoh_u8(recv->type);
-
- switch (GM_RECV_EVENT_TYPE(rxevent)) {
- case GM_FAST_RECV_EVENT:
- case GM_FAST_PEER_RECV_EVENT:
- case GM_FAST_HIGH_RECV_EVENT:
- case GM_FAST_HIGH_PEER_RECV_EVENT:
- LASSERT (rx->rx_recv_nob <= PAGE_SIZE);
-
- memcpy(GMNAL_NETBUF_MSG(&rx->rx_buf),
- gm_ntohp(recv->message), rx->rx_recv_nob);
- break;
- }
-
- up(&gmni->gmni_rx_mutex);
-
- CDEBUG (D_NET, "rx %p: buf %p(%p) nob %d\n", rx,
- GMNAL_NETBUF_LOCAL_NETADDR(&rx->rx_buf),
- gm_ntohp(recv->buffer), rx->rx_recv_nob);
-
- /* We're connectionless: simply drop packets with
- * errors */
- rc = gmnal_unpack_msg(gmni, rx);
-
- if (rc == 0) {
- gmnal_msg_t *msg = GMNAL_NETBUF_MSG(&rx->rx_buf);
-
- LASSERT (msg->gmm_type == GMNAL_MSG_IMMEDIATE);
- rc = lnet_parse(gmni->gmni_ni,
- &msg->gmm_u.immediate.gmim_hdr,
- msg->gmm_srcnid, rx, 0);
- } else if (rc > 0) {
- gmnal_version_reply(gmni, rx);
- rc = -EPROTO; /* repost rx */
- }
-
- if (rc < 0) /* parse failure */
- gmnal_post_rx(gmni, rx);
- }
-
- CDEBUG(D_NET, "exiting\n");
- atomic_dec(&gmni->gmni_nthreads);
- return 0;
-}
-
-void
-gmnal_stop_threads(gmnal_ni_t *gmni)
-{
- int count = 2;
-
- gmni->gmni_shutdown = 1;
- mb();
-
- /* wake rxthread owning gmni_rx_mutex with an alarm. */
- spin_lock(&gmni->gmni_gm_lock);
- gm_set_alarm(gmni->gmni_port, &gmni->gmni_alarm, 0, NULL, NULL);
- spin_unlock(&gmni->gmni_gm_lock);
-
- while (atomic_read(&gmni->gmni_nthreads) != 0) {
- count++;
- if ((count & (count - 1)) == 0)
- CWARN("Waiting for %d threads to stop\n",
- atomic_read(&gmni->gmni_nthreads));
- gmnal_yield(1);
- }
-}
-
-int
-gmnal_start_threads(gmnal_ni_t *gmni)
-{
- int i;
- int pid;
-
- LASSERT (!gmni->gmni_shutdown);
- LASSERT (atomic_read(&gmni->gmni_nthreads) == 0);
-
- gm_initialize_alarm(&gmni->gmni_alarm);
-
- for (i = 0; i < num_online_cpus(); i++) {
-
- pid = kernel_thread(gmnal_rx_thread, (void*)gmni, 0);
- if (pid < 0) {
- CERROR("rx thread failed to start: %d\n", pid);
- gmnal_stop_threads(gmni);
- return pid;
- }
-
- atomic_inc(&gmni->gmni_nthreads);
- }
-
- return 0;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- *
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "gmlnd.h"
-
-
-static int port = 4;
-CFS_MODULE_PARM(port, "i", int, 0444,
- "GM port to use for communications");
-
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# tx descriptors");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int peer_credits = 8;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
- "# concurrent sends per peer");
-
-static int nlarge_tx_bufs = 32;
-CFS_MODULE_PARM(nlarge_tx_bufs, "i", int, 0444,
- "# large tx message buffers");
-
-static int nrx_small = 128;
-CFS_MODULE_PARM(nrx_small, "i", int, 0444,
- "# small rx message buffers");
-
-static int nrx_large = 64;
-CFS_MODULE_PARM(nrx_large, "i", int, 0444,
- "# large rx message buffers");
-
-gmnal_tunables_t gmnal_tunables = {
- .gm_port = &port,
- .gm_ntx = &ntx,
- .gm_credits = &credits,
- .gm_peer_credits = &peer_credits,
- .gm_nlarge_tx_bufs = &nlarge_tx_bufs,
- .gm_nrx_small = &nrx_small,
- .gm_nrx_large = &nrx_large,
-};
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-static cfs_sysctl_table_t gmnal_ctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "port",
- .data = &port,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 2,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 3,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "nlarge_tx_bufs",
- .data = &nlarge_tx_bufs,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 6,
- .procname = "nrx_small",
- .data = &nrx_small,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 7,
- .procname = "nrx_large",
- .data = &nrx_large,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {0}
-};
-
-static cfs_sysctl_table_t gmnal_top_ctl_table[] = {
- {
- .ctl_name = 207,
- .procname = "gmnal",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = gmnal_ctl_table
- },
- {0}
-};
-#endif
-
-static int __init
-gmnal_load(void)
-{
- int status;
- CDEBUG(D_TRACE, "This is the gmnal module initialisation routine\n");
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- gmnal_tunables.gm_sysctl =
- cfs_register_sysctl_table(gmnal_top_ctl_table, 0);
-
- if (gmnal_tunables.gm_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-#endif
- CDEBUG(D_NET, "Calling gmnal_init\n");
- status = gmnal_init();
- if (status == 0) {
- CDEBUG(D_NET, "Portals GMNAL initialised ok\n");
- } else {
- CDEBUG(D_NET, "Portals GMNAL Failed to initialise\n");
- return(-ENODEV);
- }
-
- CDEBUG(D_NET, "This is the end of the gmnal init routine");
-
- return(0);
-}
-
-static void __exit
-gmnal_unload(void)
-{
- gmnal_fini();
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- if (gmnal_tunables.gm_sysctl != NULL)
- cfs_unregister_sysctl_table(gmnal_tunables.gm_sysctl);
-#endif
-}
-
-module_init(gmnal_load);
-module_exit(gmnal_unload);
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel GM LND v1.01");
-MODULE_LICENSE("GPL");
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- *
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "gmlnd.h"
-
-void
-gmnal_free_netbuf_pages (gmnal_netbuf_t *nb, int npages)
-{
- int i;
-
- for (i = 0; i < npages; i++)
- __free_page(nb->nb_kiov[i].kiov_page);
-}
-
-int
-gmnal_alloc_netbuf_pages (gmnal_ni_t *gmni, gmnal_netbuf_t *nb, int npages)
-{
- int i;
- gm_status_t gmrc;
-
- LASSERT (npages > 0);
-
- for (i = 0; i < npages; i++) {
- nb->nb_kiov[i].kiov_page = alloc_page(GFP_KERNEL);
- nb->nb_kiov[i].kiov_offset = 0;
- nb->nb_kiov[i].kiov_len = PAGE_SIZE;
-
- if (nb->nb_kiov[i].kiov_page == NULL) {
- CERROR("Can't allocate page\n");
- gmnal_free_netbuf_pages(nb, i);
- return -ENOMEM;
- }
-
- CDEBUG(D_NET,"[%3d] page %p, phys "LPX64", @ "LPX64"\n",
- i, nb->nb_kiov[i].kiov_page,
- lnet_page2phys(nb->nb_kiov[i].kiov_page),
- gmni->gmni_netaddr_base);
-
- gmrc = gm_register_memory_ex_phys(
- gmni->gmni_port,
- lnet_page2phys(nb->nb_kiov[i].kiov_page),
- PAGE_SIZE,
- gmni->gmni_netaddr_base);
- CDEBUG(D_NET,"[%3d] page %p: %d\n",
- i, nb->nb_kiov[i].kiov_page, gmrc);
-
- if (gmrc != GM_SUCCESS) {
- CERROR("Can't map page: %d(%s)\n", gmrc,
- gmnal_gmstatus2str(gmrc));
- gmnal_free_netbuf_pages(nb, i+1);
- return -ENOMEM;
- }
-
- if (i == 0)
- nb->nb_netaddr = gmni->gmni_netaddr_base;
-
- gmni->gmni_netaddr_base += PAGE_SIZE;
- }
-
- return 0;
-}
-
-void
-gmnal_free_ltxbuf (gmnal_ni_t *gmni, gmnal_txbuf_t *txb)
-{
- int npages = gmni->gmni_large_pages;
-
- LASSERT (gmni->gmni_port == NULL);
- /* No unmapping; the port has been closed */
-
- gmnal_free_netbuf_pages(&txb->txb_buf, gmni->gmni_large_pages);
- LIBCFS_FREE(txb, offsetof(gmnal_txbuf_t, txb_buf.nb_kiov[npages]));
-}
-
-int
-gmnal_alloc_ltxbuf (gmnal_ni_t *gmni)
-{
- int npages = gmni->gmni_large_pages;
- int sz = offsetof(gmnal_txbuf_t, txb_buf.nb_kiov[npages]);
- gmnal_txbuf_t *txb;
- int rc;
-
- LIBCFS_ALLOC(txb, sz);
- if (txb == NULL) {
- CERROR("Can't allocate large txbuffer\n");
- return -ENOMEM;
- }
-
- rc = gmnal_alloc_netbuf_pages(gmni, &txb->txb_buf, npages);
- if (rc != 0) {
- LIBCFS_FREE(txb, sz);
- return rc;
- }
-
- list_add_tail(&txb->txb_list, &gmni->gmni_idle_ltxbs);
-
- txb->txb_next = gmni->gmni_ltxbs;
- gmni->gmni_ltxbs = txb;
-
- return 0;
-}
-
-void
-gmnal_free_tx (gmnal_tx_t *tx)
-{
- LASSERT (tx->tx_gmni->gmni_port == NULL);
-
- gmnal_free_netbuf_pages(&tx->tx_buf, 1);
- LIBCFS_FREE(tx, sizeof(*tx));
-}
-
-int
-gmnal_alloc_tx (gmnal_ni_t *gmni)
-{
- gmnal_tx_t *tx;
- int rc;
-
- LIBCFS_ALLOC(tx, sizeof(*tx));
- if (tx == NULL) {
- CERROR("Failed to allocate tx\n");
- return -ENOMEM;
- }
-
- memset(tx, 0, sizeof(*tx));
-
- rc = gmnal_alloc_netbuf_pages(gmni, &tx->tx_buf, 1);
- if (rc != 0) {
- LIBCFS_FREE(tx, sizeof(*tx));
- return -ENOMEM;
- }
-
- tx->tx_gmni = gmni;
-
- list_add_tail(&tx->tx_list, &gmni->gmni_idle_txs);
-
- tx->tx_next = gmni->gmni_txs;
- gmni->gmni_txs = tx;
-
- return 0;
-}
-
-void
-gmnal_free_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx)
-{
- int npages = rx->rx_islarge ? gmni->gmni_large_pages : 1;
-
- LASSERT (gmni->gmni_port == NULL);
-
- gmnal_free_netbuf_pages(&rx->rx_buf, npages);
- LIBCFS_FREE(rx, offsetof(gmnal_rx_t, rx_buf.nb_kiov[npages]));
-}
-
-int
-gmnal_alloc_rx (gmnal_ni_t *gmni, int islarge)
-{
- int npages = islarge ? gmni->gmni_large_pages : 1;
- int sz = offsetof(gmnal_rx_t, rx_buf.nb_kiov[npages]);
- int rc;
- gmnal_rx_t *rx;
- gm_status_t gmrc;
-
- LIBCFS_ALLOC(rx, sz);
- if (rx == NULL) {
- CERROR("Failed to allocate rx\n");
- return -ENOMEM;
- }
-
- memset(rx, 0, sizeof(*rx));
-
- rc = gmnal_alloc_netbuf_pages(gmni, &rx->rx_buf, npages);
- if (rc != 0) {
- LIBCFS_FREE(rx, sz);
- return rc;
- }
-
- rx->rx_islarge = islarge;
- rx->rx_next = gmni->gmni_rxs;
- gmni->gmni_rxs = rx;
-
- gmrc = gm_hash_insert(gmni->gmni_rx_hash,
- GMNAL_NETBUF_LOCAL_NETADDR(&rx->rx_buf), rx);
- if (gmrc != GM_SUCCESS) {
- CERROR("Couldn't add rx to hash table: %d\n", gmrc);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void
-gmnal_free_ltxbufs (gmnal_ni_t *gmni)
-{
- gmnal_txbuf_t *txb;
-
- while ((txb = gmni->gmni_ltxbs) != NULL) {
- gmni->gmni_ltxbs = txb->txb_next;
- gmnal_free_ltxbuf(gmni, txb);
- }
-}
-
-int
-gmnal_alloc_ltxbufs (gmnal_ni_t *gmni)
-{
- int nlarge_tx_bufs = *gmnal_tunables.gm_nlarge_tx_bufs;
- int i;
- int rc;
-
- for (i = 0; i < nlarge_tx_bufs; i++) {
- rc = gmnal_alloc_ltxbuf(gmni);
-
- if (rc != 0)
- return rc;
- }
-
- return 0;
-}
-
-void
-gmnal_free_txs(gmnal_ni_t *gmni)
-{
- gmnal_tx_t *tx;
-
- while ((tx = gmni->gmni_txs) != NULL) {
- gmni->gmni_txs = tx->tx_next;
- gmnal_free_tx (tx);
- }
-}
-
-int
-gmnal_alloc_txs(gmnal_ni_t *gmni)
-{
- int ntxcred = gm_num_send_tokens(gmni->gmni_port);
- int ntx = *gmnal_tunables.gm_ntx;
- int i;
- int rc;
-
- CDEBUG(D_NET, "ntxcred: %d\n", ntxcred);
- gmni->gmni_tx_credits = ntxcred;
-
- for (i = 0; i < ntx; i++) {
- rc = gmnal_alloc_tx(gmni);
- if (rc != 0)
- return rc;
- }
-
- return 0;
-}
-
-void
-gmnal_free_rxs(gmnal_ni_t *gmni)
-{
- gmnal_rx_t *rx;
-
- while ((rx = gmni->gmni_rxs) != NULL) {
- gmni->gmni_rxs = rx->rx_next;
-
- gmnal_free_rx(gmni, rx);
- }
-
- LASSERT (gmni->gmni_port == NULL);
-#if 0
- /* GM releases all resources allocated to a port when it closes */
- if (gmni->gmni_rx_hash != NULL)
- gm_destroy_hash(gmni->gmni_rx_hash);
-#endif
-}
-
-int
-gmnal_alloc_rxs (gmnal_ni_t *gmni)
-{
- int nrxcred = gm_num_receive_tokens(gmni->gmni_port);
- int nrx_small = *gmnal_tunables.gm_nrx_small;
- int nrx_large = *gmnal_tunables.gm_nrx_large;
- int nrx = nrx_large + nrx_small;
- int rc;
- int i;
-
- CDEBUG(D_NET, "nrxcred: %d(%dL+%dS)\n", nrxcred, nrx_large, nrx_small);
-
- if (nrx > nrxcred) {
- int nlarge = (nrx_large * nrxcred)/nrx;
- int nsmall = nrxcred - nlarge;
-
- CWARN("Only %d rx credits: "
- "reducing large %d->%d, small %d->%d\n", nrxcred,
- nrx_large, nlarge, nrx_small, nsmall);
-
- *gmnal_tunables.gm_nrx_large = nrx_large = nlarge;
- *gmnal_tunables.gm_nrx_small = nrx_small = nsmall;
- nrx = nlarge + nsmall;
- }
-
- gmni->gmni_rx_hash = gm_create_hash(gm_hash_compare_ptrs,
- gm_hash_hash_ptr, 0, 0, nrx, 0);
- if (gmni->gmni_rx_hash == NULL) {
- CERROR("Failed to create hash table\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < nrx; i++ ) {
- rc = gmnal_alloc_rx(gmni, i < nrx_large);
- if (rc != 0)
- return rc;
- }
-
- return 0;
-}
-
-char *
-gmnal_gmstatus2str(gm_status_t status)
-{
- return(gm_strerror(status));
-
- switch(status) {
- case(GM_SUCCESS):
- return("SUCCESS");
- case(GM_FAILURE):
- return("FAILURE");
- case(GM_INPUT_BUFFER_TOO_SMALL):
- return("INPUT_BUFFER_TOO_SMALL");
- case(GM_OUTPUT_BUFFER_TOO_SMALL):
- return("OUTPUT_BUFFER_TOO_SMALL");
- case(GM_TRY_AGAIN ):
- return("TRY_AGAIN");
- case(GM_BUSY):
- return("BUSY");
- case(GM_MEMORY_FAULT):
- return("MEMORY_FAULT");
- case(GM_INTERRUPTED):
- return("INTERRUPTED");
- case(GM_INVALID_PARAMETER):
- return("INVALID_PARAMETER");
- case(GM_OUT_OF_MEMORY):
- return("OUT_OF_MEMORY");
- case(GM_INVALID_COMMAND):
- return("INVALID_COMMAND");
- case(GM_PERMISSION_DENIED):
- return("PERMISSION_DENIED");
- case(GM_INTERNAL_ERROR):
- return("INTERNAL_ERROR");
- case(GM_UNATTACHED):
- return("UNATTACHED");
- case(GM_UNSUPPORTED_DEVICE):
- return("UNSUPPORTED_DEVICE");
- case(GM_SEND_TIMED_OUT):
- return("GM_SEND_TIMEDOUT");
- case(GM_SEND_REJECTED):
- return("GM_SEND_REJECTED");
- case(GM_SEND_TARGET_PORT_CLOSED):
- return("GM_SEND_TARGET_PORT_CLOSED");
- case(GM_SEND_TARGET_NODE_UNREACHABLE):
- return("GM_SEND_TARGET_NODE_UNREACHABLE");
- case(GM_SEND_DROPPED):
- return("GM_SEND_DROPPED");
- case(GM_SEND_PORT_CLOSED):
- return("GM_SEND_PORT_CLOSED");
- case(GM_NODE_ID_NOT_YET_SET):
- return("GM_NODE_ID_NOT_YET_SET");
- case(GM_STILL_SHUTTING_DOWN):
- return("GM_STILL_SHUTTING_DOWN");
- case(GM_CLONE_BUSY):
- return("GM_CLONE_BUSY");
- case(GM_NO_SUCH_DEVICE):
- return("GM_NO_SUCH_DEVICE");
- case(GM_ABORTED):
- return("GM_ABORTED");
- case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
- return("GM_INCOMPATIBLE_LIB_AND_DRIVER");
- case(GM_UNTRANSLATED_SYSTEM_ERROR):
- return("GM_UNTRANSLATED_SYSTEM_ERROR");
- case(GM_ACCESS_DENIED):
- return("GM_ACCESS_DENIED");
-
-
- /*
- * These ones are in the docs but aren't in the header file
- case(GM_DEV_NOT_FOUND):
- return("GM_DEV_NOT_FOUND");
- case(GM_INVALID_PORT_NUMBER):
- return("GM_INVALID_PORT_NUMBER");
- case(GM_UC_ERROR):
- return("GM_US_ERROR");
- case(GM_PAGE_TABLE_FULL):
- return("GM_PAGE_TABLE_FULL");
- case(GM_MINOR_OVERFLOW):
- return("GM_MINOR_OVERFLOW");
- case(GM_SEND_ORPHANED):
- return("GM_SEND_ORPHANED");
- case(GM_HARDWARE_FAULT):
- return("GM_HARDWARE_FAULT");
- case(GM_DATA_CORRUPTED):
- return("GM_DATA_CORRUPTED");
- case(GM_TIMED_OUT):
- return("GM_TIMED_OUT");
- case(GM_USER_ERROR):
- return("GM_USER_ERROR");
- case(GM_NO_MATCH):
- return("GM_NOMATCH");
- case(GM_NOT_SUPPORTED_IN_KERNEL):
- return("GM_NOT_SUPPORTED_IN_KERNEL");
- case(GM_NOT_SUPPORTED_ON_ARCH):
- return("GM_NOT_SUPPORTED_ON_ARCH");
- case(GM_PTE_REF_CNT_OVERFLOW):
- return("GM_PTR_REF_CNT_OVERFLOW");
- case(GM_NO_DRIVER_SUPPORT):
- return("GM_NO_DRIVER_SUPPORT");
- case(GM_FIRMWARE_NOT_RUNNING):
- return("GM_FIRMWARE_NOT_RUNNING");
- * These ones are in the docs but aren't in the header file
- */
-
- default:
- return("UNKNOWN GM ERROR CODE");
- }
-}
-
-
-char *
-gmnal_rxevent2str(gm_recv_event_t *ev)
-{
- short event;
- event = GM_RECV_EVENT_TYPE(ev);
- switch(event) {
- case(GM_NO_RECV_EVENT):
- return("GM_NO_RECV_EVENT");
- case(GM_SENDS_FAILED_EVENT):
- return("GM_SEND_FAILED_EVENT");
- case(GM_ALARM_EVENT):
- return("GM_ALARM_EVENT");
- case(GM_SENT_EVENT):
- return("GM_SENT_EVENT");
- case(_GM_SLEEP_EVENT):
- return("_GM_SLEEP_EVENT");
- case(GM_RAW_RECV_EVENT):
- return("GM_RAW_RECV_EVENT");
- case(GM_BAD_SEND_DETECTED_EVENT):
- return("GM_BAD_SEND_DETECTED_EVENT");
- case(GM_SEND_TOKEN_VIOLATION_EVENT):
- return("GM_SEND_TOKEN_VIOLATION_EVENT");
- case(GM_RECV_TOKEN_VIOLATION_EVENT):
- return("GM_RECV_TOKEN_VIOLATION_EVENT");
- case(GM_BAD_RECV_TOKEN_EVENT):
- return("GM_BAD_RECV_TOKEN_EVENT");
- case(GM_ALARM_VIOLATION_EVENT):
- return("GM_ALARM_VIOLATION_EVENT");
- case(GM_RECV_EVENT):
- return("GM_RECV_EVENT");
- case(GM_HIGH_RECV_EVENT):
- return("GM_HIGH_RECV_EVENT");
- case(GM_PEER_RECV_EVENT):
- return("GM_PEER_RECV_EVENT");
- case(GM_HIGH_PEER_RECV_EVENT):
- return("GM_HIGH_PEER_RECV_EVENT");
- case(GM_FAST_RECV_EVENT):
- return("GM_FAST_RECV_EVENT");
- case(GM_FAST_HIGH_RECV_EVENT):
- return("GM_FAST_HIGH_RECV_EVENT");
- case(GM_FAST_PEER_RECV_EVENT):
- return("GM_FAST_PEER_RECV_EVENT");
- case(GM_FAST_HIGH_PEER_RECV_EVENT):
- return("GM_FAST_HIGH_PEER_RECV_EVENT");
- case(GM_REJECTED_SEND_EVENT):
- return("GM_REJECTED_SEND_EVENT");
- case(GM_ORPHANED_SEND_EVENT):
- return("GM_ORPHANED_SEND_EVENT");
- case(GM_BAD_RESEND_DETECTED_EVENT):
- return("GM_BAD_RESEND_DETETED_EVENT");
- case(GM_DROPPED_SEND_EVENT):
- return("GM_DROPPED_SEND_EVENT");
- case(GM_BAD_SEND_VMA_EVENT):
- return("GM_BAD_SEND_VMA_EVENT");
- case(GM_BAD_RECV_VMA_EVENT):
- return("GM_BAD_RECV_VMA_EVENT");
- case(_GM_FLUSHED_ALARM_EVENT):
- return("GM_FLUSHED_ALARM_EVENT");
- case(GM_SENT_TOKENS_EVENT):
- return("GM_SENT_TOKENS_EVENTS");
- case(GM_IGNORE_RECV_EVENT):
- return("GM_IGNORE_RECV_EVENT");
- case(GM_ETHERNET_RECV_EVENT):
- return("GM_ETHERNET_RECV_EVENT");
- case(GM_NEW_NO_RECV_EVENT):
- return("GM_NEW_NO_RECV_EVENT");
- case(GM_NEW_SENDS_FAILED_EVENT):
- return("GM_NEW_SENDS_FAILED_EVENT");
- case(GM_NEW_ALARM_EVENT):
- return("GM_NEW_ALARM_EVENT");
- case(GM_NEW_SENT_EVENT):
- return("GM_NEW_SENT_EVENT");
- case(_GM_NEW_SLEEP_EVENT):
- return("GM_NEW_SLEEP_EVENT");
- case(GM_NEW_RAW_RECV_EVENT):
- return("GM_NEW_RAW_RECV_EVENT");
- case(GM_NEW_BAD_SEND_DETECTED_EVENT):
- return("GM_NEW_BAD_SEND_DETECTED_EVENT");
- case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT):
- return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT");
- case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT):
- return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT");
- case(GM_NEW_BAD_RECV_TOKEN_EVENT):
- return("GM_NEW_BAD_RECV_TOKEN_EVENT");
- case(GM_NEW_ALARM_VIOLATION_EVENT):
- return("GM_NEW_ALARM_VIOLATION_EVENT");
- case(GM_NEW_RECV_EVENT):
- return("GM_NEW_RECV_EVENT");
- case(GM_NEW_HIGH_RECV_EVENT):
- return("GM_NEW_HIGH_RECV_EVENT");
- case(GM_NEW_PEER_RECV_EVENT):
- return("GM_NEW_PEER_RECV_EVENT");
- case(GM_NEW_HIGH_PEER_RECV_EVENT):
- return("GM_NEW_HIGH_PEER_RECV_EVENT");
- case(GM_NEW_FAST_RECV_EVENT):
- return("GM_NEW_FAST_RECV_EVENT");
- case(GM_NEW_FAST_HIGH_RECV_EVENT):
- return("GM_NEW_FAST_HIGH_RECV_EVENT");
- case(GM_NEW_FAST_PEER_RECV_EVENT):
- return("GM_NEW_FAST_PEER_RECV_EVENT");
- case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT):
- return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT");
- case(GM_NEW_REJECTED_SEND_EVENT):
- return("GM_NEW_REJECTED_SEND_EVENT");
- case(GM_NEW_ORPHANED_SEND_EVENT):
- return("GM_NEW_ORPHANED_SEND_EVENT");
- case(_GM_NEW_PUT_NOTIFICATION_EVENT):
- return("_GM_NEW_PUT_NOTIFICATION_EVENT");
- case(GM_NEW_FREE_SEND_TOKEN_EVENT):
- return("GM_NEW_FREE_SEND_TOKEN_EVENT");
- case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT):
- return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT");
- case(GM_NEW_BAD_RESEND_DETECTED_EVENT):
- return("GM_NEW_BAD_RESEND_DETECTED_EVENT");
- case(GM_NEW_DROPPED_SEND_EVENT):
- return("GM_NEW_DROPPED_SEND_EVENT");
- case(GM_NEW_BAD_SEND_VMA_EVENT):
- return("GM_NEW_BAD_SEND_VMA_EVENT");
- case(GM_NEW_BAD_RECV_VMA_EVENT):
- return("GM_NEW_BAD_RECV_VMA_EVENT");
- case(_GM_NEW_FLUSHED_ALARM_EVENT):
- return("GM_NEW_FLUSHED_ALARM_EVENT");
- case(GM_NEW_SENT_TOKENS_EVENT):
- return("GM_NEW_SENT_TOKENS_EVENT");
- case(GM_NEW_IGNORE_RECV_EVENT):
- return("GM_NEW_IGNORE_RECV_EVENT");
- case(GM_NEW_ETHERNET_RECV_EVENT):
- return("GM_NEW_ETHERNET_RECV_EVENT");
- default:
- return("Unknown Recv event");
- /* _GM_PUT_NOTIFICATION_EVENT */
- /* GM_FREE_SEND_TOKEN_EVENT */
- /* GM_FREE_HIGH_SEND_TOKEN_EVENT */
- }
-}
-
-
-void
-gmnal_yield(int delay)
-{
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(delay);
-}
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-MODULES := kiiblnd
-kiiblnd-objs := iiblnd.o iiblnd_cb.o iiblnd_modparams.o
-
-EXTRA_POST_CFLAGS := @IIBCPPFLAGS@
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_IIBLND
-modulenet_DATA = kiiblnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kiiblnd-objs:%.o=%.c) iiblnd.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "iiblnd.h"
-
-lnd_t the_kiblnd = {
- .lnd_type = IIBLND,
- .lnd_startup = kibnal_startup,
- .lnd_shutdown = kibnal_shutdown,
- .lnd_ctl = kibnal_ctl,
- .lnd_send = kibnal_send,
- .lnd_recv = kibnal_recv,
- .lnd_eager_recv = kibnal_eager_recv,
-};
-
-kib_data_t kibnal_data;
-
-__u32
-kibnal_cksum (void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return (sum == 0) ? 1 : sum;
-}
-
-void
-kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
-{
- msg->ibm_type = type;
- msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
-}
-
-void
-kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
- lnet_nid_t dstnid, __u64 dststamp, __u64 seq)
-{
- /* CAVEAT EMPTOR! all message fields not set here should have been
- * initialised previously. */
- msg->ibm_magic = IBNAL_MSG_MAGIC;
- msg->ibm_version = version;
- /* ibm_type */
- msg->ibm_credits = credits;
- /* ibm_nob */
- msg->ibm_cksum = 0;
- msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid,
- dstnid);
- msg->ibm_srcstamp = kibnal_data.kib_incarnation;
- msg->ibm_dstnid = dstnid;
- msg->ibm_dststamp = dststamp;
- msg->ibm_seq = seq;
-
- if (*kibnal_tunables.kib_cksum) {
- /* NB ibm_cksum zero while computing cksum */
- msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
- }
-}
-
-void
-kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob,
- int type, lnet_nid_t dstnid, __u64 dststamp)
-{
- LASSERT (nob >= offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t));
-
- memset(msg, 0, nob);
- kibnal_init_msg(msg, type, sizeof(kib_connparams_t));
-
- msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE;
- msg->ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE;
- msg->ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS;
-
- kibnal_pack_msg(msg, version, 0, dstnid, dststamp, 0);
-}
-
-int
-kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
-{
- const int hdr_size = offsetof(kib_msg_t, ibm_u);
- __u32 msg_cksum;
- __u32 msg_version;
- int flip;
- int msg_nob;
-#if !IBNAL_USE_FMR
- int i;
- int n;
-#endif
- /* 6 bytes are enough to have received magic + version */
- if (nob < 6) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- /* Future protocol version compatibility support!
- * If the iiblnd-specific protocol changes, or when LNET unifies
- * protocols over all LNDs, the initial connection will negotiate a
- * protocol version. If I find this, I avoid any console errors. If
- * my is doing connection establishment, the reject will tell the peer
- * which version I'm running. */
-
- if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
- flip = 1;
- } else {
- if (msg->ibm_magic == LNET_PROTO_MAGIC ||
- msg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
- return -EPROTO;
-
- /* Completely out to lunch */
- CERROR("Bad magic: %08x\n", msg->ibm_magic);
- return -EPROTO;
- }
-
- msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
- if (expected_version == 0) {
- if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
- msg_version != IBNAL_MSG_VERSION)
- return -EPROTO;
- } else if (msg_version != expected_version) {
- CERROR("Bad version: %x(%x expected)\n",
- msg_version, expected_version);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
- if (msg_nob > nob) {
- CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
- return -EPROTO;
- }
-
- /* checksum must be computed with ibm_cksum zero and BEFORE anything
- * gets flipped */
- msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
- msg->ibm_cksum = 0;
- if (msg_cksum != 0 &&
- msg_cksum != kibnal_cksum(msg, msg_nob)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
- msg->ibm_cksum = msg_cksum;
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- msg->ibm_version = msg_version;
- CLASSERT (sizeof(msg->ibm_type) == 1);
- CLASSERT (sizeof(msg->ibm_credits) == 1);
- msg->ibm_nob = msg_nob;
- __swab64s(&msg->ibm_srcnid);
- __swab64s(&msg->ibm_srcstamp);
- __swab64s(&msg->ibm_dstnid);
- __swab64s(&msg->ibm_dststamp);
- __swab64s(&msg->ibm_seq);
- }
-
- if (msg->ibm_srcnid == LNET_NID_ANY) {
- CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
- return -EPROTO;
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Unknown message type %x\n", msg->ibm_type);
- return -EPROTO;
-
- case IBNAL_MSG_NOOP:
- break;
-
- case IBNAL_MSG_IMMEDIATE:
- if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
- CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
- return -EPROTO;
- }
- break;
-
- case IBNAL_MSG_PUT_REQ:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
- CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
- return -EPROTO;
- }
- break;
-
- case IBNAL_MSG_PUT_ACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
- CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.putack)));
- return -EPROTO;
- }
-#if IBNAL_USE_FMR
- if (flip) {
- __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
- }
-#else
- if (flip) {
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
- }
-
- n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
- if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
- n, IBNAL_MAX_RDMA_FRAGS);
- return -EPROTO;
- }
-
- if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
- CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
- return -EPROTO;
- }
-
- if (flip) {
- for (i = 0; i < n; i++) {
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
- __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr);
- }
- }
-#endif
- break;
-
- case IBNAL_MSG_GET_REQ:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
- CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.get)));
- return -EPROTO;
- }
-#if IBNAL_USE_FMR
- if (flip) {
- __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
- }
-#else
- if (flip) {
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
- }
-
- n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
- if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
- n, IBNAL_MAX_RDMA_FRAGS);
- return -EPROTO;
- }
-
- if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
- CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
- return -EPROTO;
- }
-
- if (flip)
- for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) {
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
- __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr);
- }
-#endif
- break;
-
- case IBNAL_MSG_PUT_NAK:
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
- CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.completion)));
- return -EPROTO;
- }
- if (flip)
- __swab32s(&msg->ibm_u.completion.ibcm_status);
- break;
-
- case IBNAL_MSG_CONNREQ:
- case IBNAL_MSG_CONNACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
- CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
- return -EPROTO;
- }
- if (flip) {
- __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
- __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
- __swab32s(&msg->ibm_u.connparams.ibcp_max_frags);
- }
- break;
- }
- return 0;
-}
-
-IB_HANDLE
-kibnal_create_cep(lnet_nid_t nid)
-{
- FSTATUS frc;
- __u32 u32val;
- IB_HANDLE cep;
-
- cep = iba_cm_create_cep(CM_RC_TYPE);
- if (cep == NULL) {
- CERROR ("Can't create CEP for %s\n",
- (nid == LNET_NID_ANY) ? "listener" :
- libcfs_nid2str(nid));
- return NULL;
- }
-
- if (nid == LNET_NID_ANY) {
- u32val = 1;
- frc = iba_cm_modify_cep(cep, CM_FLAG_ASYNC_ACCEPT,
- (char *)&u32val, sizeof(u32val), 0);
- if (frc != FSUCCESS) {
- CERROR("Can't set async_accept: %d\n", frc);
- goto failed;
- }
-
- u32val = 0; /* sets system max */
- frc = iba_cm_modify_cep(cep, CM_FLAG_LISTEN_BACKLOG,
- (char *)&u32val, sizeof(u32val), 0);
- if (frc != FSUCCESS) {
- CERROR("Can't set listen backlog: %d\n", frc);
- goto failed;
- }
- }
-
- u32val = 1;
- frc = iba_cm_modify_cep(cep, CM_FLAG_TIMEWAIT_CALLBACK,
- (char *)&u32val, sizeof(u32val), 0);
- if (frc != FSUCCESS) {
- CERROR("Can't set timewait_callback for %s: %d\n",
- (nid == LNET_NID_ANY) ? "listener" :
- libcfs_nid2str(nid), frc);
- goto failed;
- }
-
- return cep;
-
- failed:
- iba_cm_destroy_cep(cep);
- return NULL;
-}
-
-#define IBNAL_CHECK_ADVERT 1
-#if IBNAL_CHECK_ADVERT
-void
-kibnal_service_query_done (void *arg, QUERY *qry,
- QUERY_RESULT_VALUES *qry_result)
-{
- int *rcp = arg;
- FSTATUS frc = qry_result->Status;
- SERVICE_RECORD_RESULTS *svc_rslt;
- IB_SERVICE_RECORD *svc;
- lnet_nid_t nid;
-
- if (frc != FSUCCESS || qry_result->ResultDataSize == 0) {
- CERROR("Error checking advert: status %d data size %d\n",
- frc, qry_result->ResultDataSize);
- *rcp = -EIO;
- goto out;
- }
-
- svc_rslt = (SERVICE_RECORD_RESULTS *)qry_result->QueryResult;
-
- if (svc_rslt->NumServiceRecords < 1) {
- CERROR("Check advert: %d records\n",
- svc_rslt->NumServiceRecords);
- *rcp = -ENOENT;
- goto out;
- }
-
- svc = &svc_rslt->ServiceRecords[0];
- nid = le64_to_cpu(*kibnal_service_nid_field(svc));
-
- CDEBUG(D_NET, "Check advert: %s "LPX64" "LPX64":%04x\n",
- libcfs_nid2str(nid), svc->RID.ServiceID,
- svc->RID.ServiceGID.Type.Global.InterfaceID,
- svc->RID.ServiceP_Key);
-
- if (nid != kibnal_data.kib_ni->ni_nid) {
- CERROR("Check advert: Bad NID %s (%s expected)\n",
- libcfs_nid2str(nid),
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- *rcp = -EINVAL;
- goto out;
- }
-
- if (svc->RID.ServiceID != *kibnal_tunables.kib_service_number) {
- CERROR("Check advert: Bad ServiceID "LPX64" (%x expected)\n",
- svc->RID.ServiceID,
- *kibnal_tunables.kib_service_number);
- *rcp = -EINVAL;
- goto out;
- }
-
- if (svc->RID.ServiceGID.Type.Global.InterfaceID !=
- kibnal_data.kib_port_guid) {
- CERROR("Check advert: Bad GUID "LPX64" ("LPX64" expected)\n",
- svc->RID.ServiceGID.Type.Global.InterfaceID,
- kibnal_data.kib_port_guid);
- *rcp = -EINVAL;
- goto out;
- }
-
- if (svc->RID.ServiceP_Key != kibnal_data.kib_port_pkey) {
- CERROR("Check advert: Bad PKEY %04x (%04x expected)\n",
- svc->RID.ServiceP_Key, kibnal_data.kib_port_pkey);
- *rcp = -EINVAL;
- goto out;
- }
-
- CDEBUG(D_NET, "Check advert OK\n");
- *rcp = 0;
-
- out:
- up (&kibnal_data.kib_listener_signal);
-}
-
-int
-kibnal_check_advert (void)
-{
- /* single-threaded */
- static QUERY qry;
-
- FSTATUS frc;
- int rc;
-
- memset (&qry, 0, sizeof(qry));
- qry.InputType = InputTypeServiceRecord;
- qry.OutputType = OutputTypeServiceRecord;
- kibnal_set_service_keys(&qry.InputValue.ServiceRecordValue.ServiceRecord,
- kibnal_data.kib_ni->ni_nid);
- qry.InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK;
-
- frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- &qry,
- kibnal_service_query_done,
- &kibnal_data.kib_sdretry,
- &rc);
- if (frc != FPENDING) {
- CERROR ("Immediate error %d checking SM service\n", frc);
- return -EIO;
- }
-
- down (&kibnal_data.kib_listener_signal);
-
- if (rc != 0)
- CERROR ("Error %d checking SM service\n", rc);
- return rc;
-}
-#else
-int
-kibnal_check_advert(void)
-{
- return 0;
-}
-#endif
-
-void
-kibnal_fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type)
-{
- IB_SERVICE_RECORD *svc;
-
- memset (fod, 0, sizeof(*fod));
- fod->Type = type;
-
- svc = &fod->Value.ServiceRecordValue.ServiceRecord;
- svc->RID.ServiceID = *kibnal_tunables.kib_service_number;
- svc->RID.ServiceGID.Type.Global.InterfaceID = kibnal_data.kib_port_guid;
- svc->RID.ServiceGID.Type.Global.SubnetPrefix = DEFAULT_SUBNET_PREFIX;
- svc->RID.ServiceP_Key = kibnal_data.kib_port_pkey;
- svc->ServiceLease = 0xffffffff;
-
- kibnal_set_service_keys(svc, kibnal_data.kib_ni->ni_nid);
-}
-
-void
-kibnal_service_setunset_done (void *arg, FABRIC_OPERATION_DATA *fod,
- FSTATUS frc, uint32 madrc)
-{
- *(FSTATUS *)arg = frc;
- up (&kibnal_data.kib_listener_signal);
-}
-
-int
-kibnal_advertise (void)
-{
- /* Single threaded here */
- static FABRIC_OPERATION_DATA fod;
-
- IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord;
- FSTATUS frc;
- FSTATUS frc2;
-
- if (strlen(*kibnal_tunables.kib_service_name) >=
- sizeof(svc->ServiceName)) {
- CERROR("Service name '%s' too long (%d chars max)\n",
- *kibnal_tunables.kib_service_name,
- (int)sizeof(svc->ServiceName) - 1);
- return -EINVAL;
- }
-
- kibnal_fill_fod(&fod, FabOpSetServiceRecord);
-
- CDEBUG(D_NET, "Advertising service id "LPX64" %s:%s\n",
- svc->RID.ServiceID, svc->ServiceName,
- libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc))));
-
- frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- &fod,
- kibnal_service_setunset_done,
- &kibnal_data.kib_sdretry,
- &frc2);
-
- if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("Immediate error %d advertising NID %s\n",
- frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- return -EIO;
- }
-
- down (&kibnal_data.kib_listener_signal);
-
- frc = frc2;
- if (frc == FSUCCESS)
- return 0;
-
- CERROR ("Error %d advertising %s\n",
- frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- return -EIO;
-}
-
-void
-kibnal_unadvertise (int expect_success)
-{
- /* single threaded */
- static FABRIC_OPERATION_DATA fod;
-
- IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord;
- FSTATUS frc;
- FSTATUS frc2;
-
- LASSERT (kibnal_data.kib_ni->ni_nid != LNET_NID_ANY);
-
- kibnal_fill_fod(&fod, FabOpDeleteServiceRecord);
-
- CDEBUG(D_NET, "Unadvertising service %s:%s\n",
- svc->ServiceName,
- libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc))));
-
- frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- &fod,
- kibnal_service_setunset_done,
- &kibnal_data.kib_sdretry,
- &frc2);
- if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("Immediate error %d unadvertising NID %s\n",
- frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- return;
- }
-
- down (&kibnal_data.kib_listener_signal);
-
- CDEBUG(D_NET, "Unadvertise rc: %d\n", frc2);
-
- if ((frc2 == FSUCCESS) == !!expect_success)
- return;
-
- if (expect_success)
- CERROR("Error %d unadvertising NID %s\n",
- frc2, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- else
- CWARN("Removed conflicting NID %s\n",
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
-}
-
-void
-kibnal_stop_listener(int normal_shutdown)
-{
- /* NB this also disables peer creation and destroys all existing
- * peers */
- IB_HANDLE cep = kibnal_data.kib_listener_cep;
- unsigned long flags;
- FSTATUS frc;
-
- LASSERT (cep != NULL);
-
- kibnal_unadvertise(normal_shutdown);
-
- frc = iba_cm_cancel(cep);
- if (frc != FSUCCESS && frc != FPENDING)
- CERROR ("Error %d stopping listener\n", frc);
-
- down(&kibnal_data.kib_listener_signal);
-
- frc = iba_cm_destroy_cep(cep);
- if (frc != FSUCCESS)
- CERROR ("Error %d destroying listener CEP\n", frc);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- /* This assignment disables peer creation */
- kibnal_data.kib_listener_cep = NULL;
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- /* Start to tear down any peers created while the listener was
- * running */
- kibnal_del_peer(LNET_NID_ANY);
-}
-
-int
-kibnal_start_listener(void)
-{
- /* NB this also enables peer creation */
-
- IB_HANDLE cep;
- CM_LISTEN_INFO info;
- unsigned long flags;
- int rc;
- FSTATUS frc;
-
- LASSERT (kibnal_data.kib_listener_cep == NULL);
- init_MUTEX_LOCKED (&kibnal_data.kib_listener_signal);
-
- cep = kibnal_create_cep(LNET_NID_ANY);
- if (cep == NULL)
- return -ENOMEM;
-
- memset (&info, 0, sizeof(info));
- info.ListenAddr.EndPt.SID = *kibnal_tunables.kib_service_number;
-
- frc = iba_cm_listen(cep, &info, kibnal_listen_callback, NULL);
- if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("iba_cm_listen error: %d\n", frc);
-
- iba_cm_destroy_cep(cep);
- return -EIO;
- }
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- /* This assignment enables peer creation */
- kibnal_data.kib_listener_cep = cep;
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- rc = kibnal_advertise();
- if (rc == 0)
- rc = kibnal_check_advert();
-
- if (rc == 0)
- return 0;
-
- kibnal_stop_listener(0);
- return rc;
-}
-
-int
-kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- unsigned long flags;
- int rc;
-
- LASSERT (nid != LNET_NID_ANY);
-
- LIBCFS_ALLOC (peer, sizeof (*peer));
- if (peer == NULL) {
- CERROR("Cannot allocate peer\n");
- return -ENOMEM;
- }
-
- memset(peer, 0, sizeof(*peer)); /* zero flags etc */
-
- peer->ibp_nid = nid;
- atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */
-
- INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */
- INIT_LIST_HEAD (&peer->ibp_conns);
- INIT_LIST_HEAD (&peer->ibp_tx_queue);
-
- peer->ibp_error = 0;
- peer->ibp_last_alive = cfs_time_current();
- peer->ibp_reconnect_interval = 0; /* OK to connect at any time */
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (atomic_read(&kibnal_data.kib_npeers) >=
- *kibnal_tunables.kib_concurrent_peers) {
- rc = -EOVERFLOW; /* !! but at least it distinguishes */
- } else if (kibnal_data.kib_listener_cep == NULL) {
- rc = -ESHUTDOWN; /* shutdown has started */
- } else {
- rc = 0;
- /* npeers only grows with the global lock held */
- atomic_inc(&kibnal_data.kib_npeers);
- }
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (rc != 0) {
- CERROR("Can't create peer: %s\n",
- (rc == -ESHUTDOWN) ? "shutting down" :
- "too many peers");
- LIBCFS_FREE(peer, sizeof(*peer));
- } else {
- *peerp = peer;
- }
-
- return rc;
-}
-
-void
-kibnal_destroy_peer (kib_peer_t *peer)
-{
-
- LASSERT (atomic_read (&peer->ibp_refcount) == 0);
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (!kibnal_peer_active(peer));
- LASSERT (!kibnal_peer_connecting(peer));
- LASSERT (list_empty (&peer->ibp_conns));
- LASSERT (list_empty (&peer->ibp_tx_queue));
-
- LIBCFS_FREE (peer, sizeof (*peer));
-
- /* NB a peer's connections keep a reference on their peer until
- * they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
- * zero. */
- atomic_dec (&kibnal_data.kib_npeers);
-}
-
-/* the caller is responsible for accounting for the additional reference
- * that this creates */
-kib_peer_t *
-kibnal_find_peer_locked (lnet_nid_t nid)
-{
- struct list_head *peer_list = kibnal_nid2peerlist (nid);
- struct list_head *tmp;
- kib_peer_t *peer;
-
- list_for_each (tmp, peer_list) {
-
- peer = list_entry (tmp, kib_peer_t, ibp_list);
-
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- if (peer->ibp_nid != nid)
- continue;
-
- CDEBUG(D_NET, "got peer %s (%d)\n",
- libcfs_nid2str(nid), atomic_read (&peer->ibp_refcount));
- return (peer);
- }
- return (NULL);
-}
-
-void
-kibnal_unlink_peer_locked (kib_peer_t *peer)
-{
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (list_empty(&peer->ibp_conns));
-
- LASSERT (kibnal_peer_active(peer));
- list_del_init (&peer->ibp_list);
- /* lose peerlist's ref */
- kibnal_peer_decref(peer);
-}
-
-int
-kibnal_get_peer_info (int index, lnet_nid_t *nidp, int *persistencep)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- unsigned long flags;
- int i;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
-
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- if (index-- > 0)
- continue;
-
- *nidp = peer->ibp_nid;
- *persistencep = peer->ibp_persistence;
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (0);
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (-ENOENT);
-}
-
-int
-kibnal_add_persistent_peer (lnet_nid_t nid)
-{
- unsigned long flags;
- kib_peer_t *peer;
- kib_peer_t *peer2;
- int rc;
-
- if (nid == LNET_NID_ANY)
- return (-EINVAL);
-
- rc = kibnal_create_peer(&peer, nid);
- if (rc != 0)
- return rc;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- /* I'm always called with a reference on kibnal_data.kib_ni
- * so shutdown can't have started */
- LASSERT (kibnal_data.kib_listener_cep != NULL);
-
- peer2 = kibnal_find_peer_locked (nid);
- if (peer2 != NULL) {
- kibnal_peer_decref (peer);
- peer = peer2;
- } else {
- /* peer table takes existing ref on peer */
- list_add_tail (&peer->ibp_list,
- kibnal_nid2peerlist (nid));
- }
-
- peer->ibp_persistence++;
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
- return (0);
-}
-
-void
-kibnal_del_peer_locked (kib_peer_t *peer)
-{
- struct list_head *ctmp;
- struct list_head *cnxt;
- kib_conn_t *conn;
-
- peer->ibp_persistence = 0;
-
- if (list_empty(&peer->ibp_conns)) {
- kibnal_unlink_peer_locked(peer);
- } else {
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, kib_conn_t, ibc_list);
-
- kibnal_close_conn_locked (conn, 0);
- }
- /* NB peer is no longer persistent; closing its last conn
- * unlinked it. */
- }
- /* NB peer now unlinked; might even be freed if the peer table had the
- * last ref on it. */
-}
-
-int
-kibnal_del_peer (lnet_nid_t nid)
-{
- unsigned long flags;
- CFS_LIST_HEAD (zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- kib_peer_t *peer;
- int lo;
- int hi;
- int i;
- int rc = -ENOENT;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
- continue;
-
- if (!list_empty(&peer->ibp_tx_queue)) {
- LASSERT (list_empty(&peer->ibp_conns));
-
- list_splice_init(&peer->ibp_tx_queue, &zombies);
- }
-
- kibnal_del_peer_locked (peer);
- rc = 0; /* matched something */
- }
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- kibnal_txlist_done(&zombies, -EIO);
-
- return (rc);
-}
-
-kib_conn_t *
-kibnal_get_conn_by_idx (int index)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- kib_conn_t *conn;
- struct list_head *ctmp;
- unsigned long flags;
- int i;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- list_for_each (ctmp, &peer->ibp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
- kibnal_conn_addref(conn);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (conn);
- }
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (NULL);
-}
-
-int
-kibnal_conn_rts(kib_conn_t *conn,
- __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn)
-{
- IB_PATH_RECORD *path = &conn->ibc_cvars->cv_path;
- IB_HANDLE qp = conn->ibc_qp;
- IB_QP_ATTRIBUTES_MODIFY modify_attr;
- FSTATUS frc;
- int rc;
-
- if (resp_res > kibnal_data.kib_hca_attrs.MaxQPResponderResources)
- resp_res = kibnal_data.kib_hca_attrs.MaxQPResponderResources;
-
- if (init_depth > kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth)
- init_depth = kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth;
-
- modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
- .RequestState = QPStateReadyToRecv,
- .RecvPSN = IBNAL_STARTING_PSN,
- .DestQPNumber = qpn,
- .ResponderResources = resp_res,
- .MinRnrTimer = UsecToRnrNakTimer(2000), /* 20 ms */
- .Attrs = (IB_QP_ATTR_RECVPSN |
- IB_QP_ATTR_DESTQPNUMBER |
- IB_QP_ATTR_RESPONDERRESOURCES |
- IB_QP_ATTR_DESTAV |
- IB_QP_ATTR_PATHMTU |
- IB_QP_ATTR_MINRNRTIMER),
- };
- GetAVFromPath(0, path, &modify_attr.PathMTU, NULL,
- &modify_attr.DestAV);
-
- frc = iba_modify_qp(qp, &modify_attr, NULL);
- if (frc != FSUCCESS) {
- CERROR("Can't set QP %s ready to receive: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- return -EIO;
- }
-
- rc = kibnal_post_receives(conn);
- if (rc != 0) {
- CERROR("Can't post receives for %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- return rc;
- }
-
- modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
- .RequestState = QPStateReadyToSend,
- .FlowControl = TRUE,
- .InitiatorDepth = init_depth,
- .SendPSN = psn,
- .LocalAckTimeout = path->PktLifeTime + 2, /* 2 or 1? */
- .RetryCount = IBNAL_RETRY,
- .RnrRetryCount = IBNAL_RNR_RETRY,
- .Attrs = (IB_QP_ATTR_FLOWCONTROL |
- IB_QP_ATTR_INITIATORDEPTH |
- IB_QP_ATTR_SENDPSN |
- IB_QP_ATTR_LOCALACKTIMEOUT |
- IB_QP_ATTR_RETRYCOUNT |
- IB_QP_ATTR_RNRRETRYCOUNT),
- };
-
- frc = iba_modify_qp(qp, &modify_attr, NULL);
- if (frc != FSUCCESS) {
- CERROR("Can't set QP %s ready to send: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- return -EIO;
- }
-
- frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't query QP %s attributes: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- return -EIO;
- }
-
- return 0;
-}
-
-kib_conn_t *
-kibnal_create_conn (lnet_nid_t nid, int proto_version)
-{
- kib_conn_t *conn;
- int i;
- int page_offset;
- int ipage;
- int rc;
- FSTATUS frc;
- union {
- IB_QP_ATTRIBUTES_CREATE qp_create;
- IB_QP_ATTRIBUTES_MODIFY qp_attr;
- } params;
-
- LIBCFS_ALLOC (conn, sizeof (*conn));
- if (conn == NULL) {
- CERROR ("Can't allocate connection for %s\n",
- libcfs_nid2str(nid));
- return (NULL);
- }
-
- /* zero flags, NULL pointers etc... */
- memset (conn, 0, sizeof (*conn));
- conn->ibc_state = IBNAL_CONN_INIT_NOTHING;
- conn->ibc_version = proto_version;
-
- INIT_LIST_HEAD (&conn->ibc_early_rxs);
- INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
- INIT_LIST_HEAD (&conn->ibc_tx_queue);
- INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
- INIT_LIST_HEAD (&conn->ibc_active_txs);
- spin_lock_init (&conn->ibc_lock);
-
- atomic_inc (&kibnal_data.kib_nconns);
- /* well not really, but I call destroy() on failure, which decrements */
-
- LIBCFS_ALLOC(conn->ibc_cvars, sizeof (*conn->ibc_cvars));
- if (conn->ibc_cvars == NULL) {
- CERROR ("Can't allocate connvars for %s\n",
- libcfs_nid2str(nid));
- goto failed;
- }
- memset(conn->ibc_cvars, 0, sizeof (*conn->ibc_cvars));
-
- LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
- if (conn->ibc_rxs == NULL) {
- CERROR("Cannot allocate RX descriptors for %s\n",
- libcfs_nid2str(nid));
- goto failed;
- }
- memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES);
- if (rc != 0) {
- CERROR("Can't allocate RX buffers for %s\n",
- libcfs_nid2str(nid));
- goto failed;
- }
-
- for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
- struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
- kib_rx_t *rx = &conn->ibc_rxs[i];
-
- rx->rx_conn = conn;
- rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
- page_offset);
-
- rx->rx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
- lnet_page2phys(page) + page_offset;
-
- page_offset += IBNAL_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBNAL_RX_MSG_PAGES);
- }
- }
-
- params.qp_create = (IB_QP_ATTRIBUTES_CREATE) {
- .Type = QPTypeReliableConnected,
- .SendQDepth = (1 + IBNAL_MAX_RDMA_FRAGS) *
- (*kibnal_tunables.kib_concurrent_sends),
- .RecvQDepth = IBNAL_RX_MSGS,
- .SendDSListDepth = 1,
- .RecvDSListDepth = 1,
- .SendCQHandle = kibnal_data.kib_cq,
- .RecvCQHandle = kibnal_data.kib_cq,
- .PDHandle = kibnal_data.kib_pd,
- .SendSignaledCompletions = TRUE,
- };
- frc = iba_create_qp(kibnal_data.kib_hca, ¶ms.qp_create, NULL,
- &conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs);
- if (frc != 0) {
- CERROR ("Can't create QP %s: %d\n", libcfs_nid2str(nid), frc);
- goto failed;
- }
-
- /* Mark QP created */
- kibnal_set_conn_state(conn, IBNAL_CONN_INIT_QP);
-
- params.qp_attr = (IB_QP_ATTRIBUTES_MODIFY) {
- .RequestState = QPStateInit,
- .Attrs = (IB_QP_ATTR_PORTGUID |
- IB_QP_ATTR_PKEYINDEX |
- IB_QP_ATTR_ACCESSCONTROL),
- .PortGUID = kibnal_data.kib_port_guid,
- .PkeyIndex = 0,
- .AccessControl = {
- .s = {
- .RdmaWrite = 1,
- .RdmaRead = 1,
- },
- },
- };
- frc = iba_modify_qp(conn->ibc_qp, ¶ms.qp_attr, NULL);
- if (frc != 0) {
- CERROR ("Can't set QP %s state to INIT: %d\n",
- libcfs_nid2str(nid), frc);
- goto failed;
- }
-
- frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't query QP %s attributes: %d\n",
- libcfs_nid2str(nid), frc);
- goto failed;
- }
-
- /* 1 ref for caller */
- atomic_set (&conn->ibc_refcount, 1);
- CDEBUG(D_NET, "New conn %p\n", conn);
- return (conn);
-
- failed:
- kibnal_destroy_conn (conn);
- return (NULL);
-}
-
-void
-kibnal_destroy_conn (kib_conn_t *conn)
-{
- FSTATUS frc;
-
- LASSERT (!in_interrupt());
-
- CDEBUG (D_NET, "connection %s\n",
- (conn->ibc_peer) == NULL ? "<ANON>" :
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- LASSERT (atomic_read (&conn->ibc_refcount) == 0);
- LASSERT (list_empty(&conn->ibc_early_rxs));
- LASSERT (list_empty(&conn->ibc_tx_queue));
- LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
- LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
- LASSERT (list_empty(&conn->ibc_active_txs));
- LASSERT (conn->ibc_nsends_posted == 0);
-
- switch (conn->ibc_state) {
- case IBNAL_CONN_INIT_NOTHING:
- case IBNAL_CONN_INIT_QP:
- case IBNAL_CONN_DISCONNECTED:
- break;
-
- default:
- /* conn must either have never engaged with the CM, or have
- * completely disengaged from it */
- CERROR("Bad conn %s state %d\n",
- (conn->ibc_peer) == NULL ? "<anon>" :
- libcfs_nid2str(conn->ibc_peer->ibp_nid), conn->ibc_state);
- LBUG();
- }
-
- if (conn->ibc_cep != NULL) {
- frc = iba_cm_destroy_cep(conn->ibc_cep);
- if (frc != FSUCCESS)
- CERROR("Error destroying CEP %p: %d\n",
- conn->ibc_cep, frc);
- }
-
- if (conn->ibc_qp != NULL) {
- frc = iba_destroy_qp(conn->ibc_qp);
- if (frc != FSUCCESS)
- CERROR("Error destroying QP %p: %d\n",
- conn->ibc_qp, frc);
- }
-
- if (conn->ibc_rx_pages != NULL)
- kibnal_free_pages(conn->ibc_rx_pages);
-
- if (conn->ibc_rxs != NULL)
- LIBCFS_FREE(conn->ibc_rxs,
- IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- if (conn->ibc_cvars != NULL)
- LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars));
-
- if (conn->ibc_peer != NULL)
- kibnal_peer_decref(conn->ibc_peer);
-
- LIBCFS_FREE(conn, sizeof (*conn));
-
- atomic_dec(&kibnal_data.kib_nconns);
-}
-
-int
-kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- count++;
- kibnal_close_conn_locked (conn, why);
- }
-
- return (count);
-}
-
-int
-kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- if (conn->ibc_incarnation == incarnation)
- continue;
-
- CDEBUG(D_NET, "Closing stale conn nid:%s incarnation:"LPX64"("LPX64")\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_incarnation, incarnation);
-
- count++;
- kibnal_close_conn_locked (conn, -ESTALE);
- }
-
- return (count);
-}
-
-int
-kibnal_close_matching_conns (lnet_nid_t nid)
-{
- unsigned long flags;
- kib_peer_t *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- int count = 0;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
- continue;
-
- count += kibnal_close_peer_conns_locked (peer, 0);
- }
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- /* wildcards always succeed */
- if (nid == LNET_NID_ANY)
- return (0);
-
- return (count == 0 ? -ENOENT : 0);
-}
-
-int
-kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
- ENTRY;
-
- LASSERT (ni == kibnal_data.kib_ni);
-
- switch(cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- int share_count = 0;
-
- rc = kibnal_get_peer_info(data->ioc_count,
- &nid, &share_count);
- data->ioc_nid = nid;
- data->ioc_count = share_count;
- break;
- }
- case IOC_LIBCFS_ADD_PEER: {
- rc = kibnal_add_persistent_peer (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_DEL_PEER: {
- rc = kibnal_del_peer (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
-
- if (conn == NULL)
- rc = -ENOENT;
- else {
- rc = 0;
- data->ioc_nid = conn->ibc_peer->ibp_nid;
- kibnal_conn_decref(conn);
- }
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- rc = kibnal_close_matching_conns (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_REGISTER_MYNID: {
- if (ni->ni_nid == data->ioc_nid) {
- rc = 0;
- } else {
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- rc = -EINVAL;
- }
- break;
- }
- }
-
- RETURN(rc);
-}
-
-void
-kibnal_free_pages (kib_pages_t *p)
-{
- int npages = p->ibp_npages;
- int i;
-
- for (i = 0; i < npages; i++)
- if (p->ibp_pages[i] != NULL)
- __free_page(p->ibp_pages[i]);
-
- LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
-}
-
-int
-kibnal_alloc_pages (kib_pages_t **pp, int npages)
-{
- kib_pages_t *p;
- int i;
-
- LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
- if (p == NULL) {
- CERROR ("Can't allocate buffer %d\n", npages);
- return (-ENOMEM);
- }
-
- memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
- p->ibp_npages = npages;
-
- for (i = 0; i < npages; i++) {
- p->ibp_pages[i] = alloc_page (GFP_KERNEL);
- if (p->ibp_pages[i] == NULL) {
- CERROR ("Can't allocate page %d of %d\n", i, npages);
- kibnal_free_pages(p);
- return (-ENOMEM);
- }
- }
-
- *pp = p;
- return (0);
-}
-
-int
-kibnal_alloc_tx_descs (void)
-{
- int i;
-
- LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
- if (kibnal_data.kib_tx_descs == NULL)
- return -ENOMEM;
-
- memset(kibnal_data.kib_tx_descs, 0,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
-
-#if IBNAL_USE_FMR
- LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV *
- sizeof(*tx->tx_pages));
- if (tx->tx_pages == NULL)
- return -ENOMEM;
-#else
- LIBCFS_ALLOC(tx->tx_wrq,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq));
- if (tx->tx_wrq == NULL)
- return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_gl,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_gl));
- if (tx->tx_gl == NULL)
- return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
- rd_frags[IBNAL_MAX_RDMA_FRAGS]));
- if (tx->tx_rd == NULL)
- return -ENOMEM;
-#endif
- }
-
- return 0;
-}
-
-void
-kibnal_free_tx_descs (void)
-{
- int i;
-
- if (kibnal_data.kib_tx_descs == NULL)
- return;
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
-
-#if IBNAL_USE_FMR
- if (tx->tx_pages != NULL)
- LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV *
- sizeof(*tx->tx_pages));
-#else
- if (tx->tx_wrq != NULL)
- LIBCFS_FREE(tx->tx_wrq,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq));
-
- if (tx->tx_gl != NULL)
- LIBCFS_FREE(tx->tx_gl,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_gl));
-
- if (tx->tx_rd != NULL)
- LIBCFS_FREE(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
- rd_frags[IBNAL_MAX_RDMA_FRAGS]));
-#endif
- }
-
- LIBCFS_FREE(kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
-}
-
-int
-kibnal_setup_tx_descs (void)
-{
- int ipage = 0;
- int page_offset = 0;
- struct page *page;
- kib_tx_t *tx;
- int i;
- int rc;
-
- /* pre-mapped messages are not bigger than 1 page */
- CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
-
- /* No fancy arithmetic when we do the buffer calculations */
- CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
-
- rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
- IBNAL_TX_MSG_PAGES());
- if (rc != 0)
- return (rc);
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
- tx = &kibnal_data.kib_tx_descs[i];
-
-#if IBNAL_USE_FMR
- /* Allocate an FMR for this TX so it can map src/sink buffers
- * for large transfers */
-#endif
- tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
- page_offset);
-
- tx->tx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
- lnet_page2phys(page) + page_offset;
-
- CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n",
- i, tx, tx->tx_msg, tx->tx_hca_msg);
-
- list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
-
- page_offset += IBNAL_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
- }
- }
-
- return (0);
-}
-
-int
-kibnal_register_all_memory(void)
-{
- /* CAVEAT EMPTOR: this assumes all physical memory is in 1 contiguous
- * chunk starting at 0 */
- struct sysinfo si;
- __u64 total;
- __u64 total2;
- __u64 roundup = (128<<20); /* round up in big chunks */
- IB_MR_PHYS_BUFFER phys;
- IB_ACCESS_CONTROL access;
- FSTATUS frc;
-
- memset(&access, 0, sizeof(access));
- access.s.MWBindable = 1;
- access.s.LocalWrite = 1;
- access.s.RdmaRead = 1;
- access.s.RdmaWrite = 1;
-
- /* XXX we don't bother with first-gen cards */
- if (kibnal_data.kib_hca_attrs.VendorId == 0xd0b7 &&
- kibnal_data.kib_hca_attrs.DeviceId == 0x3101) {
- CERROR("Can't register all memory on first generation HCAs\n");
- return -EINVAL;
- }
-
- si_meminfo(&si);
-
- CDEBUG(D_NET, "si_meminfo: %lu/%u, num_physpages %lu/%lu\n",
- si.totalram, si.mem_unit, num_physpages, PAGE_SIZE);
-
- total = ((__u64)si.totalram) * si.mem_unit;
- total2 = num_physpages * PAGE_SIZE;
- if (total < total2)
- total = total2;
-
- if (total == 0) {
- CERROR("Can't determine memory size\n");
- return -ENOMEM;
- }
-
- roundup = (128<<20);
- total = (total + (roundup - 1)) & ~(roundup - 1);
-
- phys.PhysAddr = 0;
- phys.Length = total;
-
- frc = iba_register_contig_pmr(kibnal_data.kib_hca, 0, &phys, 1, 0,
- kibnal_data.kib_pd, access,
- &kibnal_data.kib_whole_mem.md_handle,
- &kibnal_data.kib_whole_mem.md_addr,
- &kibnal_data.kib_whole_mem.md_lkey,
- &kibnal_data.kib_whole_mem.md_rkey);
-
- if (frc != FSUCCESS) {
- CERROR("registering physical memory failed: %d\n", frc);
- return -EIO;
- }
-
- CDEBUG(D_WARNING, "registered phys mem from 0("LPX64") for "LPU64"("LPU64") -> "LPX64"\n",
- phys.PhysAddr, total, phys.Length, kibnal_data.kib_whole_mem.md_addr);
-
- return 0;
-}
-
-void
-kibnal_shutdown (lnet_ni_t *ni)
-{
- int i;
- int rc;
-
- LASSERT (ni == kibnal_data.kib_ni);
- LASSERT (ni->ni_data == &kibnal_data);
-
- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- switch (kibnal_data.kib_init) {
- default:
- CERROR ("Unexpected state %d\n", kibnal_data.kib_init);
- LBUG();
-
- case IBNAL_INIT_ALL:
- /* stop accepting connections, prevent new peers and start to
- * tear down all existing ones... */
- kibnal_stop_listener(1);
-
- /* Wait for all peer state to clean up */
- i = 2;
- while (atomic_read (&kibnal_data.kib_npeers) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d peers to disconnect\n",
- atomic_read (&kibnal_data.kib_npeers));
- set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (HZ);
- }
- /* fall through */
-
- case IBNAL_INIT_CQ:
- rc = iba_destroy_cq(kibnal_data.kib_cq);
- if (rc != 0)
- CERROR ("Destroy CQ error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_TXD:
- kibnal_free_pages (kibnal_data.kib_tx_pages);
- /* fall through */
-
- case IBNAL_INIT_MD:
- rc = iba_deregister_mr(kibnal_data.kib_whole_mem.md_handle);
- if (rc != FSUCCESS)
- CERROR ("Deregister memory: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_PD:
- rc = iba_free_pd(kibnal_data.kib_pd);
- if (rc != 0)
- CERROR ("Destroy PD error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_SD:
- rc = iba_sd_deregister(kibnal_data.kib_sd);
- if (rc != 0)
- CERROR ("Deregister SD error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_PORTATTRS:
- LIBCFS_FREE(kibnal_data.kib_hca_attrs.PortAttributesList,
- kibnal_data.kib_hca_attrs.PortAttributesListSize);
- /* fall through */
-
- case IBNAL_INIT_HCA:
- rc = iba_close_ca(kibnal_data.kib_hca);
- if (rc != 0)
- CERROR ("Close HCA error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_DATA:
- LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
- LASSERT (kibnal_data.kib_peers != NULL);
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
- LASSERT (list_empty (&kibnal_data.kib_peers[i]));
- }
- LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
- LASSERT (list_empty (&kibnal_data.kib_connd_zombies));
- LASSERT (list_empty (&kibnal_data.kib_connd_conns));
- LASSERT (list_empty (&kibnal_data.kib_connd_peers));
-
- /* flag threads to terminate; wake and wait for them to die */
- kibnal_data.kib_shutdown = 1;
- wake_up_all (&kibnal_data.kib_sched_waitq);
- wake_up_all (&kibnal_data.kib_connd_waitq);
-
- i = 2;
- while (atomic_read (&kibnal_data.kib_nthreads) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d threads to terminate\n",
- atomic_read (&kibnal_data.kib_nthreads));
- set_current_state (TASK_INTERRUPTIBLE);
- schedule_timeout (HZ);
- }
- /* fall through */
-
- case IBNAL_INIT_NOTHING:
- break;
- }
-
- kibnal_free_tx_descs();
-
- if (kibnal_data.kib_peers != NULL)
- LIBCFS_FREE (kibnal_data.kib_peers,
- sizeof (struct list_head) *
- kibnal_data.kib_peer_hash_size);
-
- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- kibnal_data.kib_init = IBNAL_INIT_NOTHING;
- PORTAL_MODULE_UNUSE;
-}
-
-int
-kibnal_get_ipif_name(char *ifname, int ifname_size, int idx)
-{
- char *basename = *kibnal_tunables.kib_ipif_basename;
- int n = strlen(basename);
- int baseidx;
- int m;
-
- if (n == 0) { /* empty string */
- CERROR("Empty IP interface basename specified\n");
- return -EINVAL;
- }
-
- for (m = n; m > 0; m--) /* find max numeric postfix */
- if (sscanf(basename + m - 1, "%d", &baseidx) != 1)
- break;
-
- if (m == 0) /* just a number */
- m = n;
-
- if (m == n) /* no postfix */
- baseidx = 1; /* default to 1 */
-
- if (m >= ifname_size)
- m = ifname_size - 1;
-
- memcpy(ifname, basename, m); /* copy prefix name */
-
- snprintf(ifname + m, ifname_size - m, "%d", baseidx + idx);
-
- if (strlen(ifname) == ifname_size - 1) {
- CERROR("IP interface basename %s too long\n", basename);
- return -EINVAL;
- }
-
- return 0;
-}
-
-int
-kibnal_startup (lnet_ni_t *ni)
-{
- char ipif_name[32];
- __u32 ip;
- __u32 netmask;
- int up;
- int nob;
- struct timeval tv;
- IB_PORT_ATTRIBUTES *pattr;
- FSTATUS frc;
- int rc;
- __u32 n;
- int i;
-
- LASSERT (ni->ni_lnd == &the_kiblnd);
-
- /* Only 1 instance supported */
- if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
- CERROR ("Only 1 instance supported\n");
- return -EPERM;
- }
-
- if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
- CERROR ("Can't set credits(%d) > ntx(%d)\n",
- *kibnal_tunables.kib_credits,
- *kibnal_tunables.kib_ntx);
- return -EINVAL;
- }
-
- ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
- ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
-
- CLASSERT (LNET_MAX_INTERFACES > 1);
-
- if (ni->ni_interfaces[0] == NULL) {
- kibnal_data.kib_hca_idx = 0;
- } else {
- /* Use the HCA specified in 'networks=' */
- if (ni->ni_interfaces[1] != NULL) {
- CERROR("Multiple interfaces not supported\n");
- return -EPERM;
- }
-
- /* Parse <number> into kib_hca_idx */
- nob = strlen(ni->ni_interfaces[0]);
- if (sscanf(ni->ni_interfaces[0], "%d%n",
- &kibnal_data.kib_hca_idx, &nob) < 1 ||
- nob != strlen(ni->ni_interfaces[0])) {
- CERROR("Can't parse interface '%s'\n",
- ni->ni_interfaces[0]);
- return -EINVAL;
- }
- }
-
- rc = kibnal_get_ipif_name(ipif_name, sizeof(ipif_name),
- kibnal_data.kib_hca_idx);
- if (rc != 0)
- return rc;
-
- rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
- if (rc != 0) {
- CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
- return -ENETDOWN;
- }
-
- if (!up) {
- CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
- return -ENETDOWN;
- }
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
-
- ni->ni_data = &kibnal_data;
- kibnal_data.kib_ni = ni;
-
- do_gettimeofday(&tv);
- kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
- PORTAL_MODULE_USE;
-
- rwlock_init(&kibnal_data.kib_global_lock);
-
- kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
- LIBCFS_ALLOC (kibnal_data.kib_peers,
- sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
- if (kibnal_data.kib_peers == NULL) {
- goto failed;
- }
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++)
- INIT_LIST_HEAD(&kibnal_data.kib_peers[i]);
-
- spin_lock_init (&kibnal_data.kib_connd_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_conns);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies);
- init_waitqueue_head (&kibnal_data.kib_connd_waitq);
-
- spin_lock_init (&kibnal_data.kib_sched_lock);
- init_waitqueue_head (&kibnal_data.kib_sched_waitq);
-
- spin_lock_init (&kibnal_data.kib_tx_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
-
- rc = kibnal_alloc_tx_descs();
- if (rc != 0) {
- CERROR("Can't allocate tx descs\n");
- goto failed;
- }
-
- /* lists/ptrs/locks initialised */
- kibnal_data.kib_init = IBNAL_INIT_DATA;
- /*****************************************************/
-
- kibnal_data.kib_sdretry.RetryCount = *kibnal_tunables.kib_sd_retries;
- kibnal_data.kib_sdretry.Timeout = (*kibnal_tunables.kib_timeout * 1000)/
- *kibnal_tunables.kib_sd_retries;
-
- for (i = 0; i < IBNAL_N_SCHED; i++) {
- rc = kibnal_thread_start (kibnal_scheduler,
- (void *)(unsigned long)i);
- if (rc != 0) {
- CERROR("Can't spawn iib scheduler[%d]: %d\n",
- i, rc);
- goto failed;
- }
- }
-
- rc = kibnal_thread_start (kibnal_connd, NULL);
- if (rc != 0) {
- CERROR ("Can't spawn iib connd: %d\n", rc);
- goto failed;
- }
-
- n = sizeof(kibnal_data.kib_hca_guids) /
- sizeof(kibnal_data.kib_hca_guids[0]);
- frc = iba_get_caguids(&n, kibnal_data.kib_hca_guids);
- if (frc != FSUCCESS) {
- CERROR ("Can't get HCA guids: %d\n", frc);
- goto failed;
- }
-
- if (n == 0) {
- CERROR ("No HCAs found\n");
- goto failed;
- }
-
- if (n <= kibnal_data.kib_hca_idx) {
- CERROR("Invalid HCA %d requested: (must be 0 - %d inclusive)\n",
- kibnal_data.kib_hca_idx, n - 1);
- goto failed;
- }
-
- /* Infinicon has per-HCA notification callbacks */
- frc = iba_open_ca(kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx],
- kibnal_hca_callback,
- kibnal_hca_async_callback,
- NULL,
- &kibnal_data.kib_hca);
- if (frc != FSUCCESS) {
- CERROR ("Can't open HCA[%d]: %d\n",
- kibnal_data.kib_hca_idx, frc);
- goto failed;
- }
-
- /* Channel Adapter opened */
- kibnal_data.kib_init = IBNAL_INIT_HCA;
- /*****************************************************/
-
- kibnal_data.kib_hca_attrs.PortAttributesList = NULL;
- kibnal_data.kib_hca_attrs.PortAttributesListSize = 0;
- frc = iba_query_ca(kibnal_data.kib_hca,
- &kibnal_data.kib_hca_attrs, NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't size port attrs: %d\n", frc);
- goto failed;
- }
-
- LIBCFS_ALLOC(kibnal_data.kib_hca_attrs.PortAttributesList,
- kibnal_data.kib_hca_attrs.PortAttributesListSize);
- if (kibnal_data.kib_hca_attrs.PortAttributesList == NULL)
- goto failed;
-
- /* Port attrs allocated */
- kibnal_data.kib_init = IBNAL_INIT_PORTATTRS;
- /*****************************************************/
-
- frc = iba_query_ca(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs,
- NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't get port attrs for HCA %d: %d\n",
- kibnal_data.kib_hca_idx, frc);
- goto failed;
- }
-
- for (i = 0, pattr = kibnal_data.kib_hca_attrs.PortAttributesList;
- pattr != NULL;
- i++, pattr = pattr->Next) {
- switch (pattr->PortState) {
- default:
- CERROR("Unexpected port[%d] state %d\n",
- i, pattr->PortState);
- continue;
- case PortStateDown:
- CDEBUG(D_NET, "port[%d] Down\n", i);
- continue;
- case PortStateInit:
- CDEBUG(D_NET, "port[%d] Init\n", i);
- continue;
- case PortStateArmed:
- CDEBUG(D_NET, "port[%d] Armed\n", i);
- continue;
-
- case PortStateActive:
- CDEBUG(D_NET, "port[%d] Active\n", i);
- kibnal_data.kib_port = i;
- kibnal_data.kib_port_guid = pattr->GUID;
- kibnal_data.kib_port_pkey = pattr->PkeyTable[0];
- break;
- }
- break;
- }
-
- if (pattr == NULL) {
- CERROR ("Can't find an active port\n");
- goto failed;
- }
-
- CDEBUG(D_NET, "got guid "LPX64"\n", kibnal_data.kib_port_guid);
-
- frc = iba_sd_register(&kibnal_data.kib_sd, NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't register with SD: %d\n", frc);
- goto failed;
- }
-
- /* Registered with SD OK */
- kibnal_data.kib_init = IBNAL_INIT_SD;
- /*****************************************************/
-
- frc = iba_alloc_pd(kibnal_data.kib_hca, 0, &kibnal_data.kib_pd);
- if (frc != FSUCCESS) {
- CERROR ("Can't create PD: %d\n", rc);
- goto failed;
- }
-
- /* flag PD initialised */
- kibnal_data.kib_init = IBNAL_INIT_PD;
- /*****************************************************/
-
- rc = kibnal_register_all_memory();
- if (rc != 0) {
- CERROR ("Can't register all memory\n");
- goto failed;
- }
-
- /* flag whole memory MD initialised */
- kibnal_data.kib_init = IBNAL_INIT_MD;
- /*****************************************************/
-
- rc = kibnal_setup_tx_descs();
- if (rc != 0) {
- CERROR ("Can't register tx descs: %d\n", rc);
- goto failed;
- }
-
- /* flag TX descs initialised */
- kibnal_data.kib_init = IBNAL_INIT_TXD;
- /*****************************************************/
-
- frc = iba_create_cq(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(),
- &kibnal_data.kib_cq, &kibnal_data.kib_cq,
- &n);
- if (frc != FSUCCESS) {
- CERROR ("Can't create RX CQ: %d\n", frc);
- goto failed;
- }
-
- /* flag CQ initialised */
- kibnal_data.kib_init = IBNAL_INIT_CQ;
- /*****************************************************/
-
- if (n < IBNAL_CQ_ENTRIES()) {
- CERROR ("CQ only has %d entries: %d needed\n",
- n, IBNAL_CQ_ENTRIES());
- goto failed;
- }
-
- rc = iba_rearm_cq(kibnal_data.kib_cq, CQEventSelNextWC);
- if (rc != 0) {
- CERROR ("Failed to re-arm completion queue: %d\n", rc);
- goto failed;
- }
-
- rc = kibnal_start_listener();
- if (rc != 0) {
- CERROR("Can't start listener: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- kibnal_data.kib_init = IBNAL_INIT_ALL;
- /*****************************************************/
-
- return (0);
-
- failed:
- kibnal_shutdown (ni);
- return (-ENETDOWN);
-}
-
-void __exit
-kibnal_module_fini (void)
-{
- lnet_unregister_lnd(&the_kiblnd);
- kibnal_tunables_fini();
-}
-
-int __init
-kibnal_module_init (void)
-{
- int rc;
-
- if (the_lnet.ln_ptlcompat != 0) {
- LCONSOLE_ERROR_MSG(0x12c, "IIB does not support portals "
- "compatibility mode\n");
- return -ENODEV;
- }
-
- rc = kibnal_tunables_init();
- if (rc != 0)
- return rc;
-
- lnet_register_lnd(&the_kiblnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel Infinicon IB LND v1.00");
-MODULE_LICENSE("GPL");
-
-module_init(kibnal_module_init);
-module_exit(kibnal_module_fini);
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-
-#include <linux/iba/ibt.h>
-
-#define GCC_VERSION (__GNUC__ * 10000 \
- + __GNUC_MINOR__ * 100 \
- + __GNUC_PATCHLEVEL__)
-
-/* Test for GCC > 3.2.2 */
-#if GCC_VERSION <= 30202
-/* GCC 3.2.2, and presumably several versions before it, will
- * miscompile this driver. See
- * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9853. */
-#error Invalid GCC version. Must use GCC >= 3.2.3
-#endif
-
-#ifdef CONFIG_SMP
-# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */
-#else
-# define IBNAL_N_SCHED 1 /* # schedulers */
-#endif
-
-#define IBNAL_USE_FMR 0 /* map on demand v. use whole mem mapping */
-#define KIBLND_DETAILED_DEBUG 0
-
-/* tunables fixed at compile time */
-#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */
-#define IBNAL_CREDIT_HIGHWATER 7 /* when to eagerly return credits */
-#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */
-#define IBNAL_RDMA_BASE 0x0eeb0000
-#define IBNAL_STARTING_PSN 1
-
-/* QP tunables */
-/* 7 indicates infinite retry attempts, Infinicon recommended 5 */
-#define IBNAL_RETRY 5 /* # times to retry */
-#define IBNAL_RNR_RETRY 5 /* */
-#define IBNAL_CM_RETRY 5 /* # times to retry connection */
-#define IBNAL_FLOW_CONTROL 1
-#define IBNAL_ACK_TIMEOUT 20 /* supposedly 4 secs */
-#define IBNAL_EE_FLOW 1
-#define IBNAL_LOCAL_SUB 1
-#define IBNAL_FAILOVER_ACCEPTED 0
-
-/************************/
-/* derived constants... */
-
-/* TX messages (shared by all connections) */
-#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx)
-#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE)
-#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)
-
-#if IBNAL_USE_FMR
-# define IBNAL_MAX_RDMA_FRAGS 1
-# define IBNAL_CONCURRENT_SENDS IBNAL_RX_MSGS
-#else
-# define IBNAL_MAX_RDMA_FRAGS LNET_MAX_IOV
-# define IBNAL_CONCURRENT_SENDS IBNAL_MSG_QUEUE_SIZE
-#endif
-
-/* RX messages (per connection) */
-#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE * 2)
-#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
-
-#define IBNAL_CQ_ENTRIES() (IBNAL_TX_MSGS() * (1 + IBNAL_MAX_RDMA_FRAGS) + \
- (IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers))
-
-typedef struct
-{
- char **kib_hca_basename; /* HCA base name */
- char **kib_ipif_basename; /* IPoIB interface base name */
- char **kib_service_name; /* global service name */
- unsigned int *kib_service_number; /* global service number */
- int *kib_min_reconnect_interval; /* min connect retry seconds... */
- int *kib_max_reconnect_interval; /* max connect retry seconds */
- int *kib_concurrent_peers; /* max # peers */
- int *kib_cksum; /* checksum kib_msg_t? */
- int *kib_timeout; /* comms timeout (seconds) */
- int *kib_keepalive; /* keepalive timeout (seconds) */
- int *kib_ntx; /* # tx descs */
- int *kib_credits; /* # concurrent sends */
- int *kib_peercredits; /* # concurrent sends to 1 peer */
- int *kib_sd_retries; /* # concurrent sends to 1 peer */
- int *kib_concurrent_sends; /* send work queue sizing */
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */
-#endif
-} kib_tunables_t;
-
-/* NB The Infinicon stack has specific typedefs for some things
- * (e.g. IB_{L,R}_KEY), that just map back to __u32 etc */
-typedef struct
-{
- int ibp_npages; /* # pages */
- struct page *ibp_pages[0];
-} kib_pages_t;
-
-typedef struct
-{
- IB_HANDLE md_handle;
- __u32 md_lkey;
- __u32 md_rkey;
- __u64 md_addr;
-} kib_md_t;
-
-typedef struct
-{
- int kib_init; /* initialisation state */
- __u64 kib_incarnation; /* which one am I */
- int kib_shutdown; /* shut down? */
- atomic_t kib_nthreads; /* # live threads */
- lnet_ni_t *kib_ni; /* _the_ iib instance */
-
- __u64 kib_port_guid; /* my GUID (lo 64 of GID)*/
- __u16 kib_port_pkey; /* my pkey, whatever that is */
- struct semaphore kib_listener_signal; /* signal completion */
- IB_HANDLE kib_listener_cep; /* connection end point */
-
- rwlock_t kib_global_lock; /* stabilize peer/conn ops */
- int kib_ready; /* CQ callback fired */
- int kib_checking_cq; /* a scheduler is checking the CQ */
-
- struct list_head *kib_peers; /* hash table of all my known peers */
- int kib_peer_hash_size; /* size of kib_peers */
- atomic_t kib_npeers; /* # peers extant */
- atomic_t kib_nconns; /* # connections extant */
-
- struct list_head kib_connd_zombies; /* connections to free */
- struct list_head kib_connd_conns; /* connections to progress */
- struct list_head kib_connd_peers; /* peers waiting for a connection */
- wait_queue_head_t kib_connd_waitq; /* connection daemon sleep here */
- spinlock_t kib_connd_lock; /* serialise */
-
- wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */
- spinlock_t kib_sched_lock; /* serialise */
-
- struct kib_tx *kib_tx_descs; /* all the tx descriptors */
- kib_pages_t *kib_tx_pages; /* premapped tx msg pages */
-
- struct list_head kib_idle_txs; /* idle tx descriptors */
- __u64 kib_next_tx_cookie; /* RDMA completion cookie */
- spinlock_t kib_tx_lock; /* serialise */
-
- IB_HANDLE kib_hca; /* The HCA */
- int kib_port; /* port on the device */
- IB_HANDLE kib_pd; /* protection domain */
- IB_HANDLE kib_sd; /* SD handle */
- IB_HANDLE kib_cq; /* completion queue */
- kib_md_t kib_whole_mem; /* whole-mem registration */
-
- int kib_hca_idx; /* my HCA number */
- uint64 kib_hca_guids[8]; /* all the HCA guids */
- IB_CA_ATTRIBUTES kib_hca_attrs; /* where to get HCA attrs */
-
- COMMAND_CONTROL_PARAMETERS kib_sdretry; /* control SD query retries */
-} kib_data_t;
-
-#define IBNAL_INIT_NOTHING 0
-#define IBNAL_INIT_DATA 1
-#define IBNAL_INIT_LIB 2
-#define IBNAL_INIT_HCA 3
-#define IBNAL_INIT_PORTATTRS 4
-#define IBNAL_INIT_SD 5
-#define IBNAL_INIT_PD 6
-#define IBNAL_INIT_MD 7
-#define IBNAL_INIT_TXD 8
-#define IBNAL_INIT_CQ 9
-#define IBNAL_INIT_ALL 10
-
-/************************************************************************
- * Wire message structs.
- * These are sent in sender's byte order (i.e. receiver flips).
- * CAVEAT EMPTOR: other structs communicated between nodes (e.g. MAD
- * private data and SM service info), is LE on the wire.
- */
-
-typedef struct kib_connparams
-{
- __u32 ibcp_queue_depth;
- __u32 ibcp_max_msg_size;
- __u32 ibcp_max_frags;
-} WIRE_ATTR kib_connparams_t;
-
-typedef struct
-{
- lnet_hdr_t ibim_hdr; /* portals header */
- char ibim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR kib_immediate_msg_t;
-
-#if IBNAL_USE_FMR
-typedef struct
-{
- __u64 rd_addr; /* IO VMA address */
- __u32 rd_nob; /* # of bytes */
- __u32 rd_key; /* remote key */
-} WIRE_ATTR kib_rdma_desc_t;
-#else
-typedef struct
-{
- __u32 rf_nob; /* # of bytes */
- __u64 rf_addr; /* remote io vaddr */
-} WIRE_ATTR kib_rdma_frag_t;
-
-typedef struct
-{
- __u32 rd_key; /* local/remote key */
- __u32 rd_nfrag; /* # fragments */
- kib_rdma_frag_t rd_frags[0]; /* buffer frags */
-} WIRE_ATTR kib_rdma_desc_t;
-#endif
-
-typedef struct
-{
- lnet_hdr_t ibprm_hdr; /* LNET header */
- __u64 ibprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR kib_putreq_msg_t;
-
-typedef struct
-{
- __u64 ibpam_src_cookie; /* reflected completion cookie */
- __u64 ibpam_dst_cookie; /* opaque completion cookie */
- kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */
-} WIRE_ATTR kib_putack_msg_t;
-
-typedef struct
-{
- lnet_hdr_t ibgm_hdr; /* LNET header */
- __u64 ibgm_cookie; /* opaque completion cookie */
- kib_rdma_desc_t ibgm_rd; /* sender's sink buffer */
-} WIRE_ATTR kib_get_msg_t;
-
-typedef struct
-{
- __u64 ibcm_cookie; /* opaque completion cookie */
- __u32 ibcm_status; /* completion status */
-} WIRE_ATTR kib_completion_msg_t;
-
-typedef struct
-{
- /* First 2 fields fixed FOR ALL TIME */
- __u32 ibm_magic; /* I'm an openibnal message */
- __u16 ibm_version; /* this is my version number */
-
- __u8 ibm_type; /* msg type */
- __u8 ibm_credits; /* returned credits */
- __u32 ibm_nob; /* # bytes in whole message */
- __u32 ibm_cksum; /* checksum (0 == no checksum) */
- __u64 ibm_srcnid; /* sender's NID */
- __u64 ibm_srcstamp; /* sender's incarnation */
- __u64 ibm_dstnid; /* destination's NID */
- __u64 ibm_dststamp; /* destination's incarnation */
- __u64 ibm_seq; /* sequence number */
-
- union {
- kib_connparams_t connparams;
- kib_immediate_msg_t immediate;
- kib_putreq_msg_t putreq;
- kib_putack_msg_t putack;
- kib_get_msg_t get;
- kib_completion_msg_t completion;
- } WIRE_ATTR ibm_u;
-} WIRE_ATTR kib_msg_t;
-
-#define IBNAL_MSG_MAGIC LNET_PROTO_IIB_MAGIC /* unique magic */
-#define IBNAL_MSG_VERSION 2 /* current protocol version */
-#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 1 /* previous version */
-
-#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */
-#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */
-#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */
-#define IBNAL_MSG_IMMEDIATE 0xd1 /* immediate */
-#define IBNAL_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */
-#define IBNAL_MSG_PUT_NAK 0xd3 /* completion (sink->src) */
-#define IBNAL_MSG_PUT_ACK 0xd4 /* putack (sink->src) */
-#define IBNAL_MSG_PUT_DONE 0xd5 /* completion (src->sink) */
-#define IBNAL_MSG_GET_REQ 0xd6 /* getreq (sink->src) */
-#define IBNAL_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */
-
-/* connection rejection reasons */
-#define IBNAL_REJECT_CONN_RACE 0 /* You lost connection race */
-#define IBNAL_REJECT_NO_RESOURCES 1 /* Out of memory/conns etc */
-#define IBNAL_REJECT_FATAL 2 /* Anything else */
-
-/***********************************************************************/
-
-typedef struct kib_rx /* receive message */
-{
- struct list_head rx_list; /* queue for attention */
- struct kib_conn *rx_conn; /* owning conn */
- int rx_nob; /* # bytes received (-1 while posted) */
- __u64 rx_hca_msg; /* pre-mapped buffer (hca vaddr) */
- kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */
- IB_WORK_REQ2 rx_wrq;
- IB_LOCAL_DATASEGMENT rx_gl; /* and its memory */
-} kib_rx_t;
-
-typedef struct kib_tx /* transmit message */
-{
- struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
- struct kib_conn *tx_conn; /* owning conn */
- int tx_mapped; /* mapped for RDMA? */
- int tx_sending; /* # tx callbacks outstanding */
- int tx_queued; /* queued for sending */
- int tx_waiting; /* waiting for peer */
- int tx_status; /* completion status */
- unsigned long tx_deadline; /* completion deadline */
- __u64 tx_cookie; /* completion cookie */
- lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */
- kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */
- __u64 tx_hca_msg; /* pre-mapped buffer (HCA vaddr) */
- int tx_nwrq; /* # send work items */
-#if IBNAL_USE_FMR
- IB_WORK_REQ2 tx_wrq[2]; /* send work items... */
- IB_LOCAL_DATASEGMENT tx_gl[2]; /* ...and their memory */
- kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */
- kib_md_t tx_md; /* mapping */
- __u64 *tx_pages; /* page phys addrs */
-#else
- IB_WORK_REQ2 *tx_wrq; /* send work items... */
- IB_LOCAL_DATASEGMENT *tx_gl; /* ...and their memory */
- kib_rdma_desc_t *tx_rd; /* rdma descriptor (src buffers) */
-#endif
-} kib_tx_t;
-
-typedef struct
-{
- /* scratchpad during connection establishment */
- IB_QP_ATTRIBUTES_QUERY cv_qpattrs;
- QUERY cv_query;
- IB_SERVICE_RECORD cv_svcrec;
- IB_PATH_RECORD cv_path;
- CM_CONN_INFO cv_cmci;
-} kib_connvars_t;
-
-typedef struct kib_conn
-{
- struct kib_peer *ibc_peer; /* owning peer */
- struct list_head ibc_list; /* stash on peer's conn list */
- __u64 ibc_incarnation; /* which instance of the peer */
- __u64 ibc_txseq; /* tx sequence number */
- __u64 ibc_rxseq; /* rx sequence number */
- __u32 ibc_version; /* peer protocol version */
- atomic_t ibc_refcount; /* # users */
- int ibc_state; /* what's happening */
- int ibc_nsends_posted; /* # uncompleted sends */
- int ibc_credits; /* # credits I have */
- int ibc_outstanding_credits; /* # credits to return */
- int ibc_reserved_credits; /* # credits for ACK/DONE msgs */
- unsigned long ibc_last_send; /* time of last send */
- struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */
- struct list_head ibc_tx_queue_nocred; /* sends that don't need a cred */
- struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */
- struct list_head ibc_tx_queue; /* send queue */
- struct list_head ibc_active_txs; /* active tx awaiting completion */
- spinlock_t ibc_lock; /* serialise */
- kib_rx_t *ibc_rxs; /* the rx descs */
- kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */
- IB_HANDLE ibc_qp; /* queue pair */
- IB_HANDLE ibc_cep; /* CM endpoint */
- kib_connvars_t *ibc_cvars; /* connection scratchpad */
-} kib_conn_t;
-
-#define IBNAL_CONN_INIT_NOTHING 0 /* initial state */
-#define IBNAL_CONN_INIT_QP 1 /* ibc_qp set up */
-#define IBNAL_CONN_CONNECTING 2 /* started to connect */
-#define IBNAL_CONN_ESTABLISHED 3 /* connection established */
-#define IBNAL_CONN_DISCONNECTING 4 /* to send disconnect req */
-#define IBNAL_CONN_DISCONNECTED 5 /* no more QP or CM traffic */
-
-/* types of connection */
-#define IBNAL_CONN_ACTIVE 0 /* active connect */
-#define IBNAL_CONN_PASSIVE 1 /* passive connect */
-#define IBNAL_CONN_WAITING 2 /* waiting for connect */
-
-typedef struct kib_peer
-{
- struct list_head ibp_list; /* stash on global peer list */
- struct list_head ibp_connd_list; /* schedule on kib_connd_peers */
- lnet_nid_t ibp_nid; /* who's on the other end(s) */
- atomic_t ibp_refcount; /* # users */
- int ibp_persistence; /* "known" peer refs */
- int ibp_version; /* protocol version */
- struct list_head ibp_conns; /* all active connections */
- struct list_head ibp_tx_queue; /* msgs waiting for a conn */
- int ibp_connecting; /* active connects in progress */
- int ibp_accepting; /* passive connects in progress */
- int ibp_passivewait; /* waiting for peer to connect */
- unsigned long ibp_passivewait_deadline; /* when passive wait must complete */
- unsigned long ibp_reconnect_time; /* when reconnect may be attempted */
- unsigned long ibp_reconnect_interval; /* exponential backoff */
- int ibp_error; /* errno on closing this peer */
- cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */
-} kib_peer_t;
-
-
-extern kib_data_t kibnal_data;
-extern kib_tunables_t kibnal_tunables;
-
-/******************************************************************************/
-
-/* these are purposely avoiding using local vars so they don't increase
- * stack consumption. */
-
-#define kibnal_conn_addref(conn) \
-do { \
- CDEBUG(D_NET, "conn[%p] (%d)++\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- atomic_inc(&(conn)->ibc_refcount); \
-} while (0)
-
-#define kibnal_conn_decref(conn) \
-do { \
- unsigned long flags; \
- \
- CDEBUG(D_NET, "conn[%p] (%d)--\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); \
- list_add_tail(&(conn)->ibc_list, \
- &kibnal_data.kib_connd_zombies); \
- wake_up(&kibnal_data.kib_connd_waitq); \
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); \
- } \
-} while (0)
-
-#define kibnal_peer_addref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- atomic_inc(&(peer)->ibp_refcount); \
-} while (0)
-
-#define kibnal_peer_decref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
- kibnal_destroy_peer(peer); \
-} while (0)
-
-/******************************************************************************/
-
-static inline struct list_head *
-kibnal_nid2peerlist (lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size;
-
- return (&kibnal_data.kib_peers [hash]);
-}
-
-static inline int
-kibnal_peer_active(kib_peer_t *peer)
-{
- /* Am I in the peer hash table? */
- return (!list_empty(&peer->ibp_list));
-}
-
-static inline int
-kibnal_peer_connecting(kib_peer_t *peer)
-{
- /* Am I expecting a connection to materialise? */
- return (peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- peer->ibp_passivewait);
-}
-
-static inline void
-kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
-{
- struct list_head *q;
-
- LASSERT (tx->tx_nwrq > 0); /* work items set up */
- LASSERT (!tx->tx_queued); /* not queued for sending already */
-
- tx->tx_queued = 1;
- tx->tx_deadline = jiffies + (*kibnal_tunables.kib_timeout * HZ);
-
- if (tx->tx_conn == NULL) {
- kibnal_conn_addref(conn);
- tx->tx_conn = conn;
- LASSERT (tx->tx_msg->ibm_type != IBNAL_MSG_PUT_DONE);
- } else {
- LASSERT (tx->tx_conn == conn);
- LASSERT (tx->tx_msg->ibm_type == IBNAL_MSG_PUT_DONE);
- }
-
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- /* All messages have simple credit control */
- q = &conn->ibc_tx_queue;
- } else {
- LASSERT (conn->ibc_version == IBNAL_MSG_VERSION);
-
- switch (tx->tx_msg->ibm_type) {
- case IBNAL_MSG_PUT_REQ:
- case IBNAL_MSG_GET_REQ:
- /* RDMA request: reserve a buffer for the RDMA reply
- * before sending */
- q = &conn->ibc_tx_queue_rsrvd;
- break;
-
- case IBNAL_MSG_PUT_NAK:
- case IBNAL_MSG_PUT_ACK:
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- /* RDMA reply/completion: no credits; peer has reserved
- * a reply buffer */
- q = &conn->ibc_tx_queue_nocred;
- break;
-
- case IBNAL_MSG_NOOP:
- case IBNAL_MSG_IMMEDIATE:
- /* Otherwise: consume a credit before sending */
- q = &conn->ibc_tx_queue;
- break;
-
- default:
- LBUG();
- q = NULL;
- }
- }
-
- list_add_tail(&tx->tx_list, q);
-}
-
-static inline int
-kibnal_send_keepalive(kib_conn_t *conn)
-{
- return (*kibnal_tunables.kib_keepalive > 0) &&
- time_after(jiffies, conn->ibc_last_send +
- *kibnal_tunables.kib_keepalive*HZ);
-}
-
-#define KIBNAL_SERVICE_KEY_MASK (IB_SERVICE_RECORD_COMP_SERVICENAME | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_1 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_2 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_3 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_4 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_5 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_6 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_7 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_8)
-
-static inline __u64*
-kibnal_service_nid_field(IB_SERVICE_RECORD *srv)
-{
- /* must be consistent with KIBNAL_SERVICE_KEY_MASK */
- return (__u64 *)srv->ServiceData8;
-}
-
-static inline void
-kibnal_set_service_keys(IB_SERVICE_RECORD *srv, lnet_nid_t nid)
-{
- char *svc_name = *kibnal_tunables.kib_service_name;
-
- LASSERT (strlen(svc_name) < sizeof(srv->ServiceName));
- memset (srv->ServiceName, 0, sizeof(srv->ServiceName));
- strcpy (srv->ServiceName, svc_name);
-
- *kibnal_service_nid_field(srv) = cpu_to_le64(nid);
-}
-
-/* CAVEAT EMPTOR: We rely on tx/rx descriptor alignment to allow us to use the
- * lowest 2 bits of the work request id to stash the work item type (the op
- * field is not valid when the wc completes in error). */
-
-#define IBNAL_WID_TX 0
-#define IBNAL_WID_RX 1
-#define IBNAL_WID_RDMA 2
-#define IBNAL_WID_MASK 3UL
-
-static inline __u64
-kibnal_ptr2wreqid (void *ptr, int type)
-{
- unsigned long lptr = (unsigned long)ptr;
-
- LASSERT ((lptr & IBNAL_WID_MASK) == 0);
- LASSERT ((type & ~IBNAL_WID_MASK) == 0);
- return (__u64)(lptr | type);
-}
-
-static inline void *
-kibnal_wreqid2ptr (__u64 wreqid)
-{
- return (void *)(((unsigned long)wreqid) & ~IBNAL_WID_MASK);
-}
-
-static inline int
-kibnal_wreqid2type (__u64 wreqid)
-{
- return (wreqid & IBNAL_WID_MASK);
-}
-
-static inline void
-kibnal_set_conn_state (kib_conn_t *conn, int state)
-{
- CDEBUG(D_NET,"%p state %d\n", conn, state);
- conn->ibc_state = state;
- mb();
-}
-
-#if IBNAL_USE_FMR
-
-static inline int
-kibnal_rd_size (kib_rdma_desc_t *rd)
-{
- return rd->rd_nob;
-}
-
-#else
-static inline int
-kibnal_rd_size (kib_rdma_desc_t *rd)
-{
- int i;
- int size;
-
- for (i = size = 0; i < rd->rd_nfrag; i++)
- size += rd->rd_frags[i].rf_nob;
-
- return size;
-}
-#endif
-
-int kibnal_startup (lnet_ni_t *ni);
-void kibnal_shutdown (lnet_ni_t *ni);
-int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kibnal_eager_recv (lnet_ni_t *ni, void *private,
- lnet_msg_t *lntmsg, void **new_private);
-int kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob);
-void kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
- lnet_nid_t dstnid, __u64 dststamp, __u64 seq);
-void kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob, int type,
- lnet_nid_t dstnid, __u64 dststamp);
-int kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob);
-IB_HANDLE kibnal_create_cep(lnet_nid_t nid);
-int kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid);
-void kibnal_destroy_peer (kib_peer_t *peer);
-kib_peer_t *kibnal_find_peer_locked (lnet_nid_t nid);
-int kibnal_del_peer (lnet_nid_t nid);
-void kibnal_peer_alive (kib_peer_t *peer);
-void kibnal_unlink_peer_locked (kib_peer_t *peer);
-int kibnal_add_persistent_peer (lnet_nid_t nid);
-int kibnal_close_stale_conns_locked (kib_peer_t *peer,
- __u64 incarnation);
-int kibnal_conn_rts(kib_conn_t *conn,
- __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn);
-kib_conn_t *kibnal_create_conn (lnet_nid_t nid, int proto_version);
-void kibnal_destroy_conn (kib_conn_t *conn);
-void kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg);
-int kibnal_alloc_pages (kib_pages_t **pp, int npages);
-void kibnal_free_pages (kib_pages_t *p);
-void kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn);
-void kibnal_txlist_done (struct list_head *txlist, int status);
-int kibnal_post_receives (kib_conn_t *conn);
-int kibnal_init_rdma (kib_tx_t *tx, int type, int nob,
- kib_rdma_desc_t *dstrd, __u64 dstcookie);
-void kibnal_check_sends (kib_conn_t *conn);
-void kibnal_close_conn_locked (kib_conn_t *conn, int error);
-int kibnal_thread_start (int (*fn)(void *arg), void *arg);
-int kibnal_scheduler(void *arg);
-int kibnal_connd (void *arg);
-void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob);
-void kibnal_close_conn (kib_conn_t *conn, int why);
-void kibnal_start_active_rdma (int type, int status,
- kib_rx_t *rx, lnet_msg_t *lntmsg,
- unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int nob);
-void kibnal_hca_async_callback (void *hca_arg, IB_EVENT_RECORD *ev);
-void kibnal_hca_callback (void *hca_arg, void *cq_arg);
-int kibnal_tunables_init (void);
-void kibnal_tunables_fini (void);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "iiblnd.h"
-
-void
-hexdump(char *string, void *ptr, int len)
-{
- unsigned char *c = ptr;
- int i;
-
- return;
-
- if (len < 0 || len > 2048) {
- printk("XXX what the hell? %d\n",len);
- return;
- }
-
- printk("%d bytes of '%s' from 0x%p\n", len, string, ptr);
-
- for (i = 0; i < len;) {
- printk("%02x",*(c++));
- i++;
- if (!(i & 15)) {
- printk("\n");
- } else if (!(i&1)) {
- printk(" ");
- }
- }
-
- if(len & 15) {
- printk("\n");
- }
-}
-
-void
-kibnal_tx_done (kib_tx_t *tx)
-{
- lnet_msg_t *lntmsg[2];
- int rc = tx->tx_status;
- int i;
-
- LASSERT (!in_interrupt());
- LASSERT (!tx->tx_queued); /* mustn't be queued for sending */
- LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */
- LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */
-
-#if IBNAL_USE_FMR
- /* Handle unmapping if required */
-#endif
- /* tx may have up to 2 lnet msgs to finalise */
- lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
- lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
-
- if (tx->tx_conn != NULL) {
- kibnal_conn_decref(tx->tx_conn);
- tx->tx_conn = NULL;
- }
-
- tx->tx_nwrq = 0;
- tx->tx_status = 0;
-
- spin_lock(&kibnal_data.kib_tx_lock);
-
- list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
-
- spin_unlock(&kibnal_data.kib_tx_lock);
-
- /* delay finalize until my descs have been freed */
- for (i = 0; i < 2; i++) {
- if (lntmsg[i] == NULL)
- continue;
-
- lnet_finalize (kibnal_data.kib_ni, lntmsg[i], rc);
- }
-}
-
-kib_tx_t *
-kibnal_get_idle_tx (void)
-{
- kib_tx_t *tx;
-
- spin_lock(&kibnal_data.kib_tx_lock);
-
- if (list_empty (&kibnal_data.kib_idle_txs)) {
- spin_unlock(&kibnal_data.kib_tx_lock);
- return NULL;
- }
-
- tx = list_entry (kibnal_data.kib_idle_txs.next, kib_tx_t, tx_list);
- list_del (&tx->tx_list);
-
- /* Allocate a new completion cookie. It might not be needed,
- * but we've got a lock right now and we're unlikely to
- * wrap... */
- tx->tx_cookie = kibnal_data.kib_next_tx_cookie++;
-
- spin_unlock(&kibnal_data.kib_tx_lock);
-
- LASSERT (tx->tx_nwrq == 0);
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_sending == 0);
- LASSERT (!tx->tx_waiting);
- LASSERT (tx->tx_status == 0);
- LASSERT (tx->tx_conn == NULL);
- LASSERT (tx->tx_lntmsg[0] == NULL);
- LASSERT (tx->tx_lntmsg[1] == NULL);
-
- return tx;
-}
-
-int
-kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit)
-{
- kib_conn_t *conn = rx->rx_conn;
- int rc = 0;
- FSTATUS frc;
-
- LASSERT (!in_interrupt());
- /* old peers don't reserve rxs for RDMA replies */
- LASSERT (!rsrvd_credit ||
- conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
-
- rx->rx_gl = (IB_LOCAL_DATASEGMENT) {
- .Address = rx->rx_hca_msg,
- .Lkey = kibnal_data.kib_whole_mem.md_lkey,
- .Length = IBNAL_MSG_SIZE,
- };
-
- rx->rx_wrq = (IB_WORK_REQ2) {
- .Next = NULL,
- .WorkReqId = kibnal_ptr2wreqid(rx, IBNAL_WID_RX),
- .MessageLen = IBNAL_MSG_SIZE,
- .DSList = &rx->rx_gl,
- .DSListDepth = 1,
- .Operation = WROpRecv,
- };
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_CONNECTING);
- LASSERT (rx->rx_nob >= 0); /* not posted */
-
- CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n",
- rx->rx_wrq.DSList->Length,
- rx->rx_wrq.DSList->Lkey,
- rx->rx_wrq.DSList->Address);
-
- if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) {
- /* No more posts for this rx; so lose its ref */
- kibnal_conn_decref(conn);
- return 0;
- }
-
- rx->rx_nob = -1; /* flag posted */
- mb();
-
- frc = iba_post_recv2(conn->ibc_qp, &rx->rx_wrq, NULL);
- if (frc == FSUCCESS) {
- if (credit || rsrvd_credit) {
- spin_lock(&conn->ibc_lock);
-
- if (credit)
- conn->ibc_outstanding_credits++;
- if (rsrvd_credit)
- conn->ibc_reserved_credits++;
-
- spin_unlock(&conn->ibc_lock);
-
- kibnal_check_sends(conn);
- }
- return 0;
- }
-
- CERROR ("post rx -> %s failed %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- rc = -EIO;
- kibnal_close_conn(rx->rx_conn, rc);
- /* No more posts for this rx; so lose its ref */
- kibnal_conn_decref(conn);
- return rc;
-}
-
-int
-kibnal_post_receives (kib_conn_t *conn)
-{
- int i;
- int rc;
-
- LASSERT (conn->ibc_state == IBNAL_CONN_CONNECTING);
-
- for (i = 0; i < IBNAL_RX_MSGS; i++) {
- /* +1 ref for rx desc. This ref remains until kibnal_post_rx
- * fails (i.e. actual failure or we're disconnecting) */
- kibnal_conn_addref(conn);
- rc = kibnal_post_rx (&conn->ibc_rxs[i], 0, 0);
- if (rc != 0)
- return rc;
- }
-
- return 0;
-}
-
-kib_tx_t *
-kibnal_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie)
-{
- struct list_head *tmp;
-
- list_for_each(tmp, &conn->ibc_active_txs) {
- kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list);
-
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_sending != 0 || tx->tx_waiting);
-
- if (tx->tx_cookie != cookie)
- continue;
-
- if (tx->tx_waiting &&
- tx->tx_msg->ibm_type == txtype)
- return tx;
-
- CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
- tx->tx_waiting ? "" : "NOT ",
- tx->tx_msg->ibm_type, txtype);
- }
- return NULL;
-}
-
-void
-kibnal_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie)
-{
- kib_tx_t *tx;
- int idle;
-
- spin_lock(&conn->ibc_lock);
-
- tx = kibnal_find_waiting_tx_locked(conn, txtype, cookie);
- if (tx == NULL) {
- spin_unlock(&conn->ibc_lock);
-
- CWARN("Unmatched completion type %x cookie "LPX64" from %s\n",
- txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_close_conn (conn, -EPROTO);
- return;
- }
-
- if (tx->tx_status == 0) { /* success so far */
- if (status < 0) { /* failed? */
- tx->tx_status = status;
- } else if (txtype == IBNAL_MSG_GET_REQ) {
- lnet_set_reply_msg_len(kibnal_data.kib_ni,
- tx->tx_lntmsg[1], status);
- }
- }
-
- tx->tx_waiting = 0;
-
- idle = !tx->tx_queued && (tx->tx_sending == 0);
- if (idle)
- list_del(&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kibnal_tx_done(tx);
-}
-
-void
-kibnal_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie)
-{
- kib_tx_t *tx = kibnal_get_idle_tx();
-
- if (tx == NULL) {
- CERROR("Can't get tx for completion %x for %s\n",
- type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
- }
-
- tx->tx_msg->ibm_u.completion.ibcm_status = status;
- tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
- kibnal_init_tx_msg(tx, type, sizeof(kib_completion_msg_t));
-
- kibnal_queue_tx(tx, conn);
-}
-
-void
-kibnal_handle_rx (kib_rx_t *rx)
-{
- kib_msg_t *msg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- int credits = msg->ibm_credits;
- kib_tx_t *tx;
- int rc = 0;
- int repost = 1;
- int rsrvd_credit = 0;
- int rc2;
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- CDEBUG (D_NET, "Received %x[%d] from %s\n",
- msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- if (credits != 0) {
- /* Have I received credits that will let me send? */
- spin_lock(&conn->ibc_lock);
- conn->ibc_credits += credits;
- spin_unlock(&conn->ibc_lock);
-
- kibnal_check_sends(conn);
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Bad IBNAL message type %x from %s\n",
- msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- rc = -EPROTO;
- break;
-
- case IBNAL_MSG_NOOP:
- break;
-
- case IBNAL_MSG_IMMEDIATE:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr,
- msg->ibm_srcnid, rx, 0);
- repost = rc < 0; /* repost on error */
- break;
-
- case IBNAL_MSG_PUT_REQ:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.putreq.ibprm_hdr,
- msg->ibm_srcnid, rx, 1);
- repost = rc < 0; /* repost on error */
- break;
-
- case IBNAL_MSG_PUT_NAK:
- rsrvd_credit = 1; /* rdma reply (was pre-reserved) */
-
- CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_handle_completion(conn, IBNAL_MSG_PUT_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBNAL_MSG_PUT_ACK:
- rsrvd_credit = 1; /* rdma reply (was pre-reserved) */
-
- spin_lock(&conn->ibc_lock);
- tx = kibnal_find_waiting_tx_locked(conn, IBNAL_MSG_PUT_REQ,
- msg->ibm_u.putack.ibpam_src_cookie);
- if (tx != NULL)
- list_del(&tx->tx_list);
- spin_unlock(&conn->ibc_lock);
-
- if (tx == NULL) {
- CERROR("Unmatched PUT_ACK from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- rc = -EPROTO;
- break;
- }
-
- LASSERT (tx->tx_waiting);
- /* CAVEAT EMPTOR: I could be racing with tx_complete, but...
- * (a) I can overwrite tx_msg since my peer has received it!
- * (b) tx_waiting set tells tx_complete() it's not done. */
-
- tx->tx_nwrq = 0; /* overwrite PUT_REQ */
-
- rc2 = kibnal_init_rdma(tx, IBNAL_MSG_PUT_DONE,
- kibnal_rd_size(&msg->ibm_u.putack.ibpam_rd),
- &msg->ibm_u.putack.ibpam_rd,
- msg->ibm_u.putack.ibpam_dst_cookie);
- if (rc2 < 0)
- CERROR("Can't setup rdma for PUT to %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
-
- spin_lock(&conn->ibc_lock);
- if (tx->tx_status == 0 && rc2 < 0)
- tx->tx_status = rc2;
- tx->tx_waiting = 0; /* clear waiting and queue atomically */
- kibnal_queue_tx_locked(tx, conn);
- spin_unlock(&conn->ibc_lock);
- break;
-
- case IBNAL_MSG_PUT_DONE:
- /* This buffer was pre-reserved by not returning the credit
- * when the PUT_REQ's buffer was reposted, so I just return it
- * now */
- kibnal_handle_completion(conn, IBNAL_MSG_PUT_ACK,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBNAL_MSG_GET_REQ:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.get.ibgm_hdr,
- msg->ibm_srcnid, rx, 1);
- repost = rc < 0; /* repost on error */
- break;
-
- case IBNAL_MSG_GET_DONE:
- rsrvd_credit = 1; /* rdma reply (was pre-reserved) */
-
- kibnal_handle_completion(conn, IBNAL_MSG_GET_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
- }
-
- if (rc < 0) /* protocol error */
- kibnal_close_conn(conn, rc);
-
- if (repost) {
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD)
- rsrvd_credit = 0; /* peer isn't pre-reserving */
-
- kibnal_post_rx(rx, !rsrvd_credit, rsrvd_credit);
- }
-}
-
-void
-kibnal_rx_complete (IB_WORK_COMPLETION *wc, __u64 rxseq)
-{
- kib_rx_t *rx = (kib_rx_t *)kibnal_wreqid2ptr(wc->WorkReqId);
- int nob = wc->Length;
- kib_msg_t *msg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- unsigned long flags;
- int rc;
- int err = -EIO;
-
- LASSERT (rx->rx_nob < 0); /* was posted */
- rx->rx_nob = 0; /* isn't now */
- mb();
-
- /* receives complete with error in any case after we've started
- * disconnecting */
- if (conn->ibc_state > IBNAL_CONN_ESTABLISHED)
- goto ignore;
-
- if (wc->Status != WRStatusSuccess) {
- CERROR("Rx from %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), wc->Status);
- goto failed;
- }
-
- rc = kibnal_unpack_msg(msg, conn->ibc_version, nob);
- if (rc != 0) {
- CERROR ("Error %d unpacking rx from %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- goto failed;
- }
-
- rx->rx_nob = nob; /* Now I know nob > 0 */
- mb();
-
- if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
- msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid ||
- msg->ibm_srcstamp != conn->ibc_incarnation ||
- msg->ibm_dststamp != kibnal_data.kib_incarnation) {
- CERROR ("Stale rx from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- err = -ESTALE;
- goto failed;
- }
-
- if (msg->ibm_seq != rxseq) {
- CERROR ("Out-of-sequence rx from %s"
- ": got "LPD64" but expected "LPD64"\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- msg->ibm_seq, rxseq);
- goto failed;
- }
-
- /* set time last known alive */
- kibnal_peer_alive(conn->ibc_peer);
-
- /* racing with connection establishment/teardown! */
-
- if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) {
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- /* must check holding global lock to eliminate race */
- if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) {
- list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return;
- }
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- }
- kibnal_handle_rx(rx);
- return;
-
- failed:
- kibnal_close_conn(conn, err);
- ignore:
- /* Don't re-post rx & drop its ref on conn */
- kibnal_conn_decref(conn);
-}
-
-struct page *
-kibnal_kvaddr_to_page (unsigned long vaddr)
-{
- struct page *page;
-
- if (vaddr >= VMALLOC_START &&
- vaddr < VMALLOC_END) {
- page = vmalloc_to_page ((void *)vaddr);
- LASSERT (page != NULL);
- return page;
- }
-#ifdef CONFIG_HIGHMEM
- if (vaddr >= PKMAP_BASE &&
- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
- /* No highmem pages only used for bulk (kiov) I/O */
- CERROR("find page for address in highmem\n");
- LBUG();
- }
-#endif
- page = virt_to_page (vaddr);
- LASSERT (page != NULL);
- return page;
-}
-
-#if !IBNAL_USE_FMR
-int
-kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page,
- unsigned long page_offset, unsigned long len)
-{
- kib_rdma_frag_t *frag = &rd->rd_frags[rd->rd_nfrag];
-
- if (rd->rd_nfrag >= IBNAL_MAX_RDMA_FRAGS) {
- CERROR ("Too many RDMA fragments\n");
- return -EMSGSIZE;
- }
-
- if (active) {
- if (rd->rd_nfrag == 0)
- rd->rd_key = kibnal_data.kib_whole_mem.md_lkey;
- } else {
- if (rd->rd_nfrag == 0)
- rd->rd_key = kibnal_data.kib_whole_mem.md_rkey;
- }
-
- frag->rf_nob = len;
- frag->rf_addr = kibnal_data.kib_whole_mem.md_addr +
- lnet_page2phys(page) + page_offset;
-
- CDEBUG(D_NET,"map key %x frag [%d]["LPX64" for %d]\n",
- rd->rd_key, rd->rd_nfrag, frag->rf_addr, frag->rf_nob);
-
- rd->rd_nfrag++;
- return 0;
-}
-
-int
-kibnal_setup_rd_iov(kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
- unsigned int niov, struct iovec *iov, int offset, int nob)
-
-{
- int fragnob;
- int rc;
- unsigned long vaddr;
- struct page *page;
- int page_offset;
-
- LASSERT (nob > 0);
- LASSERT (niov > 0);
- LASSERT ((rd != tx->tx_rd) == !active);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT (niov > 0);
- }
-
- rd->rd_nfrag = 0;
- do {
- LASSERT (niov > 0);
-
- vaddr = ((unsigned long)iov->iov_base) + offset;
- page_offset = vaddr & (PAGE_SIZE - 1);
- page = kibnal_kvaddr_to_page(vaddr);
- if (page == NULL) {
- CERROR ("Can't find page\n");
- return -EFAULT;
- }
-
- fragnob = min((int)(iov->iov_len - offset), nob);
- fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
- rc = kibnal_append_rdfrag(rd, active, page,
- page_offset, fragnob);
- if (rc != 0)
- return rc;
-
- if (offset + fragnob < iov->iov_len) {
- offset += fragnob;
- } else {
- offset = 0;
- iov++;
- niov--;
- }
- nob -= fragnob;
- } while (nob > 0);
-
- return 0;
-}
-
-int
-kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
- int nkiov, lnet_kiov_t *kiov, int offset, int nob)
-{
- int fragnob;
- int rc;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT (nob > 0);
- LASSERT (nkiov > 0);
- LASSERT ((rd != tx->tx_rd) == !active);
-
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- nkiov--;
- kiov++;
- LASSERT (nkiov > 0);
- }
-
- rd->rd_nfrag = 0;
- do {
- LASSERT (nkiov > 0);
- fragnob = min((int)(kiov->kiov_len - offset), nob);
-
- rc = kibnal_append_rdfrag(rd, active, kiov->kiov_page,
- kiov->kiov_offset + offset,
- fragnob);
- if (rc != 0)
- return rc;
-
- offset = 0;
- kiov++;
- nkiov--;
- nob -= fragnob;
- } while (nob > 0);
-
- return 0;
-}
-#else
-int
-kibnal_map_tx (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
- int npages, unsigned long page_offset, int nob)
-{
- IB_ACCESS_CONTROL access = {0,};
- FSTATUS frc;
-
- LASSERT ((rd != tx->tx_rd) == !active);
- LASSERT (!tx->tx_md.md_active);
- LASSERT (tx->tx_md.md_fmrcount > 0);
- LASSERT (page_offset < PAGE_SIZE);
- LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT)));
- LASSERT (npages <= LNET_MAX_IOV);
-
- if (!active) {
- // access.s.MWBindable = 1;
- access.s.LocalWrite = 1;
- access.s.RdmaWrite = 1;
- }
-
- /* Map the memory described by tx->tx_pages
- frc = iibt_register_physical_memory(kibnal_data.kib_hca,
- IBNAL_RDMA_BASE,
- tx->tx_pages, npages,
- page_offset,
- kibnal_data.kib_pd,
- access,
- &tx->tx_md.md_handle,
- &tx->tx_md.md_addr,
- &tx->tx_md.md_lkey,
- &tx->tx_md.md_rkey);
- */
- return -EINVAL;
-}
-
-int
-kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
- unsigned int niov, struct iovec *iov, int offset, int nob)
-
-{
- int resid;
- int fragnob;
- struct page *page;
- int npages;
- unsigned long page_offset;
- unsigned long vaddr;
-
- LASSERT (nob > 0);
- LASSERT (niov > 0);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT (niov > 0);
- }
-
- if (nob > iov->iov_len - offset) {
- CERROR ("Can't map multiple vaddr fragments\n");
- return (-EMSGSIZE);
- }
-
- vaddr = ((unsigned long)iov->iov_base) + offset;
-
- page_offset = vaddr & (PAGE_SIZE - 1);
- resid = nob;
- npages = 0;
-
- do {
- LASSERT (npages < LNET_MAX_IOV);
-
- page = kibnal_kvaddr_to_page(vaddr);
- if (page == NULL) {
- CERROR("Can't find page for %lu\n", vaddr);
- return -EFAULT;
- }
-
- tx->tx_pages[npages++] = lnet_page2phys(page);
-
- fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1));
- vaddr += fragnob;
- resid -= fragnob;
-
- } while (resid > 0);
-
- return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
-}
-
-int
-kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
- int nkiov, lnet_kiov_t *kiov, int offset, int nob)
-{
- int resid;
- int npages;
- unsigned long page_offset;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT (nob > 0);
- LASSERT (nkiov > 0);
- LASSERT (nkiov <= LNET_MAX_IOV);
- LASSERT (!tx->tx_md.md_active);
- LASSERT ((rd != tx->tx_rd) == !active);
-
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- nkiov--;
- kiov++;
- LASSERT (nkiov > 0);
- }
-
- page_offset = kiov->kiov_offset + offset;
-
- resid = offset + nob;
- npages = 0;
-
- do {
- LASSERT (npages < LNET_MAX_IOV);
- LASSERT (nkiov > 0);
-
- if ((npages > 0 && kiov->kiov_offset != 0) ||
- (resid > kiov->kiov_len &&
- (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) {
- /* Can't have gaps */
- CERROR ("Can't make payload contiguous in I/O VM:"
- "page %d, offset %d, len %d \n",
- npages, kiov->kiov_offset, kiov->kiov_len);
-
- return -EINVAL;
- }
-
- tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page);
- resid -= kiov->kiov_len;
- kiov++;
- nkiov--;
- } while (resid > 0);
-
- return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
-}
-#endif
-
-kib_conn_t *
-kibnal_find_conn_locked (kib_peer_t *peer)
-{
- struct list_head *tmp;
-
- /* just return the first connection */
- list_for_each (tmp, &peer->ibp_conns) {
- return (list_entry(tmp, kib_conn_t, ibc_list));
- }
-
- return (NULL);
-}
-
-void
-kibnal_check_sends (kib_conn_t *conn)
-{
- kib_tx_t *tx;
- FSTATUS frc;
- int rc;
- int consume_cred;
- int done;
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- spin_lock(&conn->ibc_lock);
-
- LASSERT (conn->ibc_nsends_posted <=
- *kibnal_tunables.kib_concurrent_sends);
- LASSERT (conn->ibc_reserved_credits >= 0);
-
- while (conn->ibc_reserved_credits > 0 &&
- !list_empty(&conn->ibc_tx_queue_rsrvd)) {
- LASSERT (conn->ibc_version !=
- IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
- tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
- kib_tx_t, tx_list);
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
- conn->ibc_reserved_credits--;
- }
-
- if (list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER ||
- kibnal_send_keepalive(conn))) {
- spin_unlock(&conn->ibc_lock);
-
- tx = kibnal_get_idle_tx();
- if (tx != NULL)
- kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0);
-
- spin_lock(&conn->ibc_lock);
-
- if (tx != NULL)
- kibnal_queue_tx_locked(tx, conn);
- }
-
- for (;;) {
- if (!list_empty(&conn->ibc_tx_queue_nocred)) {
- LASSERT (conn->ibc_version !=
- IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
- tx = list_entry (conn->ibc_tx_queue_nocred.next,
- kib_tx_t, tx_list);
- consume_cred = 0;
- } else if (!list_empty (&conn->ibc_tx_queue)) {
- tx = list_entry (conn->ibc_tx_queue.next,
- kib_tx_t, tx_list);
- consume_cred = 1;
- } else {
- /* nothing waiting */
- break;
- }
-
- LASSERT (tx->tx_queued);
- /* We rely on this for QP sizing */
- LASSERT (tx->tx_nwrq > 0 && tx->tx_nwrq <= 1 + IBNAL_MAX_RDMA_FRAGS);
-
- LASSERT (conn->ibc_outstanding_credits >= 0);
- LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE);
- LASSERT (conn->ibc_credits >= 0);
- LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE);
-
- if (conn->ibc_nsends_posted ==
- *kibnal_tunables.kib_concurrent_sends) {
- /* We've got some tx completions outstanding... */
- CDEBUG(D_NET, "%s: posted enough\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
-
- if (consume_cred) {
- if (conn->ibc_credits == 0) { /* no credits */
- CDEBUG(D_NET, "%s: no credits\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
-
- if (conn->ibc_credits == 1 && /* last credit reserved for */
- conn->ibc_outstanding_credits == 0) { /* giving back credits */
- CDEBUG(D_NET, "%s: not using last credit\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
- }
-
- list_del (&tx->tx_list);
- tx->tx_queued = 0;
-
- /* NB don't drop ibc_lock before bumping tx_sending */
-
- if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP &&
- (!list_empty(&conn->ibc_tx_queue) ||
- !list_empty(&conn->ibc_tx_queue_nocred) ||
- (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER &&
- !kibnal_send_keepalive(conn)))) {
- /* redundant NOOP */
- spin_unlock(&conn->ibc_lock);
- kibnal_tx_done(tx);
- spin_lock(&conn->ibc_lock);
- CDEBUG(D_NET, "%s: redundant noop\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- continue;
- }
-
- kibnal_pack_msg(tx->tx_msg, conn->ibc_version,
- conn->ibc_outstanding_credits,
- conn->ibc_peer->ibp_nid, conn->ibc_incarnation,
- conn->ibc_txseq);
-
- conn->ibc_txseq++;
- conn->ibc_outstanding_credits = 0;
- conn->ibc_nsends_posted++;
- if (consume_cred)
- conn->ibc_credits--;
-
- /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA
- * PUT. If so, it was first queued here as a PUT_REQ, sent and
- * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
- * and then re-queued here. It's (just) possible that
- * tx_sending is non-zero if we've not done the tx_complete() from
- * the first send; hence the ++ rather than = below. */
- tx->tx_sending++;
-
- list_add (&tx->tx_list, &conn->ibc_active_txs);
-
- LASSERT (tx->tx_nwrq > 0);
-
- rc = 0;
- frc = FSUCCESS;
- if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) {
- rc = -ECONNABORTED;
- } else {
- frc = iba_post_send2(conn->ibc_qp, tx->tx_wrq, NULL);
- if (frc != FSUCCESS)
- rc = -EIO;
- }
-
- conn->ibc_last_send = jiffies;
-
- if (rc != 0) {
- /* NB credits are transferred in the actual
- * message, which can only be the last work item */
- conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits;
- if (consume_cred)
- conn->ibc_credits++;
- conn->ibc_nsends_posted--;
-
- tx->tx_status = rc;
- tx->tx_waiting = 0;
- tx->tx_sending--;
-
- done = (tx->tx_sending == 0);
- if (done)
- list_del (&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (conn->ibc_state == IBNAL_CONN_ESTABLISHED)
- CERROR ("Error %d posting transmit to %s\n",
- frc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- else
- CDEBUG (D_NET, "Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- kibnal_close_conn (conn, rc);
-
- if (done)
- kibnal_tx_done (tx);
- return;
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-}
-
-void
-kibnal_tx_complete (IB_WORK_COMPLETION *wc)
-{
- kib_tx_t *tx = (kib_tx_t *)kibnal_wreqid2ptr(wc->WorkReqId);
- kib_conn_t *conn = tx->tx_conn;
- int failed = wc->Status != WRStatusSuccess;
- int idle;
-
- CDEBUG(D_NET, "%s: sending %d nwrq %d status %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- tx->tx_sending, tx->tx_nwrq, wc->Status);
-
- LASSERT (tx->tx_sending > 0);
-
- if (failed &&
- tx->tx_status == 0 &&
- conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
-#if KIBLND_DETAILED_DEBUG
- int i;
- IB_WORK_REQ2 *wrq = &tx->tx_wrq[0];
- IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[0];
- lnet_msg_t *lntmsg = tx->tx_lntmsg[0];
-#endif
- CDEBUG(D_NETERROR, "tx -> %s type %x cookie "LPX64
- " sending %d waiting %d failed %d nwrk %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- tx->tx_msg->ibm_type, tx->tx_cookie,
- tx->tx_sending, tx->tx_waiting, wc->Status,
- tx->tx_nwrq);
-#if KIBLND_DETAILED_DEBUG
- for (i = 0; i < tx->tx_nwrq; i++, wrq++, gl++) {
- switch (wrq->Operation) {
- default:
- CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p OP %d "
- "DSList %p(%p)/%d: "LPX64"/%d K %x\n",
- i, wrq, wrq->Next, wrq->Operation,
- wrq->DSList, gl, wrq->DSListDepth,
- gl->Address, gl->Length, gl->Lkey);
- break;
- case WROpSend:
- CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p SEND "
- "DSList %p(%p)/%d: "LPX64"/%d K %x\n",
- i, wrq, wrq->Next,
- wrq->DSList, gl, wrq->DSListDepth,
- gl->Address, gl->Length, gl->Lkey);
- break;
- case WROpRdmaWrite:
- CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p DMA "
- "DSList: %p(%p)/%d "LPX64"/%d K %x -> "
- LPX64" K %x\n",
- i, wrq, wrq->Next,
- wrq->DSList, gl, wrq->DSListDepth,
- gl->Address, gl->Length, gl->Lkey,
- wrq->Req.SendRC.RemoteDS.Address,
- wrq->Req.SendRC.RemoteDS.Rkey);
- break;
- }
- }
-
- switch (tx->tx_msg->ibm_type) {
- default:
- CDEBUG(D_NETERROR, " msg type %x %p/%d, No RDMA\n",
- tx->tx_msg->ibm_type,
- tx->tx_msg, tx->tx_msg->ibm_nob);
- break;
-
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- CDEBUG(D_NETERROR, " msg type %x %p/%d, RDMA key %x frags %d...\n",
- tx->tx_msg->ibm_type,
- tx->tx_msg, tx->tx_msg->ibm_nob,
- tx->tx_rd->rd_key, tx->tx_rd->rd_nfrag);
- for (i = 0; i < tx->tx_rd->rd_nfrag; i++)
- CDEBUG(D_NETERROR, " [%d] "LPX64"/%d\n", i,
- tx->tx_rd->rd_frags[i].rf_addr,
- tx->tx_rd->rd_frags[i].rf_nob);
- if (lntmsg == NULL) {
- CDEBUG(D_NETERROR, " No lntmsg\n");
- } else if (lntmsg->msg_iov != NULL) {
- CDEBUG(D_NETERROR, " lntmsg in %d VIRT frags...\n",
- lntmsg->msg_niov);
- for (i = 0; i < lntmsg->msg_niov; i++)
- CDEBUG(D_NETERROR, " [%d] %p/%d\n", i,
- lntmsg->msg_iov[i].iov_base,
- lntmsg->msg_iov[i].iov_len);
- } else if (lntmsg->msg_kiov != NULL) {
- CDEBUG(D_NETERROR, " lntmsg in %d PAGE frags...\n",
- lntmsg->msg_niov);
- for (i = 0; i < lntmsg->msg_niov; i++)
- CDEBUG(D_NETERROR, " [%d] %p+%d/%d\n", i,
- lntmsg->msg_kiov[i].kiov_page,
- lntmsg->msg_kiov[i].kiov_offset,
- lntmsg->msg_kiov[i].kiov_len);
- } else {
- CDEBUG(D_NETERROR, " lntmsg in %d frags\n",
- lntmsg->msg_niov);
- }
-
- break;
- }
-#endif
- }
-
- spin_lock(&conn->ibc_lock);
-
- /* I could be racing with rdma completion. Whoever makes 'tx' idle
- * gets to free it, which also drops its ref on 'conn'. */
-
- tx->tx_sending--;
- conn->ibc_nsends_posted--;
-
- if (failed) {
- tx->tx_waiting = 0;
- tx->tx_status = -EIO;
- }
-
- idle = (tx->tx_sending == 0) && /* This is the final callback */
- !tx->tx_waiting && /* Not waiting for peer */
- !tx->tx_queued; /* Not re-queued (PUT_DONE) */
- if (idle)
- list_del(&tx->tx_list);
-
- kibnal_conn_addref(conn); /* 1 ref for me.... */
-
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kibnal_tx_done (tx);
-
- if (failed) {
- kibnal_close_conn (conn, -EIO);
- } else {
- kibnal_peer_alive(conn->ibc_peer);
- kibnal_check_sends(conn);
- }
-
- kibnal_conn_decref(conn); /* ...until here */
-}
-
-void
-kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob)
-{
- IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[tx->tx_nwrq];
- IB_WORK_REQ2 *wrq = &tx->tx_wrq[tx->tx_nwrq];
- int nob = offsetof (kib_msg_t, ibm_u) + body_nob;
-
- LASSERT (tx->tx_nwrq >= 0 &&
- tx->tx_nwrq < (1 + IBNAL_MAX_RDMA_FRAGS));
- LASSERT (nob <= IBNAL_MSG_SIZE);
-
- kibnal_init_msg(tx->tx_msg, type, body_nob);
-
- *gl = (IB_LOCAL_DATASEGMENT) {
- .Address = tx->tx_hca_msg,
- .Length = IBNAL_MSG_SIZE,
- .Lkey = kibnal_data.kib_whole_mem.md_lkey,
- };
-
- wrq->Next = NULL; /* This is the last one */
-
- wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_TX);
- wrq->Operation = WROpSend;
- wrq->DSList = gl;
- wrq->DSListDepth = 1;
- wrq->MessageLen = nob;
- wrq->Req.SendRC.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.SolicitedEvent = 1;
- wrq->Req.SendRC.Options.s.SignaledCompletion = 1;
- wrq->Req.SendRC.Options.s.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.Fence = 0;
- /* fence only needed on RDMA reads */
-
- tx->tx_nwrq++;
-}
-
-int
-kibnal_init_rdma (kib_tx_t *tx, int type, int nob,
- kib_rdma_desc_t *dstrd, __u64 dstcookie)
-{
- kib_msg_t *ibmsg = tx->tx_msg;
- kib_rdma_desc_t *srcrd = tx->tx_rd;
- IB_LOCAL_DATASEGMENT *gl;
- IB_WORK_REQ2 *wrq;
- int rc;
-
-#if IBNAL_USE_FMR
- LASSERT (tx->tx_nwrq == 0);
-
- gl = &tx->tx_gl[0];
- gl->Length = nob;
- gl->Address = srcrd->rd_addr;
- gl->Lkey = srcrd->rd_key;
-
- wrq = &tx->tx_wrq[0];
-
- wrq->Next = wrq + 1;
- wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA);
- wrq->Operation = WROpRdmaWrite;
- wrq->DSList = gl;
- wrq->DSListDepth = 1;
- wrq->MessageLen = nob;
-
- wrq->Req.SendRC.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.SolicitedEvent = 0;
- wrq->Req.SendRC.Options.s.SignaledCompletion = 0;
- wrq->Req.SendRC.Options.s.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.Fence = 0;
-
- wrq->Req.SendRC.RemoteDS.Address = dstrd->rd_addr;
- wrq->Req.SendRC.RemoteDS.Rkey = dstrd->rd_key;
-
- tx->tx_nwrq = 1;
- rc = nob;
-#else
- /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */
- int resid = nob;
- kib_rdma_frag_t *srcfrag;
- int srcidx;
- kib_rdma_frag_t *dstfrag;
- int dstidx;
- int wrknob;
-
- /* Called by scheduler */
- LASSERT (!in_interrupt());
-
- LASSERT (type == IBNAL_MSG_GET_DONE ||
- type == IBNAL_MSG_PUT_DONE);
-
- srcidx = dstidx = 0;
- srcfrag = &srcrd->rd_frags[0];
- dstfrag = &dstrd->rd_frags[0];
- rc = resid;
-
- while (resid > 0) {
- if (srcidx >= srcrd->rd_nfrag) {
- CERROR("Src buffer exhausted: %d frags\n", srcidx);
- rc = -EPROTO;
- break;
- }
-
- if (dstidx == dstrd->rd_nfrag) {
- CERROR("Dst buffer exhausted: %d frags\n", dstidx);
- rc = -EPROTO;
- break;
- }
-
- if (tx->tx_nwrq == IBNAL_MAX_RDMA_FRAGS) {
- CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n",
- srcidx, srcrd->rd_nfrag,
- dstidx, dstrd->rd_nfrag);
- rc = -EMSGSIZE;
- break;
- }
-
- wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid);
-
- gl = &tx->tx_gl[tx->tx_nwrq];
- gl->Length = wrknob;
- gl->Address = srcfrag->rf_addr;
- gl->Lkey = srcrd->rd_key;
-
- wrq = &tx->tx_wrq[tx->tx_nwrq];
-
- wrq->Next = wrq + 1;
- wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA);
- wrq->Operation = WROpRdmaWrite;
- wrq->DSList = gl;
- wrq->DSListDepth = 1;
- wrq->MessageLen = nob;
-
- wrq->Req.SendRC.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.SolicitedEvent = 0;
- wrq->Req.SendRC.Options.s.SignaledCompletion = 0;
- wrq->Req.SendRC.Options.s.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.Fence = 0;
-
- wrq->Req.SendRC.RemoteDS.Address = dstfrag->rf_addr;
- wrq->Req.SendRC.RemoteDS.Rkey = dstrd->rd_key;
-
- resid -= wrknob;
- if (wrknob < srcfrag->rf_nob) {
- srcfrag->rf_addr += wrknob;
- srcfrag->rf_nob -= wrknob;
- } else {
- srcfrag++;
- srcidx++;
- }
-
- if (wrknob < dstfrag->rf_nob) {
- dstfrag->rf_addr += wrknob;
- dstfrag->rf_nob -= wrknob;
- } else {
- dstfrag++;
- dstidx++;
- }
-
- tx->tx_nwrq++;
- }
-
- if (rc < 0) /* no RDMA if completing with failure */
- tx->tx_nwrq = 0;
-#endif
-
- ibmsg->ibm_u.completion.ibcm_status = rc;
- ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
- kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t));
-
- return rc;
-}
-
-void
-kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
-{
- spin_lock(&conn->ibc_lock);
- kibnal_queue_tx_locked (tx, conn);
- spin_unlock(&conn->ibc_lock);
-
- kibnal_check_sends(conn);
-}
-
-void
-kibnal_schedule_active_connect_locked (kib_peer_t *peer, int proto_version)
-{
- /* Called holding kib_global_lock exclusive with IRQs disabled */
-
- peer->ibp_version = proto_version; /* proto version for new conn */
- peer->ibp_connecting++; /* I'm connecting */
- kibnal_peer_addref(peer); /* extra ref for connd */
-
- spin_lock(&kibnal_data.kib_connd_lock);
-
- list_add_tail (&peer->ibp_connd_list, &kibnal_data.kib_connd_peers);
- wake_up (&kibnal_data.kib_connd_waitq);
-
- spin_unlock(&kibnal_data.kib_connd_lock);
-}
-
-void
-kibnal_schedule_active_connect (kib_peer_t *peer, int proto_version)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- kibnal_schedule_active_connect_locked(peer, proto_version);
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- kib_conn_t *conn;
- unsigned long flags;
- rwlock_t *g_lock = &kibnal_data.kib_global_lock;
- int retry;
- int rc;
-
- /* If I get here, I've committed to send, so I complete the tx with
- * failure on any problems */
-
- LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */
- LASSERT (tx->tx_nwrq > 0); /* work items have been set up */
-
- for (retry = 0; ; retry = 1) {
- read_lock_irqsave(g_lock, flags);
-
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL) {
- conn = kibnal_find_conn_locked (peer);
- if (conn != NULL) {
- kibnal_conn_addref(conn); /* 1 ref for me... */
- read_unlock_irqrestore(g_lock, flags);
-
- kibnal_queue_tx (tx, conn);
- kibnal_conn_decref(conn); /* ...to here */
- return;
- }
- }
-
- /* Making one or more connections; I'll need a write lock... */
- read_unlock(g_lock);
- write_lock(g_lock);
-
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL)
- break;
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (retry) {
- CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
-
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kibnal_tx_done (tx);
- return;
- }
-
- rc = kibnal_add_persistent_peer(nid);
- if (rc != 0) {
- CERROR("Can't add peer %s: %d\n",
- libcfs_nid2str(nid), rc);
-
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kibnal_tx_done (tx);
- return;
- }
- }
-
- conn = kibnal_find_conn_locked (peer);
- if (conn != NULL) {
- /* Connection exists; queue message on it */
- kibnal_conn_addref(conn); /* 1 ref for me... */
- write_unlock_irqrestore(g_lock, flags);
-
- kibnal_queue_tx (tx, conn);
- kibnal_conn_decref(conn); /* ...until here */
- return;
- }
-
- if (!kibnal_peer_connecting(peer)) {
- if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */
- time_after_eq(jiffies, peer->ibp_reconnect_time))) {
- write_unlock_irqrestore(g_lock, flags);
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kibnal_tx_done (tx);
- return;
- }
-
- kibnal_schedule_active_connect_locked(peer, IBNAL_MSG_VERSION);
- }
-
- /* A connection is being established; queue the message... */
- list_add_tail (&tx->tx_list, &peer->ibp_tx_queue);
-
- write_unlock_irqrestore(g_lock, flags);
-}
-
-void
-kibnal_txlist_done (struct list_head *txlist, int status)
-{
- kib_tx_t *tx;
-
- while (!list_empty (txlist)) {
- tx = list_entry (txlist->next, kib_tx_t, tx_list);
-
- list_del (&tx->tx_list);
- /* complete now */
- tx->tx_waiting = 0;
- tx->tx_status = status;
- kibnal_tx_done (tx);
- }
-}
-
-int
-kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- lnet_hdr_t *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- kib_msg_t *ibmsg;
- kib_tx_t *tx;
- int nob;
- int rc;
-
- /* NB 'private' is different depending on what we're sending.... */
-
- CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT (payload_nob == 0 || payload_niov > 0);
- LASSERT (payload_niov <= LNET_MAX_IOV);
-
- /* Thread context */
- LASSERT (!in_interrupt());
- /* payload is either all vaddrs or all pages */
- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
- switch (type) {
- default:
- LBUG();
- return (-EIO);
-
- case LNET_MSG_ACK:
- LASSERT (payload_nob == 0);
- break;
-
- case LNET_MSG_GET:
- if (routing || target_is_router)
- break; /* send IMMEDIATE */
-
- /* is the REPLY message too small for RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
- if (nob <= IBNAL_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can allocate txd for GET to %s: \n",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.get.ibgm_hdr = *hdr;
- ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
-
- if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
- rc = kibnal_setup_rd_iov(tx, &ibmsg->ibm_u.get.ibgm_rd,
- 0,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.iov,
- 0, lntmsg->msg_md->md_length);
- else
- rc = kibnal_setup_rd_kiov(tx, &ibmsg->ibm_u.get.ibgm_rd,
- 0,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.kiov,
- 0, lntmsg->msg_md->md_length);
- if (rc != 0) {
- CERROR("Can't setup GET sink for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kibnal_tx_done(tx);
- return -EIO;
- }
-
-#if IBNAL_USE_FMR
- nob = sizeof(kib_get_msg_t);
-#else
- {
- int n = ibmsg->ibm_u.get.ibgm_rd.rd_nfrag;
-
- nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]);
- }
-#endif
- kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob);
-
- tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni,
- lntmsg);
- if (tx->tx_lntmsg[1] == NULL) {
- CERROR("Can't create reply for GET -> %s\n",
- libcfs_nid2str(target.nid));
- kibnal_tx_done(tx);
- return -EIO;
- }
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */
- tx->tx_waiting = 1; /* waiting for GET_DONE */
- kibnal_launch_tx(tx, target.nid);
- return 0;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- /* Is the payload small enough not to need RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob <= IBNAL_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can't allocate %s txd for %s\n",
- type == LNET_MSG_PUT ? "PUT" : "REPLY",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- if (payload_kiov == NULL)
- rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 1,
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- else
- rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 1,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- if (rc != 0) {
- CERROR("Can't setup PUT src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kibnal_tx_done(tx);
- return -EIO;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
- ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
- kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_REQ, sizeof(kib_putreq_msg_t));
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */
- kibnal_launch_tx(tx, target.nid);
- return 0;
- }
-
- /* send IMMEDIATE */
-
- LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob])
- <= IBNAL_MSG_SIZE);
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR ("Can't send %d to %s: tx descs exhausted\n",
- type, libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
- if (payload_kiov != NULL)
- lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-
- nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]);
- kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- kibnal_launch_tx(tx, target.nid);
- return 0;
-}
-
-void
-kibnal_reply(lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg)
-{
- lnet_process_id_t target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- kib_tx_t *tx;
- int rc;
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can't get tx for REPLY to %s\n",
- libcfs_nid2str(target.nid));
- goto failed_0;
- }
-
- if (nob == 0)
- rc = 0;
- else if (kiov == NULL)
- rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 1,
- niov, iov, offset, nob);
- else
- rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 1,
- niov, kiov, offset, nob);
-
- if (rc != 0) {
- CERROR("Can't setup GET src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- rc = kibnal_init_rdma(tx, IBNAL_MSG_GET_DONE, nob,
- &rx->rx_msg->ibm_u.get.ibgm_rd,
- rx->rx_msg->ibm_u.get.ibgm_cookie);
- if (rc < 0) {
- CERROR("Can't setup rdma for GET from %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- if (rc == 0) {
- /* No RDMA: local completion may happen now! */
- lnet_finalize(ni, lntmsg, 0);
- } else {
- /* RDMA: lnet_finalize(lntmsg) when it
- * completes */
- tx->tx_lntmsg[0] = lntmsg;
- }
-
- kibnal_queue_tx(tx, rx->rx_conn);
- return;
-
- failed_1:
- kibnal_tx_done(tx);
- failed_0:
- lnet_finalize(ni, lntmsg, -EIO);
-}
-
-int
-kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- void **new_private)
-{
- kib_rx_t *rx = private;
- kib_conn_t *conn = rx->rx_conn;
-
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- /* Can't block if RDMA completions need normal credits */
- LCONSOLE_ERROR_MSG(0x12d, "Dropping message from %s: no "
- "buffers free. %s is running an old version"
- " of LNET that may deadlock if messages "
- "wait for buffers)\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return -EDEADLK;
- }
-
- *new_private = private;
- return 0;
-}
-
-int
-kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- kib_rx_t *rx = private;
- kib_msg_t *rxmsg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- kib_tx_t *tx;
- kib_msg_t *txmsg;
- int nob;
- int post_cred = 1;
- int rc = 0;
-
- LASSERT (mlen <= rlen);
- LASSERT (!in_interrupt());
- /* Either all pages or all vaddrs */
- LASSERT (!(kiov != NULL && iov != NULL));
-
- switch (rxmsg->ibm_type) {
- default:
- LBUG();
-
- case IBNAL_MSG_IMMEDIATE:
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]);
- if (nob > rx->rx_nob) {
- CERROR ("Immediate message from %s too big: %d(%d)\n",
- libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
- nob, rx->rx_nob);
- rc = -EPROTO;
- break;
- }
-
- if (kiov != NULL)
- lnet_copy_flat2kiov(niov, kiov, offset,
- IBNAL_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
- else
- lnet_copy_flat2iov(niov, iov, offset,
- IBNAL_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
- lnet_finalize (ni, lntmsg, 0);
- break;
-
- case IBNAL_MSG_PUT_REQ:
- if (mlen == 0) {
- lnet_finalize(ni, lntmsg, 0);
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can't allocate tx for %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /* Not replying will break the connection */
- rc = -ENOMEM;
- break;
- }
-
- txmsg = tx->tx_msg;
- if (kiov == NULL)
- rc = kibnal_setup_rd_iov(tx,
- &txmsg->ibm_u.putack.ibpam_rd,
- 0,
- niov, iov, offset, mlen);
- else
- rc = kibnal_setup_rd_kiov(tx,
- &txmsg->ibm_u.putack.ibpam_rd,
- 0,
- niov, kiov, offset, mlen);
- if (rc != 0) {
- CERROR("Can't setup PUT sink for %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kibnal_tx_done(tx);
- /* tell peer it's over */
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
- txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
-#if IBNAL_USE_FMR
- nob = sizeof(kib_putack_msg_t);
-#else
- {
- int n = tx->tx_msg->ibm_u.putack.ibpam_rd.rd_nfrag;
-
- nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]);
- }
-#endif
- kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_ACK, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_DONE */
- kibnal_queue_tx(tx, conn);
-
- if (conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD)
- post_cred = 0; /* peer still owns 'rx' for sending PUT_DONE */
- break;
-
- case IBNAL_MSG_GET_REQ:
- if (lntmsg != NULL) {
- /* Optimized GET; RDMA lntmsg's payload */
- kibnal_reply(ni, rx, lntmsg);
- } else {
- /* GET didn't match anything */
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE,
- -ENODATA,
- rxmsg->ibm_u.get.ibgm_cookie);
- }
- break;
- }
-
- kibnal_post_rx(rx, post_cred, 0);
- return rc;
-}
-
-int
-kibnal_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid = kernel_thread (fn, arg, 0);
-
- if (pid < 0)
- return ((int)pid);
-
- atomic_inc (&kibnal_data.kib_nthreads);
- return (0);
-}
-
-void
-kibnal_thread_fini (void)
-{
- atomic_dec (&kibnal_data.kib_nthreads);
-}
-
-void
-kibnal_peer_alive (kib_peer_t *peer)
-{
- /* This is racy, but everyone's only writing cfs_time_current() */
- peer->ibp_last_alive = cfs_time_current();
- mb();
-}
-
-void
-kibnal_peer_notify (kib_peer_t *peer)
-{
- time_t last_alive = 0;
- int error = 0;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (list_empty(&peer->ibp_conns) &&
- peer->ibp_accepting == 0 &&
- peer->ibp_connecting == 0 &&
- peer->ibp_error != 0) {
- error = peer->ibp_error;
- peer->ibp_error = 0;
- last_alive = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() -
- peer->ibp_last_alive);
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (error != 0)
- lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive);
-}
-
-void
-kibnal_schedule_conn (kib_conn_t *conn)
-{
- unsigned long flags;
-
- kibnal_conn_addref(conn); /* ++ref for connd */
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns);
- wake_up (&kibnal_data.kib_connd_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
-}
-
-void
-kibnal_close_conn_locked (kib_conn_t *conn, int error)
-{
- /* This just does the immediate housekeeping to start shutdown of an
- * established connection. 'error' is zero for a normal shutdown.
- * Caller holds kib_global_lock exclusively in irq context */
- kib_peer_t *peer = conn->ibc_peer;
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- if (conn->ibc_state != IBNAL_CONN_ESTABLISHED)
- return; /* already being handled */
-
- /* NB Can't take ibc_lock here (could be in IRQ context), without
- * risking deadlock, so access to ibc_{tx_queue,active_txs} is racey */
-
- if (error == 0 &&
- list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_rsrvd) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- list_empty(&conn->ibc_active_txs)) {
- CDEBUG(D_NET, "closing conn to %s"
- " rx# "LPD64" tx# "LPD64"\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_txseq, conn->ibc_rxseq);
- } else {
- CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s"
- " rx# "LPD64" tx# "LPD64"\n",
- libcfs_nid2str(peer->ibp_nid), error,
- list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
- list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
- list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
- list_empty(&conn->ibc_active_txs) ? "" : "(waiting)",
- conn->ibc_txseq, conn->ibc_rxseq);
-#if 0
- /* can't skip down the queue without holding ibc_lock (see above) */
- list_for_each(tmp, &conn->ibc_tx_queue) {
- kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list);
-
- CERROR(" queued tx type %x cookie "LPX64
- " sending %d waiting %d ticks %ld/%d\n",
- tx->tx_msg->ibm_type, tx->tx_cookie,
- tx->tx_sending, tx->tx_waiting,
- (long)(tx->tx_deadline - jiffies), HZ);
- }
-
- list_for_each(tmp, &conn->ibc_active_txs) {
- kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list);
-
- CERROR(" active tx type %x cookie "LPX64
- " sending %d waiting %d ticks %ld/%d\n",
- tx->tx_msg->ibm_type, tx->tx_cookie,
- tx->tx_sending, tx->tx_waiting,
- (long)(tx->tx_deadline - jiffies), HZ);
- }
-#endif
- }
-
- list_del (&conn->ibc_list);
-
- if (list_empty (&peer->ibp_conns)) { /* no more conns */
- if (peer->ibp_persistence == 0 && /* non-persistent peer */
- kibnal_peer_active(peer)) /* still in peer table */
- kibnal_unlink_peer_locked (peer);
-
- peer->ibp_error = error; /* set/clear error on last conn */
- }
-
- kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTING);
-
- kibnal_schedule_conn(conn);
- kibnal_conn_decref(conn); /* lose ibc_list's ref */
-}
-
-void
-kibnal_close_conn (kib_conn_t *conn, int error)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- kibnal_close_conn_locked (conn, error);
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_handle_early_rxs(kib_conn_t *conn)
-{
- unsigned long flags;
- kib_rx_t *rx;
-
- LASSERT (!in_interrupt());
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- while (!list_empty(&conn->ibc_early_rxs)) {
- rx = list_entry(conn->ibc_early_rxs.next,
- kib_rx_t, rx_list);
- list_del(&rx->rx_list);
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_handle_rx(rx);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- }
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_abort_txs(kib_conn_t *conn, struct list_head *txs)
-{
- LIST_HEAD (zombies);
- struct list_head *tmp;
- struct list_head *nxt;
- kib_tx_t *tx;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each_safe (tmp, nxt, txs) {
- tx = list_entry (tmp, kib_tx_t, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_waiting || tx->tx_sending != 0);
- } else {
- LASSERT (tx->tx_queued);
- }
-
- tx->tx_status = -ECONNABORTED;
- tx->tx_queued = 0;
- tx->tx_waiting = 0;
-
- if (tx->tx_sending == 0) {
- list_del (&tx->tx_list);
- list_add (&tx->tx_list, &zombies);
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-
- kibnal_txlist_done(&zombies, -ECONNABORTED);
-}
-
-void
-kibnal_conn_disconnected(kib_conn_t *conn)
-{
- static IB_QP_ATTRIBUTES_MODIFY qpam = {.RequestState = QPStateError};
-
- FSTATUS frc;
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP);
-
- kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTED);
-
- /* move QP to error state to make posted work items complete */
- frc = iba_modify_qp(conn->ibc_qp, &qpam, NULL);
- if (frc != FSUCCESS)
- CERROR("can't move qp state to error: %d\n", frc);
-
- /* Complete all tx descs not waiting for sends to complete.
- * NB we should be safe from RDMA now that the QP has changed state */
-
- kibnal_abort_txs(conn, &conn->ibc_tx_queue);
- kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
- kibnal_abort_txs(conn, &conn->ibc_tx_queue);
- kibnal_abort_txs(conn, &conn->ibc_active_txs);
-
- kibnal_handle_early_rxs(conn);
-}
-
-void
-kibnal_peer_connect_failed (kib_peer_t *peer, int type, int error)
-{
- LIST_HEAD (zombies);
- unsigned long flags;
-
- LASSERT (error != 0);
- LASSERT (!in_interrupt());
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- LASSERT (kibnal_peer_connecting(peer));
-
- switch (type) {
- case IBNAL_CONN_ACTIVE:
- LASSERT (peer->ibp_connecting > 0);
- peer->ibp_connecting--;
- break;
-
- case IBNAL_CONN_PASSIVE:
- LASSERT (peer->ibp_accepting > 0);
- peer->ibp_accepting--;
- break;
-
- case IBNAL_CONN_WAITING:
- /* Can't assert; I might be racing with a successful connection
- * which clears passivewait */
- peer->ibp_passivewait = 0;
- break;
- default:
- LBUG();
- }
-
- if (kibnal_peer_connecting(peer) || /* another attempt underway */
- !list_empty(&peer->ibp_conns)) { /* got connected */
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
- return;
- }
-
- /* Say when active connection can be re-attempted */
- peer->ibp_reconnect_interval *= 2;
- peer->ibp_reconnect_interval =
- MAX(peer->ibp_reconnect_interval,
- *kibnal_tunables.kib_min_reconnect_interval);
- peer->ibp_reconnect_interval =
- MIN(peer->ibp_reconnect_interval,
- *kibnal_tunables.kib_max_reconnect_interval);
-
- peer->ibp_reconnect_time = jiffies + peer->ibp_reconnect_interval * HZ;
-
- /* Take peer's blocked transmits to complete with error */
- list_add(&zombies, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (kibnal_peer_active(peer) &&
- peer->ibp_persistence == 0) {
- /* failed connection attempt on non-persistent peer */
- kibnal_unlink_peer_locked (peer);
- }
-
- peer->ibp_error = error;
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_peer_notify(peer);
-
- if (list_empty (&zombies))
- return;
-
- CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_txlist_done (&zombies, -EHOSTUNREACH);
-}
-
-void
-kibnal_connreq_done (kib_conn_t *conn, int type, int status)
-{
- kib_peer_t *peer = conn->ibc_peer;
- struct list_head txs;
- kib_tx_t *tx;
- unsigned long flags;
-
- LASSERT (!in_interrupt());
- LASSERT (type == IBNAL_CONN_ACTIVE || type == IBNAL_CONN_PASSIVE);
- LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP);
- LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED);
- LASSERT (kibnal_peer_connecting(peer));
-
- LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars));
- conn->ibc_cvars = NULL;
-
- if (status != 0) {
- /* failed to establish connection */
- kibnal_peer_connect_failed(conn->ibc_peer, type, status);
- kibnal_conn_disconnected(conn);
- kibnal_conn_decref(conn); /* Lose CM's ref */
- return;
- }
-
- /* connection established */
- LASSERT(conn->ibc_state == IBNAL_CONN_CONNECTING);
-
- conn->ibc_last_send = jiffies;
- kibnal_set_conn_state(conn, IBNAL_CONN_ESTABLISHED);
- kibnal_peer_alive(peer);
-
- CDEBUG(D_NET, "Connection %s ESTABLISHED\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- peer->ibp_passivewait = 0; /* not waiting (got conn now) */
- kibnal_conn_addref(conn); /* +1 ref for ibc_list */
- list_add_tail(&conn->ibc_list, &peer->ibp_conns);
-
- if (!kibnal_peer_active(peer)) {
- /* peer has been deleted */
- kibnal_close_conn_locked(conn, -ECONNABORTED);
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- kibnal_peer_connect_failed(conn->ibc_peer, type, -ECONNABORTED);
- kibnal_conn_decref(conn); /* lose CM's ref */
- return;
- }
-
- switch (type) {
- case IBNAL_CONN_ACTIVE:
- LASSERT (peer->ibp_connecting > 0);
- peer->ibp_connecting--;
- break;
-
- case IBNAL_CONN_PASSIVE:
- LASSERT (peer->ibp_accepting > 0);
- peer->ibp_accepting--;
- break;
- default:
- LBUG();
- }
-
- peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */
-
- /* Nuke any dangling conns from a different peer instance... */
- kibnal_close_stale_conns_locked(peer, conn->ibc_incarnation);
-
- /* grab txs blocking for a conn */
- list_add(&txs, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- /* Schedule blocked txs */
- spin_lock (&conn->ibc_lock);
- while (!list_empty (&txs)) {
- tx = list_entry (txs.next, kib_tx_t, tx_list);
- list_del (&tx->tx_list);
-
- kibnal_queue_tx_locked (tx, conn);
- }
- spin_unlock (&conn->ibc_lock);
- kibnal_check_sends (conn);
-}
-
-void
-kibnal_reject (lnet_nid_t nid, IB_HANDLE cep, int why)
-{
- static CM_REJECT_INFO msgs[3];
- CM_REJECT_INFO *msg = &msgs[why];
- FSTATUS frc;
-
- LASSERT (why >= 0 && why < sizeof(msgs)/sizeof(msgs[0]));
-
- /* If I wasn't so lazy, I'd initialise this only once; it's effectively
- * read-only... */
- msg->Reason = RC_USER_REJ;
- msg->PrivateData[0] = (IBNAL_MSG_MAGIC) & 0xff;
- msg->PrivateData[1] = (IBNAL_MSG_MAGIC >> 8) & 0xff;
- msg->PrivateData[2] = (IBNAL_MSG_MAGIC >> 16) & 0xff;
- msg->PrivateData[3] = (IBNAL_MSG_MAGIC >> 24) & 0xff;
- msg->PrivateData[4] = (IBNAL_MSG_VERSION) & 0xff;
- msg->PrivateData[5] = (IBNAL_MSG_VERSION >> 8) & 0xff;
- msg->PrivateData[6] = why;
-
- frc = iba_cm_reject(cep, msg);
- if (frc != FSUCCESS)
- CERROR("Error %d rejecting %s\n", frc, libcfs_nid2str(nid));
-}
-
-void
-kibnal_check_connreject(kib_conn_t *conn, int type, CM_REJECT_INFO *rej)
-{
- kib_peer_t *peer = conn->ibc_peer;
- unsigned long flags;
- int magic;
- int version;
- int why;
-
- LASSERT (type == IBNAL_CONN_ACTIVE ||
- type == IBNAL_CONN_PASSIVE);
-
- CDEBUG(D_NET, "%s connection with %s rejected: %d\n",
- (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive",
- libcfs_nid2str(peer->ibp_nid), rej->Reason);
-
- switch (rej->Reason) {
- case RC_STALE_CONN:
- if (type == IBNAL_CONN_PASSIVE) {
- CERROR("Connection to %s rejected (stale QP)\n",
- libcfs_nid2str(peer->ibp_nid));
- } else {
- CWARN("Connection from %s rejected (stale QP): "
- "retrying...\n", libcfs_nid2str(peer->ibp_nid));
-
- /* retry from scratch to allocate a new conn
- * which will use a different QP */
- kibnal_schedule_active_connect(peer, peer->ibp_version);
- }
-
- /* An FCM_DISCONNECTED callback is still outstanding: give it a
- * ref since kibnal_connreq_done() drops the CM's ref on conn
- * on failure */
- kibnal_conn_addref(conn);
- break;
-
- case RC_USER_REJ:
- magic = (rej->PrivateData[0]) |
- (rej->PrivateData[1] << 8) |
- (rej->PrivateData[2] << 16) |
- (rej->PrivateData[3] << 24);
- version = (rej->PrivateData[4]) |
- (rej->PrivateData[5] << 8);
- why = (rej->PrivateData[6]);
-
- /* retry with old proto version */
- if (magic == IBNAL_MSG_MAGIC &&
- version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
- conn->ibc_version == IBNAL_MSG_VERSION &&
- type != IBNAL_CONN_PASSIVE) {
- /* retry with a new conn */
- CWARN ("Connection to %s refused: "
- "retrying with old protocol version 0x%x\n",
- libcfs_nid2str(peer->ibp_nid), version);
- kibnal_schedule_active_connect(peer, version);
- break;
- }
-
- if (magic != IBNAL_MSG_MAGIC ||
- version != IBNAL_MSG_VERSION) {
- CERROR("%s connection with %s rejected "
- "(magic/ver %08x/%d why %d): "
- "incompatible protocol\n",
- (type == IBNAL_CONN_ACTIVE) ?
- "Active" : "Passive",
- libcfs_nid2str(peer->ibp_nid),
- magic, version, why);
- break;
- }
-
- if (type == IBNAL_CONN_ACTIVE &&
- why == IBNAL_REJECT_CONN_RACE) {
- /* lost connection race */
- CWARN("Connection to %s rejected: "
- "lost connection race\n",
- libcfs_nid2str(peer->ibp_nid));
-
- write_lock_irqsave(&kibnal_data.kib_global_lock,
- flags);
-
- if (list_empty(&peer->ibp_conns)) {
- peer->ibp_passivewait = 1;
- peer->ibp_passivewait_deadline =
- jiffies +
- (*kibnal_tunables.kib_timeout * HZ);
- }
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- break;
- }
-
- CERROR("%s connection with %s rejected: %d\n",
- (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive",
- libcfs_nid2str(peer->ibp_nid), why);
- break;
-
- default:
- CERROR("%s connection with %s rejected: %d\n",
- (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive",
- libcfs_nid2str(peer->ibp_nid), rej->Reason);
- }
-
- kibnal_connreq_done(conn, type, -ECONNREFUSED);
-}
-
-void
-kibnal_cm_disconnect_callback(kib_conn_t *conn, CM_CONN_INFO *info)
-{
- CDEBUG(D_NET, "%s: state %d, status 0x%x\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- conn->ibc_state, info->Status);
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- switch (info->Status) {
- default:
- LBUG();
- break;
-
- case FCM_DISCONNECT_REQUEST:
- /* Schedule conn to iba_cm_disconnect() if it wasn't already */
- kibnal_close_conn (conn, 0);
- break;
-
- case FCM_DISCONNECT_REPLY: /* peer acks my disconnect req */
- case FCM_DISCONNECTED: /* end of TIME_WAIT */
- CDEBUG(D_NET, "Connection %s disconnected.\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_conn_decref(conn); /* Lose CM's ref */
- break;
- }
-}
-
-void
-kibnal_cm_passive_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
-{
- kib_conn_t *conn = arg;
-
- CDEBUG(D_NET, "status 0x%x\n", info->Status);
-
- /* Established Connection Notifier */
- switch (info->Status) {
- default:
- CERROR("Unexpected status %d on Connection %s\n",
- info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- LBUG();
- break;
-
- case FCM_CONNECT_TIMEOUT:
- kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ETIMEDOUT);
- break;
-
- case FCM_CONNECT_REJECT:
- kibnal_check_connreject(conn, IBNAL_CONN_PASSIVE,
- &info->Info.Reject);
- break;
-
- case FCM_CONNECT_ESTABLISHED:
- kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, 0);
- break;
-
- case FCM_DISCONNECT_REQUEST:
- case FCM_DISCONNECT_REPLY:
- case FCM_DISCONNECTED:
- kibnal_cm_disconnect_callback(conn, info);
- break;
- }
-}
-
-int
-kibnal_accept (kib_conn_t **connp, IB_HANDLE cep, kib_msg_t *msg, int nob)
-{
- lnet_nid_t nid;
- kib_conn_t *conn;
- kib_peer_t *peer;
- kib_peer_t *peer2;
- unsigned long flags;
- int rc;
-
- rc = kibnal_unpack_msg(msg, 0, nob);
- if (rc != 0) {
- /* SILENT! kibnal_unpack_msg() complains if required */
- kibnal_reject(LNET_NID_ANY, cep, IBNAL_REJECT_FATAL);
- return -EPROTO;
- }
-
- nid = msg->ibm_srcnid;
-
- if (msg->ibm_version != IBNAL_MSG_VERSION)
- CWARN("Connection from %s: old protocol version 0x%x\n",
- libcfs_nid2str(nid), msg->ibm_version);
-
- if (msg->ibm_type != IBNAL_MSG_CONNREQ) {
- CERROR("Can't accept %s: bad request type %d (%d expected)\n",
- libcfs_nid2str(nid), msg->ibm_type, IBNAL_MSG_CONNREQ);
- kibnal_reject(nid, cep, IBNAL_REJECT_FATAL);
- return -EPROTO;
- }
-
- if (msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid) {
- CERROR("Can't accept %s: bad dst NID %s (%s expected)\n",
- libcfs_nid2str(nid),
- libcfs_nid2str(msg->ibm_dstnid),
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- kibnal_reject(nid, cep, IBNAL_REJECT_FATAL);
- return -EPROTO;
- }
-
- if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE ||
- msg->ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE ||
- msg->ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Reject %s: q %d sz %d frag %d, (%d %d %d expected)\n",
- libcfs_nid2str(nid),
- msg->ibm_u.connparams.ibcp_queue_depth,
- msg->ibm_u.connparams.ibcp_max_msg_size,
- msg->ibm_u.connparams.ibcp_max_frags,
- IBNAL_MSG_QUEUE_SIZE,
- IBNAL_MSG_SIZE,
- IBNAL_MAX_RDMA_FRAGS);
- kibnal_reject(nid, cep, IBNAL_REJECT_FATAL);
- return -EPROTO;
- }
-
- conn = kibnal_create_conn(nid, msg->ibm_version);
- if (conn == NULL) {
- kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES);
- return -ENOMEM;
- }
-
- /* assume 'nid' is a new peer */
- rc = kibnal_create_peer(&peer, nid);
- if (rc != 0) {
- kibnal_conn_decref(conn);
- kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES);
- return -ENOMEM;
- }
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (kibnal_data.kib_listener_cep == NULL) { /* shutdown started */
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_peer_decref(peer);
- kibnal_conn_decref(conn);
- kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES);
- return -ESHUTDOWN;
- }
-
- peer2 = kibnal_find_peer_locked(nid);
- if (peer2 == NULL) {
- /* peer table takes my ref on peer */
- list_add_tail (&peer->ibp_list, kibnal_nid2peerlist(nid));
- LASSERT (peer->ibp_connecting == 0);
- } else {
- kibnal_peer_decref(peer);
- peer = peer2;
-
- if (peer->ibp_connecting != 0 &&
- peer->ibp_nid < kibnal_data.kib_ni->ni_nid) {
- /* Resolve concurrent connection attempts in favour of
- * the higher NID */
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- kibnal_conn_decref(conn);
- kibnal_reject(nid, cep, IBNAL_REJECT_CONN_RACE);
- return -EALREADY;
- }
- }
-
- kibnal_peer_addref(peer); /* +1 ref for conn */
- peer->ibp_accepting++;
-
- kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING);
- conn->ibc_peer = peer;
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE;
- LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
- <= IBNAL_RX_MSGS);
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- *connp = conn;
- return 0;
-}
-
-void
-kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
-{
-
- CM_REQUEST_INFO *req = &info->Info.Request;
- CM_REPLY_INFO *rep;
- kib_conn_t *conn;
- FSTATUS frc;
- int rc;
-
- LASSERT(arg == NULL); /* no conn yet for passive */
-
- CDEBUG(D_NET, "%x\n", info->Status);
-
- if (info->Status == FCM_CONNECT_CANCEL) {
- up(&kibnal_data.kib_listener_signal);
- return;
- }
-
- LASSERT (info->Status == FCM_CONNECT_REQUEST);
-
- rc = kibnal_accept(&conn, cep, (kib_msg_t *)req->PrivateData,
- CM_REQUEST_INFO_USER_LEN);
- if (rc != 0) /* kibnal_accept has rejected */
- return;
-
- conn->ibc_cvars->cv_path = req->PathInfo.Path;
-
- rc = kibnal_conn_rts(conn,
- req->CEPInfo.QPN,
- req->CEPInfo.OfferedInitiatorDepth,
- req->CEPInfo.OfferedResponderResources,
- req->CEPInfo.StartingPSN);
- if (rc != 0) {
- kibnal_reject(conn->ibc_peer->ibp_nid, cep,
- IBNAL_REJECT_NO_RESOURCES);
- kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ECONNABORTED);
- return;
- }
-
- memset(&conn->ibc_cvars->cv_cmci, 0, sizeof(conn->ibc_cvars->cv_cmci));
- rep = &conn->ibc_cvars->cv_cmci.Info.Reply;
-
- rep->QPN = conn->ibc_cvars->cv_qpattrs.QPNumber;
- rep->QKey = conn->ibc_cvars->cv_qpattrs.Qkey;
- rep->StartingPSN = conn->ibc_cvars->cv_qpattrs.RecvPSN;
- rep->EndToEndFlowControl = conn->ibc_cvars->cv_qpattrs.FlowControl;
- rep->ArbInitiatorDepth = conn->ibc_cvars->cv_qpattrs.InitiatorDepth;
- rep->ArbResponderResources = conn->ibc_cvars->cv_qpattrs.ResponderResources;
- rep->TargetAckDelay = kibnal_data.kib_hca_attrs.LocalCaAckDelay;
- rep->FailoverAccepted = IBNAL_FAILOVER_ACCEPTED;
- rep->RnRRetryCount = req->CEPInfo.RnrRetryCount;
-
- CLASSERT (CM_REPLY_INFO_USER_LEN >=
- offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t));
-
- kibnal_pack_connmsg((kib_msg_t *)rep->PrivateData,
- conn->ibc_version,
- CM_REPLY_INFO_USER_LEN,
- IBNAL_MSG_CONNACK,
- conn->ibc_peer->ibp_nid, conn->ibc_incarnation);
-
- LASSERT (conn->ibc_cep == NULL);
- kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING);
-
- frc = iba_cm_accept(cep,
- &conn->ibc_cvars->cv_cmci,
- NULL,
- kibnal_cm_passive_callback, conn,
- &conn->ibc_cep);
-
- if (frc == FSUCCESS || frc == FPENDING)
- return;
-
- CERROR("iba_cm_accept(%s) failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ECONNABORTED);
-}
-
-void
-kibnal_check_connreply(kib_conn_t *conn, CM_REPLY_INFO *rep)
-{
- kib_msg_t *msg = (kib_msg_t *)rep->PrivateData;
- lnet_nid_t nid = conn->ibc_peer->ibp_nid;
- FSTATUS frc;
- int rc;
-
- rc = kibnal_unpack_msg(msg, conn->ibc_version, CM_REPLY_INFO_USER_LEN);
- if (rc != 0) {
- CERROR ("Error %d unpacking connack from %s\n",
- rc, libcfs_nid2str(nid));
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO);
- return;
- }
-
- if (msg->ibm_type != IBNAL_MSG_CONNACK) {
- CERROR("Bad connack request type %d (%d expected) from %s\n",
- msg->ibm_type, IBNAL_MSG_CONNREQ,
- libcfs_nid2str(msg->ibm_srcnid));
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO);
- return;
- }
-
- if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
- msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid ||
- msg->ibm_dststamp != kibnal_data.kib_incarnation) {
- CERROR("Stale connack from %s(%s): %s(%s), "LPX64"("LPX64")\n",
- libcfs_nid2str(msg->ibm_srcnid),
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- libcfs_nid2str(msg->ibm_dstnid),
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid),
- msg->ibm_dststamp, kibnal_data.kib_incarnation);
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ESTALE);
- return;
- }
-
- if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE ||
- msg->ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE ||
- msg->ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Reject %s: q %d sz %d frag %d, (%d %d %d expected)\n",
- libcfs_nid2str(msg->ibm_srcnid),
- msg->ibm_u.connparams.ibcp_queue_depth,
- msg->ibm_u.connparams.ibcp_max_msg_size,
- msg->ibm_u.connparams.ibcp_max_frags,
- IBNAL_MSG_QUEUE_SIZE,
- IBNAL_MSG_SIZE,
- IBNAL_MAX_RDMA_FRAGS);
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO);
- return;
- }
-
- CDEBUG(D_NET, "Connection %s REP_RECEIVED.\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE;
- LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
- <= IBNAL_RX_MSGS);
-
- rc = kibnal_conn_rts(conn,
- rep->QPN,
- rep->ArbInitiatorDepth,
- rep->ArbResponderResources,
- rep->StartingPSN);
- if (rc != 0) {
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_NO_RESOURCES);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EIO);
- return;
- }
-
- memset(&conn->ibc_cvars->cv_cmci, 0, sizeof(conn->ibc_cvars->cv_cmci));
-
- frc = iba_cm_accept(conn->ibc_cep,
- &conn->ibc_cvars->cv_cmci,
- NULL, NULL, NULL, NULL);
-
- if (frc == FCM_CONNECT_ESTABLISHED) {
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, 0);
- return;
- }
-
- CERROR("Connection %s CMAccept failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ECONNABORTED);
-}
-
-void
-kibnal_cm_active_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
-{
- kib_conn_t *conn = arg;
-
- CDEBUG(D_NET, "status 0x%x\n", info->Status);
-
- switch (info->Status) {
- default:
- CERROR("unknown status %d on Connection %s\n",
- info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- LBUG();
- break;
-
- case FCM_CONNECT_TIMEOUT:
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ETIMEDOUT);
- break;
-
- case FCM_CONNECT_REJECT:
- kibnal_check_connreject(conn, IBNAL_CONN_ACTIVE,
- &info->Info.Reject);
- break;
-
- case FCM_CONNECT_REPLY:
- kibnal_check_connreply(conn, &info->Info.Reply);
- break;
-
- case FCM_DISCONNECT_REQUEST:
- case FCM_DISCONNECT_REPLY:
- case FCM_DISCONNECTED:
- kibnal_cm_disconnect_callback(conn, info);
- break;
- }
-}
-
-void
-dump_path_records(PATH_RESULTS *results)
-{
- IB_PATH_RECORD *path;
- int i;
-
- for (i = 0; i < results->NumPathRecords; i++) {
- path = &results->PathRecords[i];
- CDEBUG(D_NET, "%d: sgid "LPX64":"LPX64" dgid "
- LPX64":"LPX64" pkey %x\n",
- i,
- path->SGID.Type.Global.SubnetPrefix,
- path->SGID.Type.Global.InterfaceID,
- path->DGID.Type.Global.SubnetPrefix,
- path->DGID.Type.Global.InterfaceID,
- path->P_Key);
- }
-}
-
-void
-kibnal_pathreq_callback (void *arg, QUERY *qry,
- QUERY_RESULT_VALUES *qrslt)
-{
- IB_CA_ATTRIBUTES *ca_attr = &kibnal_data.kib_hca_attrs;
- kib_conn_t *conn = arg;
- CM_REQUEST_INFO *req = &conn->ibc_cvars->cv_cmci.Info.Request;
- PATH_RESULTS *path = (PATH_RESULTS *)qrslt->QueryResult;
- FSTATUS frc;
-
- if (qrslt->Status != FSUCCESS ||
- qrslt->ResultDataSize < sizeof(*path)) {
- CDEBUG (D_NETERROR, "pathreq %s failed: status %d data size %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- qrslt->Status, qrslt->ResultDataSize);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
- return;
- }
-
- if (path->NumPathRecords < 1) {
- CDEBUG (D_NETERROR, "pathreq %s failed: no path records\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
- return;
- }
-
- //dump_path_records(path);
- conn->ibc_cvars->cv_path = path->PathRecords[0];
-
- LASSERT (conn->ibc_cep == NULL);
-
- conn->ibc_cep = kibnal_create_cep(conn->ibc_peer->ibp_nid);
- if (conn->ibc_cep == NULL) {
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ENOMEM);
- return;
- }
-
- memset(req, 0, sizeof(*req));
- req->SID = conn->ibc_cvars->cv_svcrec.RID.ServiceID;
- req->CEPInfo.CaGUID = kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx];
- req->CEPInfo.EndToEndFlowControl = IBNAL_EE_FLOW;
- req->CEPInfo.PortGUID = conn->ibc_cvars->cv_path.SGID.Type.Global.InterfaceID;
- req->CEPInfo.RetryCount = IBNAL_RETRY;
- req->CEPInfo.RnrRetryCount = IBNAL_RNR_RETRY;
- req->CEPInfo.AckTimeout = IBNAL_ACK_TIMEOUT;
- req->CEPInfo.StartingPSN = IBNAL_STARTING_PSN;
- req->CEPInfo.QPN = conn->ibc_cvars->cv_qpattrs.QPNumber;
- req->CEPInfo.QKey = conn->ibc_cvars->cv_qpattrs.Qkey;
- req->CEPInfo.OfferedResponderResources = ca_attr->MaxQPResponderResources;
- req->CEPInfo.OfferedInitiatorDepth = ca_attr->MaxQPInitiatorDepth;
- req->PathInfo.bSubnetLocal = IBNAL_LOCAL_SUB;
- req->PathInfo.Path = conn->ibc_cvars->cv_path;
-
- CLASSERT (CM_REQUEST_INFO_USER_LEN >=
- offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t));
-
- kibnal_pack_connmsg((kib_msg_t *)req->PrivateData,
- conn->ibc_version,
- CM_REQUEST_INFO_USER_LEN,
- IBNAL_MSG_CONNREQ,
- conn->ibc_peer->ibp_nid, 0);
-
- if (the_lnet.ln_testprotocompat != 0) {
- /* single-shot proto test */
- LNET_LOCK();
- if ((the_lnet.ln_testprotocompat & 1) != 0) {
- ((kib_msg_t *)req->PrivateData)->ibm_version++;
- the_lnet.ln_testprotocompat &= ~1;
- }
- if ((the_lnet.ln_testprotocompat & 2) != 0) {
- ((kib_msg_t *)req->PrivateData)->ibm_magic =
- LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~2;
- }
- LNET_UNLOCK();
- }
-
- /* Flag I'm getting involved with the CM... */
- kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING);
-
- /* cm callback gets my conn ref */
- frc = iba_cm_connect(conn->ibc_cep, req,
- kibnal_cm_active_callback, conn);
- if (frc == FPENDING || frc == FSUCCESS)
- return;
-
- CERROR ("Connect %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
-}
-
-void
-kibnal_dump_service_records(SERVICE_RECORD_RESULTS *results)
-{
- IB_SERVICE_RECORD *svc;
- int i;
-
- for (i = 0; i < results->NumServiceRecords; i++) {
- svc = &results->ServiceRecords[i];
- CDEBUG(D_NET, "%d: sid "LPX64" gid "LPX64":"LPX64" pkey %x\n",
- i,
- svc->RID.ServiceID,
- svc->RID.ServiceGID.Type.Global.SubnetPrefix,
- svc->RID.ServiceGID.Type.Global.InterfaceID,
- svc->RID.ServiceP_Key);
- }
-}
-
-void
-kibnal_service_get_callback (void *arg, QUERY *qry,
- QUERY_RESULT_VALUES *qrslt)
-{
- kib_conn_t *conn = arg;
- SERVICE_RECORD_RESULTS *svc;
- FSTATUS frc;
-
- if (qrslt->Status != FSUCCESS ||
- qrslt->ResultDataSize < sizeof(*svc)) {
- CDEBUG (D_NETERROR, "Lookup %s failed: status %d data size %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- qrslt->Status, qrslt->ResultDataSize);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
- return;
- }
-
- svc = (SERVICE_RECORD_RESULTS *)qrslt->QueryResult;
- if (svc->NumServiceRecords < 1) {
- CDEBUG (D_NETERROR, "lookup %s failed: no service records\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
- return;
- }
-
- //kibnal_dump_service_records(svc);
- conn->ibc_cvars->cv_svcrec = svc->ServiceRecords[0];
-
- qry = &conn->ibc_cvars->cv_query;
- memset(qry, 0, sizeof(*qry));
-
- qry->OutputType = OutputTypePathRecord;
- qry->InputType = InputTypePortGuidPair;
-
- qry->InputValue.PortGuidPair.SourcePortGuid =
- kibnal_data.kib_port_guid;
- qry->InputValue.PortGuidPair.DestPortGuid =
- conn->ibc_cvars->cv_svcrec.RID.ServiceGID.Type.Global.InterfaceID;
-
- /* kibnal_pathreq_callback gets my conn ref */
- frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- qry,
- kibnal_pathreq_callback,
- &kibnal_data.kib_sdretry,
- conn);
- if (frc == FPENDING)
- return;
-
- CERROR ("pathreq %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
-}
-
-void
-kibnal_connect_peer (kib_peer_t *peer)
-{
- QUERY *qry;
- FSTATUS frc;
- kib_conn_t *conn;
-
- LASSERT (peer->ibp_connecting != 0);
-
- conn = kibnal_create_conn(peer->ibp_nid, peer->ibp_version);
- if (conn == NULL) {
- CERROR ("Can't allocate conn\n");
- kibnal_peer_connect_failed(peer, IBNAL_CONN_ACTIVE, -ENOMEM);
- return;
- }
-
- conn->ibc_peer = peer;
- kibnal_peer_addref(peer);
-
- qry = &conn->ibc_cvars->cv_query;
- memset(qry, 0, sizeof(*qry));
-
- qry->OutputType = OutputTypeServiceRecord;
- qry->InputType = InputTypeServiceRecord;
-
- qry->InputValue.ServiceRecordValue.ComponentMask =
- KIBNAL_SERVICE_KEY_MASK;
- kibnal_set_service_keys(
- &qry->InputValue.ServiceRecordValue.ServiceRecord,
- peer->ibp_nid);
-
- /* kibnal_service_get_callback gets my conn ref */
- frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- qry,
- kibnal_service_get_callback,
- &kibnal_data.kib_sdretry,
- conn);
- if (frc == FPENDING)
- return;
-
- CERROR("Lookup %s failed: %d\n", libcfs_nid2str(peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
-}
-
-int
-kibnal_check_txs (kib_conn_t *conn, struct list_head *txs)
-{
- kib_tx_t *tx;
- struct list_head *ttmp;
- int timed_out = 0;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each (ttmp, txs) {
- tx = list_entry (ttmp, kib_tx_t, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_waiting || tx->tx_sending != 0);
- } else {
- LASSERT (tx->tx_queued);
- }
-
- if (time_after_eq (jiffies, tx->tx_deadline)) {
- timed_out = 1;
- break;
- }
- }
-
- spin_unlock(&conn->ibc_lock);
- return timed_out;
-}
-
-int
-kibnal_conn_timed_out (kib_conn_t *conn)
-{
- return kibnal_check_txs(conn, &conn->ibc_tx_queue) ||
- kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) ||
- kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) ||
- kibnal_check_txs(conn, &conn->ibc_active_txs);
-}
-
-void
-kibnal_check_peers (int idx)
-{
- rwlock_t *rwlock = &kibnal_data.kib_global_lock;
- struct list_head *peers = &kibnal_data.kib_peers[idx];
- struct list_head *ptmp;
- kib_peer_t *peer;
- kib_conn_t *conn;
- struct list_head *ctmp;
- unsigned long flags;
-
- again:
- /* NB. We expect to have a look at all the peers and not find any
- * rdmas to time out, so we just use a shared lock while we
- * take a look... */
- read_lock_irqsave(rwlock, flags);
-
- list_for_each (ptmp, peers) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
-
- if (peer->ibp_passivewait) {
- LASSERT (list_empty(&peer->ibp_conns));
-
- if (!time_after_eq(jiffies,
- peer->ibp_passivewait_deadline))
- continue;
-
- kibnal_peer_addref(peer); /* ++ ref for me... */
- read_unlock_irqrestore(rwlock, flags);
-
- kibnal_peer_connect_failed(peer, IBNAL_CONN_WAITING,
- -ETIMEDOUT);
- kibnal_peer_decref(peer); /* ...until here */
-
- /* start again now I've dropped the lock */
- goto again;
- }
-
- list_for_each (ctmp, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED);
-
- /* In case we have enough credits to return via a
- * NOOP, but there were no non-blocking tx descs
- * free to do it last time... */
- kibnal_check_sends(conn);
-
- if (!kibnal_conn_timed_out(conn))
- continue;
-
- /* Handle timeout by closing the whole connection. We
- * can only be sure RDMA activity has ceased once the
- * QP has been modified. */
-
- kibnal_conn_addref(conn); /* 1 ref for me... */
-
- read_unlock_irqrestore(rwlock, flags);
-
- CERROR("Timed out RDMA with %s\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_close_conn (conn, -ETIMEDOUT);
- kibnal_conn_decref(conn); /* ...until here */
-
- /* start again now I've dropped the lock */
- goto again;
- }
- }
-
- read_unlock_irqrestore(rwlock, flags);
-}
-
-void
-kibnal_disconnect_conn (kib_conn_t *conn)
-{
- FSTATUS frc;
-
- LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECTING);
-
- kibnal_conn_disconnected(conn);
-
- frc = iba_cm_disconnect(conn->ibc_cep, NULL, NULL);
- switch (frc) {
- case FSUCCESS:
- break;
-
- case FINSUFFICIENT_RESOURCES:
- CERROR("ENOMEM disconnecting %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /* This might cause the module to become unloadable since the
- * FCM_DISCONNECTED callback is still outstanding */
- break;
-
- default:
- CERROR("Unexpected error disconnecting %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- LBUG();
- }
-
- kibnal_peer_notify(conn->ibc_peer);
-}
-
-int
-kibnal_connd (void *arg)
-{
- wait_queue_t wait;
- unsigned long flags;
- kib_conn_t *conn;
- kib_peer_t *peer;
- int timeout;
- int i;
- int did_something;
- int peer_index = 0;
- unsigned long deadline = jiffies;
-
- cfs_daemonize ("kibnal_connd");
- cfs_block_allsigs ();
-
- init_waitqueue_entry (&wait, current);
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- while (!kibnal_data.kib_shutdown) {
- did_something = 0;
-
- if (!list_empty (&kibnal_data.kib_connd_zombies)) {
- conn = list_entry (kibnal_data.kib_connd_zombies.next,
- kib_conn_t, ibc_list);
- list_del (&conn->ibc_list);
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
-
- kibnal_destroy_conn(conn);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- if (!list_empty (&kibnal_data.kib_connd_conns)) {
- conn = list_entry (kibnal_data.kib_connd_conns.next,
- kib_conn_t, ibc_list);
- list_del (&conn->ibc_list);
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
-
- kibnal_disconnect_conn(conn);
- kibnal_conn_decref(conn);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- if (!list_empty (&kibnal_data.kib_connd_peers)) {
- peer = list_entry (kibnal_data.kib_connd_peers.next,
- kib_peer_t, ibp_connd_list);
-
- list_del_init (&peer->ibp_connd_list);
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
-
- kibnal_connect_peer (peer);
- kibnal_peer_decref (peer);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- /* careful with the jiffy wrap... */
- while ((timeout = (int)(deadline - jiffies)) <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = kibnal_data.kib_peer_hash_size;
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- /* Time to check for RDMA timeouts on a few more
- * peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
- * every connection 'n' times within a timeout
- * interval, to ensure I detect a timeout on any
- * connection within (n+1)/n times the timeout
- * interval. */
-
- if (*kibnal_tunables.kib_timeout > n * p)
- chunk = (chunk * n * p) /
- *kibnal_tunables.kib_timeout;
- if (chunk == 0)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kibnal_check_peers (peer_index);
- peer_index = (peer_index + 1) %
- kibnal_data.kib_peer_hash_size;
- }
-
- deadline += p * HZ;
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
- }
-
- if (did_something)
- continue;
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
-
- if (!kibnal_data.kib_shutdown &&
- list_empty (&kibnal_data.kib_connd_conns) &&
- list_empty (&kibnal_data.kib_connd_peers))
- schedule_timeout (timeout);
-
- set_current_state (TASK_RUNNING);
- remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- kibnal_thread_fini ();
- return (0);
-}
-
-
-void
-kibnal_hca_async_callback (void *hca_arg, IB_EVENT_RECORD *ev)
-{
- /* XXX flesh out. this seems largely for async errors */
- CERROR("type: %d code: %u\n", ev->EventType, ev->EventCode);
-}
-
-void
-kibnal_hca_callback (void *hca_arg, void *cq_arg)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- kibnal_data.kib_ready = 1;
- wake_up(&kibnal_data.kib_sched_waitq);
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-}
-
-int
-kibnal_scheduler(void *arg)
-{
- long id = (long)arg;
- wait_queue_t wait;
- char name[16];
- FSTATUS frc;
- FSTATUS frc2;
- IB_WORK_COMPLETION wc;
- kib_rx_t *rx;
- unsigned long flags;
- __u64 rxseq = 0;
- int busy_loops = 0;
-
- snprintf(name, sizeof(name), "kibnal_sd_%02ld", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- init_waitqueue_entry(&wait, current);
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
-
- while (!kibnal_data.kib_shutdown) {
- if (busy_loops++ >= IBNAL_RESCHED) {
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- our_cond_resched();
- busy_loops = 0;
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- }
-
- if (kibnal_data.kib_ready &&
- !kibnal_data.kib_checking_cq) {
- /* take ownership of completion polling */
- kibnal_data.kib_checking_cq = 1;
- /* Assume I'll exhaust the CQ */
- kibnal_data.kib_ready = 0;
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- frc = iba_poll_cq(kibnal_data.kib_cq, &wc);
- if (frc == FNOT_DONE) {
- /* CQ empty */
- frc2 = iba_rearm_cq(kibnal_data.kib_cq,
- CQEventSelNextWC);
- LASSERT (frc2 == FSUCCESS);
- }
-
- if (frc == FSUCCESS &&
- kibnal_wreqid2type(wc.WorkReqId) == IBNAL_WID_RX) {
- rx = (kib_rx_t *)kibnal_wreqid2ptr(wc.WorkReqId);
-
- /* Grab the RX sequence number NOW before
- * anyone else can get an RX completion */
- rxseq = rx->rx_conn->ibc_rxseq++;
- }
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- /* give up ownership of completion polling */
- kibnal_data.kib_checking_cq = 0;
-
- if (frc == FNOT_DONE)
- continue;
-
- LASSERT (frc == FSUCCESS);
- /* Assume there's more: get another scheduler to check
- * while I handle this completion... */
-
- kibnal_data.kib_ready = 1;
- wake_up(&kibnal_data.kib_sched_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- switch (kibnal_wreqid2type(wc.WorkReqId)) {
- case IBNAL_WID_RX:
- kibnal_rx_complete(&wc, rxseq);
- break;
-
- case IBNAL_WID_TX:
- kibnal_tx_complete(&wc);
- break;
-
- case IBNAL_WID_RDMA:
- /* We only get RDMA completion notification if
- * it fails. So we just ignore them completely
- * because...
- *
- * 1) If an RDMA fails, all subsequent work
- * items, including the final SEND will fail
- * too, so I'm still guaranteed to notice that
- * this connection is hosed.
- *
- * 2) It's positively dangerous to look inside
- * the tx descriptor obtained from an RDMA work
- * item. As soon as I drop the kib_sched_lock,
- * I give a scheduler on another CPU a chance
- * to get the final SEND completion, so the tx
- * descriptor can get freed as I inspect it. */
- CERROR ("RDMA failed: %d\n", wc.Status);
- break;
-
- default:
- LBUG();
- }
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- continue;
- }
-
- /* Nothing to do; sleep... */
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kibnal_data.kib_sched_waitq, &wait);
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- schedule();
-
- remove_wait_queue(&kibnal_data.kib_sched_waitq, &wait);
- set_current_state(TASK_RUNNING);
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- }
-
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-
- kibnal_thread_fini();
- return (0);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "iiblnd.h"
-
-static char *ipif_basename = "ib";
-CFS_MODULE_PARM(ipif_basename, "s", charp, 0444,
- "IPoIB interface base name");
-
-static char *service_name = "iiblnd";
-CFS_MODULE_PARM(service_name, "s", charp, 0444,
- "IB service name");
-
-static int service_number = 0x11b9a2;
-CFS_MODULE_PARM(service_number, "i", int, 0444,
- "IB service number");
-
-static int min_reconnect_interval = 1;
-CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644,
- "minimum connection retry interval (seconds)");
-
-static int max_reconnect_interval = 60;
-CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644,
- "maximum connection retry interval (seconds)");
-
-static int concurrent_peers = 1152;
-CFS_MODULE_PARM(concurrent_peers, "i", int, 0444,
- "maximum number of peers that may connect");
-
-static int cksum = 0;
-CFS_MODULE_PARM(cksum, "i", int, 0644,
- "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
- "timeout (seconds)");
-
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# of message descriptors");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int peer_credits = 8;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
- "# concurrent sends to 1 peer");
-
-static int sd_retries = 8;
-CFS_MODULE_PARM(sd_retries, "i", int, 0444,
- "# times to retry SD queries");
-
-static int keepalive = 100;
-CFS_MODULE_PARM(keepalive, "i", int, 0644,
- "Idle time in seconds before sending a keepalive");
-
-static int concurrent_sends = IBNAL_RX_MSGS;
-CFS_MODULE_PARM(concurrent_sends, "i", int, 0644,
- "Send work queue sizing");
-
-kib_tunables_t kibnal_tunables = {
- .kib_ipif_basename = &ipif_basename,
- .kib_service_name = &service_name,
- .kib_service_number = &service_number,
- .kib_min_reconnect_interval = &min_reconnect_interval,
- .kib_max_reconnect_interval = &max_reconnect_interval,
- .kib_concurrent_peers = &concurrent_peers,
- .kib_cksum = &cksum,
- .kib_timeout = &timeout,
- .kib_keepalive = &keepalive,
- .kib_ntx = &ntx,
- .kib_credits = &credits,
- .kib_peercredits = &peer_credits,
- .kib_sd_retries = &sd_retries,
- .kib_concurrent_sends = &concurrent_sends,
-};
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-/* NB max_size specified for proc_dostring entries only needs to be big enough
- * not to truncate the printout; it only needs to be the actual size of the
- * string buffer if we allow writes (and we don't) */
-
-static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "ipif_basename",
- .data = &ipif_basename,
- .maxlen = 1024,
- .mode = 0444,
- .proc_handler = &proc_dostring
- },
- {
- .ctl_name = 2,
- .procname = "service_name",
- .data = &service_name,
- .maxlen = 1024,
- .mode = 0444,
- .proc_handler = &proc_dostring
- },
- {
- .ctl_name = 3,
- .procname = "service_number",
- .data = &service_number,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 6,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 7,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 8,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 9,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 10,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 11,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 12,
- .procname = "sd_retries",
- .data = &sd_retries,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 13,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 14,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {0}
-};
-
-static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
- .ctl_name = 203,
- .procname = "openibnal",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = kibnal_ctl_table
- },
- {0}
-};
-
-int
-kibnal_tunables_init ()
-{
- kibnal_tunables.kib_sysctl =
- cfs_register_sysctl_table(kibnal_top_ctl_table, 0);
-
- if (kibnal_tunables.kib_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-
- if (*kibnal_tunables.kib_concurrent_sends > IBNAL_RX_MSGS)
- *kibnal_tunables.kib_concurrent_sends = IBNAL_RX_MSGS;
- if (*kibnal_tunables.kib_concurrent_sends < IBNAL_MSG_QUEUE_SIZE)
- *kibnal_tunables.kib_concurrent_sends = IBNAL_MSG_QUEUE_SIZE;
-
- return 0;
-}
-
-void
-kibnal_tunables_fini ()
-{
- if (kibnal_tunables.kib_sysctl != NULL)
- cfs_unregister_sysctl_table(kibnal_tunables.kib_sysctl);
-}
-
-#else
-
-int
-kibnal_tunables_init ()
-{
- return 0;
-}
-
-void
-kibnal_tunables_fini ()
-{
-}
-
-#endif
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
-
+++ /dev/null
-MODULES := kmxlnd
-kmxlnd-objs := mxlnd.o mxlnd_cb.o mxlnd_modparams.o
-
-EXTRA_POST_CFLAGS := @MXCPPFLAGS@
-
-@INCLUDE_RULES@
+++ /dev/null
-*************************************************************************
-* *
-* Myrinet Express Lustre Networking Driver (MXLND) documentation *
-* *
-*************************************************************************
-
-README of MXLND
-
-MXLND provides support for Myricom's Myrinet Express (MX) communication
-layer in Lustre.
-
-MXLND may be used with either MX-10G or MX-2G. See MX's README for
-supported NICs.
-
-Table of Contents:
- I. Installation
- 1. Configuring and compiling
- 2. Module Parameters
- II. MXLND Performance
- III. Caveats
- 1. Systems with different page sizes
- 2. Multi-homing
- 3. MX endpoint collision
- IV. License
- V. Support
-
-================
-I. Installation
-================
-
-MXLND is supported on Linux 2.6. It may be possible to run it on 2.4,
-but it has not been tested. MXLND requires Myricom's MX version 1.2.1
-or higher. See MX's README for the supported list of processors.
-
-1. Configuring and compiling
-
-MXLND should be already integrated into the Lustre build process. To
-build MXLND, you will need to set the path to your MX installation
-in Lustre's ./configure:
-
- --with-mx=/opt/mx
-
-replacing /opt with the actual path. Configure will check to ensure that
-the MX version has the required functions. If not, it will fail to build.
-To check if MXLND built, look for:
-
- checking whether to enable Myrinet MX support... yes
-
-in configure's output or the presence of Makefile in
-$LUSTRE/lnet/klnds/mxlnd.
-
-2. Module Parameters
-
-MXLND supports a number of load-time parameters using Linux's module
-parameter system. On our test systems, we created the following file:
-
- /etc/modprobe.d/kmxlnd
-
-On some (older?) systems, you may need to modify /etc/modprobe.conf.
-
-The available options are:
-
- n_waitd # of completion daemons
- max_peers maximum number of peers that may connect
- cksum set non-zero to enable small message (< 4KB) checksums
- ntx # of total tx message descriptors
- credits # concurrent sends to a single peer
- board index value of the Myrinet board (NIC)
- ep_id MX endpoint ID
- polling Use 0 to block (wait). A value > 0 will poll that many times before blocking
- hosts IP-to-hostname resolution file
-
-Of these, only hosts is required. It must be the absolute path to the
-MXLND hosts file. For example:
-
- options kmxlnd hosts=/etc/hosts.mxlnd
-
-The file format for the hosts file is as follows:
-
-IP HOST BOARD EP_ID
-
-The values must be space and/or tab separated where:
-
- IP is a valid IPv4 address
- HOST is the name returned by `hostname` on that machine
- BOARD is the index of the Myricom NIC (0 for the first card, etc.)
- EP_ID is the MX endpoint ID
-
-You may want to vary the remaining options to obtain the optimal performance
-for your platform.
-
- n_waitd sets the number of threads that process completed MX requests
-(sends and receives). In our testing, the default of 1 performed best.
-
- max_peers tells MXLND the upper limit of machines that it will need to
-communicate with. This affects how many receives it will pre-post and each
-receive will use one page of memory. Ideally, on clients, this value will
-be equal to the total number of Lustre servers (MDS and OSS). On servers,
-it needs to equal the total number of machines in the storage system.
-
- cksum turns on small message checksums. It can be used to aid in trouble-
-shooting. MX also provides an optional checksumming feature which can check
-all messages (large and small). See the MX README for details.
-
- ntx is the number of total sends in flight from this machine. In actuality,
-MXLND reserves half of them for connect messages so make this value twice as large
-as you want for the total number of sends in flight.
-
- credits is the number of in-flight messages for a specific peer. This is part
-of the flow-control system in Lustre. Increasing this value may improve performance
-but it requires more memory since each message requires at least one page.
-
- board is the index of the Myricom NIC. Hosts can have multiple Myricom NICs
-and this identifies which one MXLND should use. This value must match the board
-value in your MXLND hosts file for this host.
-
- ep_id is the MX endpoint ID. Each process that uses MX is required to have at
-least one MX endpoint to access the MX library and NIC. The ID is a simple index
-starting at 0. This value must match the endpoint ID value in your MXLND hosts
-file for this host.
-
- polling determines whether this host will poll or block for MX request com-
-pletions. A value of 0 blocks and any positive value will poll that many times
-before blocking. Since polling increases CPU usage, we suggest you set this to
-0 on the client and experiment with different values for servers.
-
-=====================
-II. MXLND Performance
-=====================
-
-On MX-2G systems, MXLND should easily saturate the link and use minimal CPU
-(5-10% for read and write operations). On MX-10G systems, MXLND can saturate
-the link and use moderate CPU resources (20-30% for read and write operations).
-MX-10G relies on PCI-Express which is relatively new and performance varies
-considerably by processor, motherboard and PCI-E chipset. Refer to Myricom's
-website for the latest DMA read/write performance results by motherboard. The
-DMA results will place an upper-bound on MXLND performance.
-
-============
-III. Caveats
-============
-
-1. Systems with different page sizes
-
-MXLND will set the maximum small message size equal to the kernel's page size.
-This means that machines running MXLND that have different page sizes are not
-able to communicate with each other. If you wish to run MXLND in this case,
-send email to help@myri.com.
-
-2. Multi-homing
-
-At this time, the MXLND does not support more than one interface at a time.
-Thus, a single Lustre router cannot route between two MX-10G, between two
-MX-2G, or between MX-10G and MX-2G fabrics.
-
-3. MX endpoint collision
-
-Each process that uses MX is required to have at least one MX endpoint to
-access the MX library and NIC. Other processes may need to use MX and no two
-processes can use the same endpoint ID. MPICH-MX dynamically chooses one at
-MPI startup and should not interfere with MXLND. Sockets-MX, on the other hand,
-is hard coded to use 0 for its ID. If it is possible that anyone will want to
-run Sockets-MX on this system, use a non-0 value for MXLND's endpoint ID.
-
-
-===========
-IV. License
-===========
-
-MXLND is copyright (C) 2006 of Myricom, Inc.
-
-MXLND is part of Lustre, http://www.lustre.org.
-
-MXLND is free software; you can redistribute it and/or modify it under the
-terms of version 2 of the GNU General Public License as published by the Free
-Software Foundation.
-
-MXLND is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA.
-
-==========
-V. Support
-==========
-
-If you have questions about MXLND, please contact help@myri.com.
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_MXLND
-modulenet_DATA = kmxlnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kmxlnd-objs:%.o=%.c) mxlnd.h mxlnd_wire.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Copyright (C) 2006 Myricom, Inc.
- * Author: Scott Atchley <atchley at myri.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "mxlnd.h"
-
-lnd_t the_kmxlnd = {
- .lnd_type = MXLND,
- .lnd_startup = mxlnd_startup,
- .lnd_shutdown = mxlnd_shutdown,
- .lnd_ctl = mxlnd_ctl,
- .lnd_send = mxlnd_send,
- .lnd_recv = mxlnd_recv,
-};
-
-kmx_data_t kmxlnd_data;
-
-/**
- * mxlnd_ctx_free - free ctx struct
- * @ctx - a kmx_peer pointer
- *
- * The calling function should remove the ctx from the ctx list first
- * then free it.
- */
-void
-mxlnd_ctx_free(struct kmx_ctx *ctx)
-{
- if (ctx == NULL) return;
-
- if (ctx->mxc_page != NULL) {
- __free_page(ctx->mxc_page);
- spin_lock(&kmxlnd_data.kmx_global_lock);
- kmxlnd_data.kmx_mem_used -= MXLND_EAGER_SIZE;
- spin_unlock(&kmxlnd_data.kmx_global_lock);
- }
-
- if (ctx->mxc_seg_list != NULL) {
- LASSERT(ctx->mxc_nseg > 0);
- MXLND_FREE(ctx->mxc_seg_list, ctx->mxc_nseg * sizeof(mx_ksegment_t));
- }
-
- MXLND_FREE (ctx, sizeof (*ctx));
- return;
-}
-
-/**
- * mxlnd_ctx_alloc - allocate and initialize a new ctx struct
- * @ctxp - address of a kmx_ctx pointer
- *
- * Returns 0 on success and -EINVAL, -ENOMEM on failure
- */
-int
-mxlnd_ctx_alloc(struct kmx_ctx **ctxp, enum kmx_req_type type)
-{
- int ret = 0;
- struct kmx_ctx *ctx = NULL;
-
- if (ctxp == NULL) return -EINVAL;
-
- MXLND_ALLOC(ctx, sizeof (*ctx));
- if (ctx == NULL) {
- CDEBUG(D_NETERROR, "Cannot allocate ctx\n");
- return -ENOMEM;
- }
- memset(ctx, 0, sizeof(*ctx));
- spin_lock_init(&ctx->mxc_lock);
-
- ctx->mxc_type = type;
- ctx->mxc_page = alloc_page (GFP_KERNEL);
- if (ctx->mxc_page == NULL) {
- CDEBUG(D_NETERROR, "Can't allocate page\n");
- ret = -ENOMEM;
- goto failed;
- }
- spin_lock(&kmxlnd_data.kmx_global_lock);
- kmxlnd_data.kmx_mem_used += MXLND_EAGER_SIZE;
- spin_unlock(&kmxlnd_data.kmx_global_lock);
- ctx->mxc_msg = (struct kmx_msg *)((char *)page_address(ctx->mxc_page));
- ctx->mxc_seg.segment_ptr = MX_PA_TO_U64(lnet_page2phys(ctx->mxc_page));
- ctx->mxc_state = MXLND_CTX_IDLE;
-
- *ctxp = ctx;
- return 0;
-
-failed:
- mxlnd_ctx_free(ctx);
- return ret;
-}
-
-/**
- * mxlnd_ctx_init - reset ctx struct to the default values
- * @ctx - a kmx_ctx pointer
- */
-void
-mxlnd_ctx_init(struct kmx_ctx *ctx)
-{
- if (ctx == NULL) return;
-
- /* do not change mxc_type */
- ctx->mxc_incarnation = 0;
- ctx->mxc_deadline = 0;
- ctx->mxc_state = MXLND_CTX_IDLE;
- /* ignore mxc_global_list */
- if (ctx->mxc_list.next != NULL && !list_empty(&ctx->mxc_list)) {
- if (ctx->mxc_peer != NULL) spin_lock(&ctx->mxc_lock);
- list_del_init(&ctx->mxc_list);
- if (ctx->mxc_peer != NULL) spin_unlock(&ctx->mxc_lock);
- }
- /* ignore mxc_rx_list */
- /* ignore mxc_lock */
- ctx->mxc_nid = 0;
- ctx->mxc_peer = NULL;
- ctx->mxc_conn = NULL;
- /* ignore mxc_msg */
- /* ignore mxc_page */
- ctx->mxc_lntmsg[0] = NULL;
- ctx->mxc_lntmsg[1] = NULL;
- ctx->mxc_msg_type = 0;
- ctx->mxc_cookie = 0LL;
- ctx->mxc_match = 0LL;
- /* ctx->mxc_seg.segment_ptr points to mxc_page */
- ctx->mxc_seg.segment_length = 0;
- if (ctx->mxc_seg_list != NULL) {
- LASSERT(ctx->mxc_nseg > 0);
- MXLND_FREE(ctx->mxc_seg_list, ctx->mxc_nseg * sizeof(mx_ksegment_t));
- }
- ctx->mxc_seg_list = NULL;
- ctx->mxc_nseg = 0;
- ctx->mxc_nob = 0;
- ctx->mxc_mxreq = NULL;
- memset(&ctx->mxc_status, 0, sizeof(mx_status_t));
- /* ctx->mxc_get */
- /* ctx->mxc_put */
-
- ctx->mxc_msg->mxm_type = 0;
- ctx->mxc_msg->mxm_credits = 0;
- ctx->mxc_msg->mxm_nob = 0;
- ctx->mxc_msg->mxm_seq = 0;
-
- return;
-}
-
-/**
- * mxlnd_free_txs - free kmx_txs and associated pages
- *
- * Called from mxlnd_shutdown()
- */
-void
-mxlnd_free_txs(void)
-{
- struct kmx_ctx *tx = NULL;
- struct kmx_ctx *next = NULL;
-
- list_for_each_entry_safe(tx, next, &kmxlnd_data.kmx_txs, mxc_global_list) {
- list_del_init(&tx->mxc_global_list);
- mxlnd_ctx_free(tx);
- }
- return;
-}
-
-/**
- * mxlnd_init_txs - allocate tx descriptors then stash on txs and idle tx lists
- *
- * Called from mxlnd_startup()
- * returns 0 on success, else -ENOMEM
- */
-int
-mxlnd_init_txs(void)
-{
- int ret = 0;
- int i = 0;
- struct kmx_ctx *tx = NULL;
-
- for (i = 0; i < *kmxlnd_tunables.kmx_ntx; i++) {
- ret = mxlnd_ctx_alloc(&tx, MXLND_REQ_TX);
- if (ret != 0) {
- mxlnd_free_txs();
- return ret;
- }
- mxlnd_ctx_init(tx);
- /* in startup(), no locks required */
- list_add_tail(&tx->mxc_global_list, &kmxlnd_data.kmx_txs);
- list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle);
- }
- return 0;
-}
-
-/**
- * mxlnd_free_rxs - free initial kmx_rx descriptors and associated pages
- *
- * Called from mxlnd_shutdown()
- */
-void
-mxlnd_free_rxs(void)
-{
- struct kmx_ctx *rx = NULL;
- struct kmx_ctx *next = NULL;
-
- list_for_each_entry_safe(rx, next, &kmxlnd_data.kmx_rxs, mxc_global_list) {
- list_del_init(&rx->mxc_global_list);
- mxlnd_ctx_free(rx);
- }
- return;
-}
-
-/**
- * mxlnd_init_rxs - allocate initial rx descriptors
- *
- * Called from startup(). We create MXLND_MAX_PEERS plus MXLND_NTX
- * rx descriptors. We create one for each potential peer to handle
- * the initial connect request. We create on for each tx in case the
- * send requires a non-eager receive.
- *
- * Returns 0 on success, else -ENOMEM
- */
-int
-mxlnd_init_rxs(void)
-{
- int ret = 0;
- int i = 0;
- struct kmx_ctx *rx = NULL;
-
- for (i = 0; i < (*kmxlnd_tunables.kmx_ntx + *kmxlnd_tunables.kmx_max_peers); i++) {
- ret = mxlnd_ctx_alloc(&rx, MXLND_REQ_RX);
- if (ret != 0) {
- mxlnd_free_rxs();
- return ret;
- }
- mxlnd_ctx_init(rx);
- /* in startup(), no locks required */
- list_add_tail(&rx->mxc_global_list, &kmxlnd_data.kmx_rxs);
- list_add_tail(&rx->mxc_list, &kmxlnd_data.kmx_rx_idle);
- }
- return 0;
-}
-
-/**
- * mxlnd_free_peers - free peers
- *
- * Called from mxlnd_shutdown()
- */
-void
-mxlnd_free_peers(void)
-{
- int i = 0;
- struct kmx_peer *peer = NULL;
- struct kmx_peer *next = NULL;
-
- for (i = 0; i < MXLND_HASH_SIZE; i++) {
- list_for_each_entry_safe(peer, next, &kmxlnd_data.kmx_peers[i], mxp_peers) {
- list_del_init(&peer->mxp_peers);
- if (peer->mxp_conn) mxlnd_conn_decref(peer->mxp_conn);
- mxlnd_peer_decref(peer);
- }
- }
-}
-
-int
-mxlnd_host_alloc(struct kmx_host **hostp)
-{
- struct kmx_host *host = NULL;
-
- MXLND_ALLOC(host, sizeof (*host));
- if (host == NULL) {
- CDEBUG(D_NETERROR, "Cannot allocate host\n");
- return -1;
- }
- memset(host, 0, sizeof(*host));
- spin_lock_init(&host->mxh_lock);
-
- *hostp = host;
-
- return 0;
-}
-
-void
-mxlnd_host_free(struct kmx_host *host)
-{
- if (host == NULL) return;
-
- if (host->mxh_hostname != NULL)
- MXLND_FREE(host->mxh_hostname, strlen(host->mxh_hostname) + 1);
-
- MXLND_FREE(host, sizeof(*host));
- return;
-}
-
-/**
- * mxlnd_free_hosts - free kmx_hosts
- *
- * Called from mxlnd_shutdown()
- */
-void
-mxlnd_free_hosts(void)
-{
- struct kmx_host *host = NULL;
- struct kmx_host *next = NULL;
-
- list_for_each_entry_safe(host, next, &kmxlnd_data.kmx_hosts, mxh_list) {
- list_del_init(&host->mxh_list);
- mxlnd_host_free(host);
- }
- return;
-}
-
-#define xstr(s) #s
-#define str(s) xstr(s)
-#define MXLND_MAX_BOARD 4 /* we expect hosts to have fewer NICs than this */
-#define MXLND_MAX_EP_ID 16 /* we expect hosts to have less than this endpoints */
-
-/* this parses a line that consists of:
- *
- * IP HOSTNAME BOARD ENDPOINT ID
- * 169.192.0.113 mds01 0 3
- *
- * By default MX uses the alias (short hostname). If you override
- * it using mx_hostname to use the FQDN or some other name, the hostname
- * here must match exactly.
- */
-
-/* MX_MAX_HOSTNAME_LEN = 80. See myriexpress.h */
-int
-mxlnd_parse_line(char *line)
-{
- int i = 0;
- int ret = 0;
- int len = 0;
- u32 ip[4] = { 0, 0, 0, 0 };
- char hostname[MX_MAX_HOSTNAME_LEN];
- u32 board = -1;
- u32 ep_id = -1;
- struct kmx_host *host = NULL;
-
- if (line == NULL) return -1;
-
- len = strlen(line);
-
- if (len == 0) return -1;
-
- /* convert tabs to spaces */
- for (i = 0; i < len; i++) {
- if (line[i] == '\t') line[i] = ' ';
- }
-
- memset(&hostname, 0 , sizeof(hostname));
- ret = sscanf(line, "%d.%d.%d.%d %" str(MX_MAX_HOSTNAME_LEN) "s %d %d",
- &ip[0], &ip[1], &ip[2], &ip[3], hostname, &board, &ep_id);
-
- if (ret != 7) {
- return -1;
- }
-
- /* check for valid values */
- /* we assume a valid IP address (all <= 255), number of NICs,
- * and number of endpoint IDs */
- if (ip[0] > 255 || ip [1] > 255 || ip[2] > 255 || ip[3] > 255 ||
- board > MXLND_MAX_BOARD || ep_id > MXLND_MAX_EP_ID) {
- CDEBUG(D_NETERROR, "Illegal value in \"%s\". Ignoring "
- "this host.\n", line);
- return -1;
- }
-
- ret = mxlnd_host_alloc(&host);
- if (ret != 0) return -1;
-
- host->mxh_addr = ((ip[0]<<24)|(ip[1]<<16)|(ip[2]<<8)|ip[3]);
- len = strlen(hostname);
- MXLND_ALLOC(host->mxh_hostname, len + 1);
- if (host->mxh_hostname == NULL) {
- mxlnd_host_free(host);
- return -ENOMEM;
- }
- memset(host->mxh_hostname, 0, len + 1);
- strncpy(host->mxh_hostname, hostname, len);
- host->mxh_board = board;
- host->mxh_ep_id = ep_id;
-
- spin_lock(&kmxlnd_data.kmx_hosts_lock);
- list_add_tail(&host->mxh_list, &kmxlnd_data.kmx_hosts);
- spin_unlock(&kmxlnd_data.kmx_hosts_lock);
-
- return 0;
-}
-
-void
-mxlnd_print_hosts(void)
-{
-#if MXLND_DEBUG
- struct kmx_host *host = NULL;
-
- list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) {
- int ip[4];
- u32 addr = host->mxh_addr;
-
- ip[0] = (addr >> 24) & 0xff;
- ip[1] = (addr >> 16) & 0xff;
- ip[2] = (addr >> 8) & 0xff;
- ip[3] = addr & 0xff;
- CDEBUG(D_NET, "\tip= %d.%d.%d.%d\n\thost= %s\n\tboard= %d\n\tep_id= %d\n\n",
- ip[0], ip[1], ip[2], ip[3],
- host->mxh_hostname, host->mxh_board, host->mxh_ep_id);
- }
-#endif
- return;
-}
-
-#define MXLND_BUFSIZE (PAGE_SIZE - 1)
-
-int
-mxlnd_parse_hosts(char *filename)
-{
- int ret = 0;
- s32 size = 0;
- s32 bufsize = MXLND_BUFSIZE;
- s32 allocd = 0;
- loff_t offset = 0;
- struct file *filp = NULL;
- struct inode *inode = NULL;
- char *buf = NULL;
- s32 buf_off = 0;
- char *sep = NULL;
- char *line = NULL;
-
- if (filename == NULL) return -1;
-
- filp = filp_open(filename, O_RDONLY, 0);
- if (IS_ERR(filp)) {
- CERROR("filp_open() failed for %s\n", filename);
- return -1;
- }
-
- inode = filp->f_dentry->d_inode;
- if (!S_ISREG(inode->i_mode)) {
- CERROR("%s is not a regular file\n", filename);
- return -1;
- }
-
- size = (s32) inode->i_size;
- if (size < MXLND_BUFSIZE) bufsize = size;
- allocd = bufsize;
- MXLND_ALLOC(buf, allocd + 1);
- if (buf == NULL) {
- CERROR("Cannot allocate buf\n");
- filp_close(filp, current->files);
- return -1;
- }
-
- while (offset < size) {
- memset(buf, 0, bufsize + 1);
- ret = kernel_read(filp, (unsigned long) offset, buf, (unsigned long) bufsize);
- if (ret < 0) {
- CDEBUG(D_NETERROR, "kernel_read() returned %d - closing %s\n", ret, filename);
- filp_close(filp, current->files);
- MXLND_FREE(buf, allocd + 1);
- return -1;
- }
-
- if (ret < bufsize) bufsize = ret;
- buf_off = 0;
- while (buf_off < bufsize) {
- sep = strchr(buf + buf_off, '\n');
- if (sep != NULL) {
- /* we have a line */
- line = buf + buf_off;
- *sep = '\0';
- ret = mxlnd_parse_line(line);
- if (ret != 0 && strlen(line) != 0) {
- CDEBUG(D_NETERROR, "Failed to parse \"%s\". Ignoring this host.\n", line);
- }
- buf_off += strlen(line) + 1;
- } else {
- /* last line or we need to read more */
- line = buf + buf_off;
- ret = mxlnd_parse_line(line);
- if (ret != 0) {
- bufsize -= strlen(line) + 1;
- }
- buf_off += strlen(line) + 1;
- }
- }
- offset += bufsize;
- bufsize = MXLND_BUFSIZE;
- }
-
- MXLND_FREE(buf, allocd + 1);
- filp_close(filp, current->files);
- mxlnd_print_hosts();
-
- return 0;
-}
-
-/**
- * mxlnd_init_mx - open the endpoint, set out ID, register the EAGER callback
- * @ni - the network interface
- *
- * Returns 0 on success, -1 on failure
- */
-int
-mxlnd_init_mx(lnet_ni_t *ni)
-{
- int ret = 0;
- int found = 0;
- mx_return_t mxret;
- mx_endpoint_addr_t addr;
- u32 board = *kmxlnd_tunables.kmx_board;
- u32 ep_id = *kmxlnd_tunables.kmx_ep_id;
- u64 nic_id = 0LL;
- struct kmx_host *host = NULL;
-
- mxret = mx_init();
- if (mxret != MX_SUCCESS) {
- CERROR("mx_init() failed with %s (%d)\n", mx_strerror(mxret), mxret);
- return -1;
- }
-
- ret = mxlnd_parse_hosts(*kmxlnd_tunables.kmx_hosts);
- if (ret != 0) {
- if (*kmxlnd_tunables.kmx_hosts != NULL) {
- CERROR("mxlnd_parse_hosts(%s) failed\n", *kmxlnd_tunables.kmx_hosts);
- }
- mx_finalize();
- return -1;
- }
-
- list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) {
- if (strcmp(host->mxh_hostname, system_utsname.nodename) == 0) {
- /* override the defaults and module parameters with
- * the info from the hosts file */
- board = host->mxh_board;
- ep_id = host->mxh_ep_id;
- kmxlnd_data.kmx_localhost = host;
- CDEBUG(D_NET, "my hostname is %s board %d ep_id %d\n", kmxlnd_data.kmx_localhost->mxh_hostname, kmxlnd_data.kmx_localhost->mxh_board, kmxlnd_data.kmx_localhost->mxh_ep_id);
- found = 1;
- break;
- }
- }
-
- if (found == 0) {
- CERROR("no host entry found for localhost\n");
- mx_finalize();
- return -1;
- }
-
- mxret = mx_open_endpoint(board, ep_id, MXLND_MSG_MAGIC,
- NULL, 0, &kmxlnd_data.kmx_endpt);
- if (mxret != MX_SUCCESS) {
- CERROR("mx_open_endpoint() failed with %d\n", mxret);
- mx_finalize();
- return -1;
- }
-
- mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &addr);
- mx_decompose_endpoint_addr(addr, &nic_id, &ep_id);
-
- LASSERT(host != NULL);
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), host->mxh_addr);
-
- CDEBUG(D_NET, "My NID is 0x%llx\n", ni->ni_nid);
-
- /* this will catch all unexpected receives. */
- mxret = mx_register_unexp_handler(kmxlnd_data.kmx_endpt,
- (mx_unexp_handler_t) mxlnd_unexpected_recv,
- NULL);
- if (mxret != MX_SUCCESS) {
- CERROR("mx_register_unexp_callback() failed with %s\n",
- mx_strerror(mxret));
- mx_close_endpoint(kmxlnd_data.kmx_endpt);
- mx_finalize();
- return -1;
- }
- mxret = mx_set_request_timeout(kmxlnd_data.kmx_endpt, NULL, MXLND_COMM_TIMEOUT/HZ*1000);
- if (mxret != MX_SUCCESS) {
- CERROR("mx_set_request_timeout() failed with %s\n",
- mx_strerror(mxret));
- mx_close_endpoint(kmxlnd_data.kmx_endpt);
- mx_finalize();
- return -1;
- }
- return 0;
-}
-
-
-/**
- * mxlnd_thread_start - spawn a kernel thread with this function
- * @fn - function pointer
- * @arg - pointer to the parameter data
- *
- * Returns 0 on success and a negative value on failure
- */
-int
-mxlnd_thread_start(int (*fn)(void *arg), void *arg)
-{
- int pid = 0;
- int i = (int) ((long) arg);
-
- atomic_inc(&kmxlnd_data.kmx_nthreads);
- init_completion(&kmxlnd_data.kmx_completions[i]);
-
- pid = kernel_thread (fn, arg, 0);
- if (pid < 0) {
- CERROR("kernel_thread() failed with %d\n", pid);
- atomic_dec(&kmxlnd_data.kmx_nthreads);
- }
- return pid;
-}
-
-/**
- * mxlnd_thread_stop - decrement thread counter
- *
- * The thread returns 0 when it detects shutdown.
- * We are simply decrementing the thread counter.
- */
-void
-mxlnd_thread_stop(long id)
-{
- int i = (int) id;
- atomic_dec (&kmxlnd_data.kmx_nthreads);
- complete(&kmxlnd_data.kmx_completions[i]);
-}
-
-/**
- * mxlnd_shutdown - stop IO, clean up state
- * @ni - LNET interface handle
- *
- * No calls to the LND should be made after calling this function.
- */
-void
-mxlnd_shutdown (lnet_ni_t *ni)
-{
- int i = 0;
- int nthreads = 2 + *kmxlnd_tunables.kmx_n_waitd;
-
- LASSERT (ni == kmxlnd_data.kmx_ni);
- LASSERT (ni->ni_data == &kmxlnd_data);
- CDEBUG(D_NET, "in shutdown()\n");
-
- CDEBUG(D_MALLOC, "before MXLND cleanup: libcfs_kmemory %d "
- "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory),
- kmxlnd_data.kmx_mem_used);
-
- switch (kmxlnd_data.kmx_init) {
-
- case MXLND_INIT_ALL:
-
- CDEBUG(D_NET, "setting shutdown = 1\n");
- /* set shutdown and wakeup request_waitds */
- kmxlnd_data.kmx_shutdown = 1;
- mb();
- mx_wakeup(kmxlnd_data.kmx_endpt);
- up(&kmxlnd_data.kmx_tx_queue_sem);
- mxlnd_sleep(2 * HZ);
-
- /* fall through */
-
- case MXLND_INIT_THREADS:
-
- CDEBUG(D_NET, "waiting on threads\n");
- /* wait for threads to complete */
- for (i = 0; i < nthreads; i++) {
- wait_for_completion(&kmxlnd_data.kmx_completions[i]);
- }
- LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
-
- CDEBUG(D_NET, "freeing completions\n");
- MXLND_FREE(kmxlnd_data.kmx_completions,
- MXLND_NCOMPLETIONS * sizeof(struct completion));
-
- /* fall through */
-
- case MXLND_INIT_MX:
-
- CDEBUG(D_NET, "stopping mx\n");
-
- /* wakeup waiters if they missed the above.
- * close endpoint to stop all traffic.
- * this will cancel and cleanup all requests, etc. */
-
- mx_wakeup(kmxlnd_data.kmx_endpt);
- mx_close_endpoint(kmxlnd_data.kmx_endpt);
- mx_finalize();
-
- CDEBUG(D_NET, "mxlnd_free_hosts();\n");
- mxlnd_free_hosts();
-
- /* fall through */
-
- case MXLND_INIT_RXS:
-
- CDEBUG(D_NET, "freeing rxs\n");
-
- /* free all rxs and associated pages */
- mxlnd_free_rxs();
-
- /* fall through */
-
- case MXLND_INIT_TXS:
-
- CDEBUG(D_NET, "freeing txs\n");
-
- /* free all txs and associated pages */
- mxlnd_free_txs();
-
- /* fall through */
-
- case MXLND_INIT_DATA:
-
- CDEBUG(D_NET, "freeing peers\n");
-
- /* free peer list */
- mxlnd_free_peers();
-
- /* fall through */
-
- case MXLND_INIT_NOTHING:
- break;
- }
- CDEBUG(D_NET, "shutdown complete\n");
-
- CDEBUG(D_MALLOC, "after MXLND cleanup: libcfs_kmemory %d "
- "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory),
- kmxlnd_data.kmx_mem_used);
-
- kmxlnd_data.kmx_init = MXLND_INIT_NOTHING;
- PORTAL_MODULE_UNUSE;
- return;
-}
-
-/**
- * mxlnd_startup - initialize state, open an endpoint, start IO
- * @ni - LNET interface handle
- *
- * Initialize state, open an endpoint, start monitoring threads.
- * Should only be called once.
- */
-int
-mxlnd_startup (lnet_ni_t *ni)
-{
- int i = 0;
- int ret = 0;
- int nthreads = 2; /* for timeoutd and tx_queued */
- struct timeval tv;
-
- LASSERT (ni->ni_lnd == &the_kmxlnd);
-
- if (kmxlnd_data.kmx_init != MXLND_INIT_NOTHING) {
- CERROR("Only 1 instance supported\n");
- return -EPERM;
- }
- CDEBUG(D_MALLOC, "before MXLND startup: libcfs_kmemory %d "
- "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory),
- kmxlnd_data.kmx_mem_used);
-
- /* reserve 1/2 of tx for connect request messages */
- ni->ni_maxtxcredits = *kmxlnd_tunables.kmx_ntx / 2;
- ni->ni_peertxcredits = *kmxlnd_tunables.kmx_credits;
- if (ni->ni_maxtxcredits < ni->ni_peertxcredits)
- ni->ni_maxtxcredits = ni->ni_peertxcredits;
-
- PORTAL_MODULE_USE;
- memset (&kmxlnd_data, 0, sizeof (kmxlnd_data));
-
- kmxlnd_data.kmx_ni = ni;
- ni->ni_data = &kmxlnd_data;
-
- do_gettimeofday(&tv);
- kmxlnd_data.kmx_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
- CDEBUG(D_NET, "my incarnation is %lld\n", kmxlnd_data.kmx_incarnation);
-
- spin_lock_init (&kmxlnd_data.kmx_global_lock);
-
- INIT_LIST_HEAD (&kmxlnd_data.kmx_conn_req);
- spin_lock_init (&kmxlnd_data.kmx_conn_lock);
- sema_init(&kmxlnd_data.kmx_conn_sem, 0);
-
- INIT_LIST_HEAD (&kmxlnd_data.kmx_hosts);
- spin_lock_init (&kmxlnd_data.kmx_hosts_lock);
-
- for (i = 0; i < MXLND_HASH_SIZE; i++) {
- INIT_LIST_HEAD (&kmxlnd_data.kmx_peers[i]);
- }
- rwlock_init (&kmxlnd_data.kmx_peers_lock);
-
- INIT_LIST_HEAD (&kmxlnd_data.kmx_txs);
- INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_idle);
- spin_lock_init (&kmxlnd_data.kmx_tx_idle_lock);
- kmxlnd_data.kmx_tx_next_cookie = 1;
- INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_queue);
- spin_lock_init (&kmxlnd_data.kmx_tx_queue_lock);
- sema_init(&kmxlnd_data.kmx_tx_queue_sem, 0);
-
- INIT_LIST_HEAD (&kmxlnd_data.kmx_rxs);
- spin_lock_init (&kmxlnd_data.kmx_rxs_lock);
- INIT_LIST_HEAD (&kmxlnd_data.kmx_rx_idle);
- spin_lock_init (&kmxlnd_data.kmx_rx_idle_lock);
-
- kmxlnd_data.kmx_init = MXLND_INIT_DATA;
- /*****************************************************/
-
- ret = mxlnd_init_txs();
- if (ret != 0) {
- CERROR("Can't alloc tx descs: %d\n", ret);
- goto failed;
- }
- kmxlnd_data.kmx_init = MXLND_INIT_TXS;
- /*****************************************************/
-
- ret = mxlnd_init_rxs();
- if (ret != 0) {
- CERROR("Can't alloc rx descs: %d\n", ret);
- goto failed;
- }
- kmxlnd_data.kmx_init = MXLND_INIT_RXS;
- /*****************************************************/
-
- ret = mxlnd_init_mx(ni);
- if (ret != 0) {
- CERROR("Can't init mx\n");
- goto failed;
- }
-
- kmxlnd_data.kmx_init = MXLND_INIT_MX;
- /*****************************************************/
-
- /* start threads */
-
- nthreads += *kmxlnd_tunables.kmx_n_waitd;
- MXLND_ALLOC (kmxlnd_data.kmx_completions,
- nthreads * sizeof(struct completion));
- if (kmxlnd_data.kmx_completions == NULL) {
- CERROR("failed to alloc kmxlnd_data.kmx_completions\n");
- goto failed;
- }
- memset(kmxlnd_data.kmx_completions, 0,
- nthreads * sizeof(struct completion));
-
- {
- CDEBUG(D_NET, "using %d %s in mx_wait_any()\n",
- *kmxlnd_tunables.kmx_n_waitd,
- *kmxlnd_tunables.kmx_n_waitd == 1 ? "thread" : "threads");
-
- for (i = 0; i < *kmxlnd_tunables.kmx_n_waitd; i++) {
- ret = mxlnd_thread_start(mxlnd_request_waitd, (void*)((long)i));
- if (ret < 0) {
- CERROR("Starting mxlnd_request_waitd[%d] failed with %d\n", i, ret);
- kmxlnd_data.kmx_shutdown = 1;
- mx_wakeup(kmxlnd_data.kmx_endpt);
- for (--i; i >= 0; i--) {
- wait_for_completion(&kmxlnd_data.kmx_completions[i]);
- }
- LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
- MXLND_FREE(kmxlnd_data.kmx_completions,
- MXLND_NCOMPLETIONS * sizeof(struct completion));
-
- goto failed;
- }
- }
- ret = mxlnd_thread_start(mxlnd_tx_queued, (void*)((long)i++));
- if (ret < 0) {
- CERROR("Starting mxlnd_tx_queued failed with %d\n", ret);
- kmxlnd_data.kmx_shutdown = 1;
- mx_wakeup(kmxlnd_data.kmx_endpt);
- for (--i; i >= 0; i--) {
- wait_for_completion(&kmxlnd_data.kmx_completions[i]);
- }
- LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
- MXLND_FREE(kmxlnd_data.kmx_completions,
- MXLND_NCOMPLETIONS * sizeof(struct completion));
- goto failed;
- }
- ret = mxlnd_thread_start(mxlnd_timeoutd, (void*)((long)i++));
- if (ret < 0) {
- CERROR("Starting mxlnd_timeoutd failed with %d\n", ret);
- kmxlnd_data.kmx_shutdown = 1;
- mx_wakeup(kmxlnd_data.kmx_endpt);
- up(&kmxlnd_data.kmx_tx_queue_sem);
- for (--i; i >= 0; i--) {
- wait_for_completion(&kmxlnd_data.kmx_completions[i]);
- }
- LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
- MXLND_FREE(kmxlnd_data.kmx_completions,
- MXLND_NCOMPLETIONS * sizeof(struct completion));
- goto failed;
- }
- }
-
- kmxlnd_data.kmx_init = MXLND_INIT_THREADS;
- /*****************************************************/
-
- kmxlnd_data.kmx_init = MXLND_INIT_ALL;
- CDEBUG(D_MALLOC, "startup complete (kmx_mem_used %ld)\n", kmxlnd_data.kmx_mem_used);
-
- return 0;
-failed:
- CERROR("mxlnd_startup failed\n");
- mxlnd_shutdown(ni);
- return (-ENETDOWN);
-}
-
-static int mxlnd_init(void)
-{
- lnet_register_lnd(&the_kmxlnd);
- return 0;
-}
-
-static void mxlnd_exit(void)
-{
- lnet_unregister_lnd(&the_kmxlnd);
- return;
-}
-
-module_init(mxlnd_init);
-module_exit(mxlnd_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Myricom, Inc. - help@myri.com");
-MODULE_DESCRIPTION("Kernel MyrinetExpress LND");
-MODULE_VERSION("0.5.0");
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Copyright (C) 2006 Myricom, Inc.
- * Author: Scott Atchley <atchley at myri.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-#define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h> /* module */
-#include <linux/kernel.h> /* module */
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-#include <linux/fs.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h> /* module */
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#include <linux/utsname.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#include <linux/netdevice.h> /* these are needed for ARP */
-#include <linux/if_arp.h>
-#include <net/arp.h>
-#include <linux/inetdevice.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include "libcfs/kp30.h"
-#include "lnet/lnet.h"
-#include "lnet/lib-lnet.h"
-
-#define MX_KERNEL 1
-#include "mx_extensions.h"
-#include "myriexpress.h"
-
-#if LNET_MAX_IOV > MX_MAX_SEGMENTS
- #error LNET_MAX_IOV is greater then MX_MAX_SEGMENTS
-#endif
-
-/* Using MX's 64 match bits
- * We are using the match bits to specify message type and the cookie. The
- * highest four bits (60-63) are reserved for message type. Below we specify
- * the types. MXLND_MASK_ICON_REQ and MXLND_MASK_ICON_ACK are used for
- * mx_iconnect(). We reserve the remaining combinations for future use. The
- * next 8 bits (52-59) are reserved for returning a status code for failed
- * GET_DATA (payload) messages. The last 52 bits are used for cookies. That
- * should allow unique cookies for 4 KB messages at 10 Gbps line rate without
- * rollover for about 8 years. That should be enough. */
-
-/* constants */
-#define MXLND_MASK_ICON_REQ (0xBLL << 60) /* it is a mx_iconnect() completion */
-#define MXLND_MASK_CONN_REQ (0xCLL << 60) /* CONN_REQ msg */
-#define MXLND_MASK_ICON_ACK (0x9LL << 60) /* it is a mx_iconnect() completion */
-#define MXLND_MASK_CONN_ACK (0xALL << 60) /* CONN_ACK msg*/
-#define MXLND_MASK_EAGER (0xELL << 60) /* EAGER msg */
-#define MXLND_MASK_NOOP (0x1LL << 60) /* NOOP msg */
-#define MXLND_MASK_PUT_REQ (0x2LL << 60) /* PUT_REQ msg */
-#define MXLND_MASK_PUT_ACK (0x3LL << 60) /* PUT_ACK msg */
-#define MXLND_MASK_PUT_DATA (0x4LL << 60) /* PUT_DATA msg */
-#define MXLND_MASK_GET_REQ (0x5LL << 60) /* GET_REQ msg */
-#define MXLND_MASK_GET_DATA (0x6LL << 60) /* GET_DATA msg */
-//#define MXLND_MASK_NAK (0x7LL << 60) /* NAK msg */
-
-#define MXLND_MAX_COOKIE ((1LL << 52) - 1) /* when to roll-over the cookie value */
-#define MXLND_NCOMPLETIONS (MXLND_N_SCHED + 2) /* max threads for completion array */
-
-/* defaults for configurable parameters */
-#define MXLND_N_SCHED 1 /* # schedulers (mx_wait_any() threads) */
-#define MXLND_MX_BOARD 0 /* Use the first MX NIC if more than 1 avail */
-#define MXLND_MX_EP_ID 3 /* MX endpoint ID */
-#define MXLND_COMM_TIMEOUT (20 * HZ) /* timeout for send/recv (jiffies) */
-#define MXLND_WAIT_TIMEOUT HZ /* timeout for wait (jiffies) */
-#define MXLND_POLLING 1000 /* poll iterations before blocking */
-#define MXLND_MAX_PEERS 1024 /* number of nodes talking to me */
-#define MXLND_EAGER_NUM MXLND_MAX_PEERS /* number of pre-posted receives */
-#define MXLND_EAGER_SIZE PAGE_SIZE /* pre-posted eager message size */
-#define MXLND_MSG_QUEUE_DEPTH 8 /* msg queue depth */
-#define MXLND_CREDIT_HIGHWATER (MXLND_MSG_QUEUE_DEPTH - 2)
- /* when to send a noop to return credits */
-#define MXLND_NTX 256 /* # of kmx_tx - total sends in flight
- 1/2 are reserved for connect messages */
-
-#define MXLND_HASH_BITS 6 /* the number of bits to hash over */
-#define MXLND_HASH_SIZE (1<<MXLND_HASH_BITS)
- /* number of peer lists for lookup.
- we hash over the last N bits of
- the IP address converted to an int. */
-#define MXLND_HASH_MASK (MXLND_HASH_SIZE - 1)
- /* ensure we use only the last N bits */
-
-/* debugging features */
-#define MXLND_CKSUM 0 /* checksum kmx_msg_t */
-#define MXLND_DEBUG 0 /* turn on printk()s */
-
-extern inline void mxlnd_noop(char *s, ...);
-#if MXLND_DEBUG
- #define MXLND_PRINT printk
-#else
- #define MXLND_PRINT mxlnd_noop
-#endif
-
-/* provide wrappers around LIBCFS_ALLOC/FREE to keep MXLND specific
- * memory usage stats that include pages */
-
-#define MXLND_ALLOC(x, size) \
- do { \
- spin_lock(&kmxlnd_data.kmx_global_lock); \
- kmxlnd_data.kmx_mem_used += size; \
- spin_unlock(&kmxlnd_data.kmx_global_lock); \
- LIBCFS_ALLOC(x, size); \
- if (x == NULL) { \
- spin_lock(&kmxlnd_data.kmx_global_lock); \
- kmxlnd_data.kmx_mem_used -= size; \
- spin_unlock(&kmxlnd_data.kmx_global_lock); \
- } \
- } while (0)
-
-#define MXLND_FREE(x, size) \
- do { \
- spin_lock(&kmxlnd_data.kmx_global_lock); \
- kmxlnd_data.kmx_mem_used -= size; \
- spin_unlock(&kmxlnd_data.kmx_global_lock); \
- LIBCFS_FREE(x, size); \
- } while (0)
-
-
-typedef struct kmx_tunables {
- int *kmx_n_waitd; /* # completion threads */
- int *kmx_max_peers; /* max # of potential peers */
- int *kmx_cksum; /* checksum small msgs? */
- int *kmx_ntx; /* total # of tx (1/2 for LNET 1/2 for CONN_REQ */
- int *kmx_credits; /* concurrent sends to 1 peer */
- int *kmx_board; /* MX board (NIC) number */
- int *kmx_ep_id; /* MX endpoint number */
- int *kmx_polling; /* if 0, block. if > 0, poll this many
- iterations before blocking */
- char **kmx_hosts; /* Location of hosts file, if used */
-} kmx_tunables_t;
-
-/* structure to hold IP-to-hostname resolution data */
-struct kmx_host {
- struct kmx_peer *mxh_peer; /* pointer to matching peer */
- u32 mxh_addr; /* IP address as int */
- char *mxh_hostname; /* peer's hostname */
- u32 mxh_board; /* peer's board rank */
- u32 mxh_ep_id; /* peer's MX endpoint ID */
- struct list_head mxh_list; /* position on kmx_hosts */
- spinlock_t mxh_lock; /* lock */
-};
-
-/* global interface state */
-typedef struct kmx_data
-{
- int kmx_init; /* initialization state */
- int kmx_shutdown; /* shutting down? */
- atomic_t kmx_nthreads; /* number of threads */
- struct completion *kmx_completions; /* array of completion structs */
- lnet_ni_t *kmx_ni; /* the LND instance */
- u64 kmx_incarnation; /* my incarnation value - unused */
- long kmx_mem_used; /* memory used */
- struct kmx_host *kmx_localhost; /* pointer to my kmx_host info */
- mx_endpoint_t kmx_endpt; /* the MX endpoint */
-
- spinlock_t kmx_global_lock; /* global lock */
-
- struct list_head kmx_conn_req; /* list of connection requests */
- spinlock_t kmx_conn_lock; /* connection list lock */
- struct semaphore kmx_conn_sem; /* semaphore for connection request list */
-
- struct list_head kmx_hosts; /* host lookup info */
- spinlock_t kmx_hosts_lock; /* hosts list lock */
-
- struct list_head kmx_peers[MXLND_HASH_SIZE];
- /* list of all known peers */
- rwlock_t kmx_peers_lock; /* peer list rw lock */
- atomic_t kmx_npeers; /* number of peers */
-
- struct list_head kmx_txs; /* all tx descriptors */
- struct list_head kmx_tx_idle; /* list of idle tx */
- spinlock_t kmx_tx_idle_lock; /* lock for idle tx list */
- s32 kmx_tx_used; /* txs in use */
- u64 kmx_tx_next_cookie; /* unique id for tx */
- struct list_head kmx_tx_queue; /* generic send queue */
- spinlock_t kmx_tx_queue_lock; /* lock for generic sends */
- struct semaphore kmx_tx_queue_sem; /* semaphore for tx queue */
-
- struct list_head kmx_rxs; /* all rx descriptors */
- spinlock_t kmx_rxs_lock; /* lock for rxs list */
- struct list_head kmx_rx_idle; /* list of idle tx */
- spinlock_t kmx_rx_idle_lock; /* lock for idle rx list */
-} kmx_data_t;
-
-#define MXLND_INIT_NOTHING 0 /* in the beginning, there was nothing... */
-#define MXLND_INIT_DATA 1 /* main data structures created */
-#define MXLND_INIT_TXS 2 /* tx descriptors created */
-#define MXLND_INIT_RXS 3 /* initial rx descriptors created */
-#define MXLND_INIT_MX 4 /* initiate MX library, open endpoint, get NIC id */
-#define MXLND_INIT_THREADS 5 /* waitd, timeoutd, tx_queued threads */
-#define MXLND_INIT_ALL 6 /* startup completed */
-
-#include "mxlnd_wire.h"
-
-enum kmx_req_type {
- MXLND_REQ_TX = 0,
- MXLND_REQ_RX = 1,
-};
-
-/* The life cycle of a request */
-enum kmx_req_state {
- MXLND_CTX_INIT = 0, /* just created */
- MXLND_CTX_IDLE = 1, /* available for use */
- MXLND_CTX_PREP = 2, /* getting ready for send/recv */
- MXLND_CTX_PENDING = 3, /* mx_isend() or mx_irecv() called */
- MXLND_CTX_COMPLETED = 4, /* cleaning up after completion or timeout */
- MXLND_CTX_CANCELED = 5, /* timed out but still in ctx list */
-};
-
-/* Context Structure - generic tx/rx descriptor
- * It represents the context (or state) of each send or receive request.
- * In other LNDs, they have separate TX and RX descriptors and this replaces both.
- *
- * We will keep the these on the global kmx_rxs and kmx_txs lists for cleanup
- * during shutdown(). We will move them between the rx/tx idle lists and the
- * pending list which is monitored by mxlnd_timeoutd().
- */
-struct kmx_ctx {
- enum kmx_req_type mxc_type; /* TX or RX */
- u64 mxc_incarnation; /* store the peer's incarnation here
- to verify before changing flow
- control credits after completion */
- unsigned long mxc_deadline; /* request time out in absolute jiffies */
- enum kmx_req_state mxc_state; /* what is the state of the request? */
- struct list_head mxc_global_list; /* place on kmx_rxs or kmx_txs */
- struct list_head mxc_list; /* place on rx/tx idle list, tx q, peer tx */
- struct list_head mxc_rx_list; /* place on mxp_rx_posted list */
- spinlock_t mxc_lock; /* lock */
-
- lnet_nid_t mxc_nid; /* dst's NID if peer is not known */
- struct kmx_peer *mxc_peer; /* owning peer */
- struct kmx_conn *mxc_conn; /* owning conn */
- struct kmx_msg *mxc_msg; /* msg hdr mapped to mxc_page */
- struct page *mxc_page; /* buffer for eager msgs */
- lnet_msg_t *mxc_lntmsg[2]; /* lnet msgs to finalize */
-
- u8 mxc_msg_type; /* what type of message is this? */
- u64 mxc_cookie; /* completion cookie */
- u64 mxc_match; /* MX match info */
- mx_ksegment_t mxc_seg; /* local MX ksegment for non-DATA */
- mx_ksegment_t *mxc_seg_list; /* MX ksegment array for DATA */
- int mxc_nseg; /* number of segments */
- unsigned long mxc_pin_type; /* MX_PIN_KERNEL or MX_PIN_PHYSICAL */
- u32 mxc_nob; /* number of bytes sent/received */
- mx_request_t mxc_mxreq; /* MX request */
- mx_status_t mxc_status; /* MX status */
- s64 mxc_get; /* # of times returned from idle list */
- s64 mxc_put; /* # of times returned from idle list */
-};
-
-#define MXLND_CONN_DISCONNECT -2 /* conn is being destroyed - do not add txs */
-#define MXLND_CONN_FAIL -1 /* connect failed (bad handshake, unavail, etc.) */
-#define MXLND_CONN_INIT 0 /* in the beginning, there was nothing... */
-#define MXLND_CONN_REQ 1 /* a connection request message is needed */
-#define MXLND_CONN_ACK 2 /* a connection ack is needed */
-#define MXLND_CONN_WAIT 3 /* waiting for req or ack to complete */
-#define MXLND_CONN_READY 4 /* ready to send */
-
-/* connection state - queues for queued and pending msgs */
-struct kmx_conn
-{
- u64 mxk_incarnation; /* connections's incarnation value */
- atomic_t mxk_refcount; /* reference counting */
-
- struct kmx_peer *mxk_peer; /* owning peer */
- mx_endpoint_addr_t mxk_epa; /* peer's endpoint address */
-
- struct list_head mxk_list; /* for placing on mxp_conns */
- spinlock_t mxk_lock; /* lock */
- unsigned long mxk_timeout; /* expiration of oldest pending tx/rx */
- unsigned long mxk_last_tx; /* when last tx completed with success */
- unsigned long mxk_last_rx; /* when last rx completed */
-
- int mxk_credits; /* # of my credits for sending to peer */
- int mxk_outstanding; /* # of credits to return */
-
- int mxk_status; /* can we send messages? MXLND_CONN_* */
- struct list_head mxk_tx_credit_queue; /* send queue for peer */
- struct list_head mxk_tx_free_queue; /* send queue for peer */
- int mxk_ntx_msgs; /* # of msgs on tx queues */
- int mxk_ntx_data ; /* # of DATA on tx queues */
- int mxk_ntx_posted; /* # of tx msgs in flight */
- int mxk_data_posted; /* # of tx data payloads in flight */
-
- struct list_head mxk_pending; /* in flight rxs and txs */
-};
-
-/* peer state */
-struct kmx_peer
-{
- lnet_nid_t mxp_nid; /* peer's LNET NID */
- u64 mxp_incarnation; /* peer's incarnation value */
- atomic_t mxp_refcount; /* reference counts */
-
- struct kmx_host *mxp_host; /* peer lookup info */
- u64 mxp_nic_id; /* remote's MX nic_id for mx_connect() */
-
- struct list_head mxp_peers; /* for placing on kmx_peers */
- spinlock_t mxp_lock; /* lock */
-
- struct list_head mxp_conns; /* list of connections */
- struct kmx_conn *mxp_conn; /* current connection */
-
- unsigned long mxp_reconnect_time; /* when to retry connect */
- int mxp_incompatible; /* incorrect conn_req values */
-};
-
-extern kmx_data_t kmxlnd_data;
-extern kmx_tunables_t kmxlnd_tunables;
-
-/* required for the LNET API */
-int mxlnd_startup(lnet_ni_t *ni);
-void mxlnd_shutdown(lnet_ni_t *ni);
-int mxlnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int mxlnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int mxlnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-
-/* in mxlnd.c */
-extern void mxlnd_thread_stop(long id);
-extern int mxlnd_ctx_alloc(struct kmx_ctx **ctxp, enum kmx_req_type type);
-extern void mxlnd_ctx_free(struct kmx_ctx *ctx);
-extern void mxlnd_ctx_init(struct kmx_ctx *ctx);
-extern lnet_nid_t mxlnd_nic_id2nid(lnet_ni_t *ni, u64 nic_id);
-extern u64 mxlnd_nid2nic_id(lnet_nid_t nid);
-
-/* in mxlnd_cb.c */
-void mxlnd_eager_recv(void *context, uint64_t match_value, uint32_t length);
-extern mx_unexp_handler_action_t mxlnd_unexpected_recv(void *context,
- mx_endpoint_addr_t source, uint64_t match_value, uint32_t length,
- void *data_if_available);
-extern void mxlnd_peer_free(struct kmx_peer *peer);
-extern void mxlnd_conn_free(struct kmx_conn *conn);
-extern void mxlnd_sleep(unsigned long timeout);
-extern int mxlnd_tx_queued(void *arg);
-extern void mxlnd_handle_rx_completion(struct kmx_ctx *rx);
-extern int mxlnd_check_sends(struct kmx_peer *peer);
-extern int mxlnd_tx_peer_queued(void *arg);
-extern int mxlnd_request_waitd(void *arg);
-extern int mxlnd_unex_recvd(void *arg);
-extern int mxlnd_timeoutd(void *arg);
-extern int mxlnd_connd(void *arg);
-
-#define mxlnd_peer_addref(peer) \
-do { \
- LASSERT(peer != NULL); \
- LASSERT(atomic_read(&(peer)->mxp_refcount) > 0); \
- atomic_inc(&(peer)->mxp_refcount); \
-} while (0)
-
-
-#define mxlnd_peer_decref(peer) \
-do { \
- LASSERT(atomic_read(&(peer)->mxp_refcount) > 0); \
- if (atomic_dec_and_test(&(peer)->mxp_refcount)) \
- mxlnd_peer_free(peer); \
-} while (0)
-
-#define mxlnd_conn_addref(conn) \
-do { \
- LASSERT(conn != NULL); \
- LASSERT(atomic_read(&(conn)->mxk_refcount) > 0); \
- atomic_inc(&(conn)->mxk_refcount); \
-} while (0)
-
-
-#define mxlnd_conn_decref(conn) \
-do { \
- LASSERT(atomic_read(&(conn)->mxk_refcount) > 0); \
- if (atomic_dec_and_test(&(conn)->mxk_refcount)) \
- mxlnd_conn_free(conn); \
-} while (0)
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Copyright (C) 2006 Myricom, Inc.
- * Author: Myricom, Inc. <help at myri.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "mxlnd.h"
-
-inline void mxlnd_noop(char *s, ...)
-{
- return;
-}
-
-char *
-mxlnd_ctxstate_to_str(int mxc_state)
-{
- switch (mxc_state) {
- case MXLND_CTX_INIT:
- return "MXLND_CTX_INIT";
- case MXLND_CTX_IDLE:
- return "MXLND_CTX_IDLE";
- case MXLND_CTX_PREP:
- return "MXLND_CTX_PREP";
- case MXLND_CTX_PENDING:
- return "MXLND_CTX_PENDING";
- case MXLND_CTX_COMPLETED:
- return "MXLND_CTX_COMPLETED";
- case MXLND_CTX_CANCELED:
- return "MXLND_CTX_CANCELED";
- default:
- return "*unknown*";
- }
-}
-
-char *
-mxlnd_connstatus_to_str(int mxk_status)
-{
- switch (mxk_status) {
- case MXLND_CONN_READY:
- return "MXLND_CONN_READY";
- case MXLND_CONN_INIT:
- return "MXLND_CONN_INIT";
- case MXLND_CONN_REQ:
- return "MXLND_CONN_REQ";
- case MXLND_CONN_ACK:
- return "MXLND_CONN_ACK";
- case MXLND_CONN_WAIT:
- return "MXLND_CONN_WAIT";
- case MXLND_CONN_DISCONNECT:
- return "MXLND_CONN_DISCONNECT";
- case MXLND_CONN_FAIL:
- return "MXLND_CONN_FAIL";
- default:
- return "unknown";
- }
-}
-
-char *
-mxlnd_msgtype_to_str(int type) {
- switch (type) {
- case MXLND_MSG_EAGER:
- return "MXLND_MSG_EAGER";
- case MXLND_MSG_CONN_REQ:
- return "MXLND_MSG_CONN_REQ";
- case MXLND_MSG_CONN_ACK:
- return "MXLND_MSG_CONN_ACK";
- case MXLND_MSG_NOOP:
- return "MXLND_MSG_NOOP";
- case MXLND_MSG_PUT_REQ:
- return "MXLND_MSG_PUT_REQ";
- case MXLND_MSG_PUT_ACK:
- return "MXLND_MSG_PUT_ACK";
- case MXLND_MSG_PUT_DATA:
- return "MXLND_MSG_PUT_DATA";
- case MXLND_MSG_GET_REQ:
- return "MXLND_MSG_GET_REQ";
- case MXLND_MSG_GET_DATA:
- return "MXLND_MSG_GET_DATA";
- default:
- return "unknown";
- }
-}
-
-char *
-mxlnd_lnetmsg_to_str(int type)
-{
- switch (type) {
- case LNET_MSG_ACK:
- return "LNET_MSG_ACK";
- case LNET_MSG_PUT:
- return "LNET_MSG_PUT";
- case LNET_MSG_GET:
- return "LNET_MSG_GET";
- case LNET_MSG_REPLY:
- return "LNET_MSG_REPLY";
- case LNET_MSG_HELLO:
- return "LNET_MSG_HELLO";
- default:
- LBUG();
- return "*unknown*";
- }
-}
-
-static inline u64
-//mxlnd_create_match(u8 msg_type, u8 error, u64 cookie)
-mxlnd_create_match(struct kmx_ctx *ctx, u8 error)
-{
- u64 type = (u64) ctx->mxc_msg_type;
- u64 err = (u64) error;
- u64 match = 0LL;
-
- LASSERT(ctx->mxc_msg_type != 0);
- LASSERT(ctx->mxc_cookie >> 52 == 0);
- match = (type << 60) | (err << 52) | ctx->mxc_cookie;
- return match;
-}
-
-static inline void
-mxlnd_parse_match(u64 match, u8 *msg_type, u8 *error, u64 *cookie)
-{
- *msg_type = (u8) (match >> 60);
- *error = (u8) ((match >> 52) & 0xFF);
- *cookie = match & 0xFFFFFFFFFFFFFLL;
- LASSERT(match == (MXLND_MASK_ICON_REQ & 0xF000000000000000LL) ||
- match == (MXLND_MASK_ICON_ACK & 0xF000000000000000LL) ||
- *msg_type == MXLND_MSG_EAGER ||
- *msg_type == MXLND_MSG_CONN_REQ ||
- *msg_type == MXLND_MSG_CONN_ACK ||
- *msg_type == MXLND_MSG_NOOP ||
- *msg_type == MXLND_MSG_PUT_REQ ||
- *msg_type == MXLND_MSG_PUT_ACK ||
- *msg_type == MXLND_MSG_PUT_DATA ||
- *msg_type == MXLND_MSG_GET_REQ ||
- *msg_type == MXLND_MSG_GET_DATA);
- return;
-}
-
-struct kmx_ctx *
-mxlnd_get_idle_rx(void)
-{
- struct list_head *tmp = NULL;
- struct kmx_ctx *rx = NULL;
-
- spin_lock(&kmxlnd_data.kmx_rx_idle_lock);
-
- if (list_empty (&kmxlnd_data.kmx_rx_idle)) {
- spin_unlock(&kmxlnd_data.kmx_rx_idle_lock);
- return NULL;
- }
-
- tmp = &kmxlnd_data.kmx_rx_idle;
- rx = list_entry (tmp->next, struct kmx_ctx, mxc_list);
- list_del_init(&rx->mxc_list);
- spin_unlock(&kmxlnd_data.kmx_rx_idle_lock);
-
-#if MXLND_DEBUG
- if (rx->mxc_get != rx->mxc_put) {
- CDEBUG(D_NETERROR, "*** RX get (%lld) != put (%lld) ***\n", rx->mxc_get, rx->mxc_put);
- CDEBUG(D_NETERROR, "*** incarnation= %lld ***\n", rx->mxc_incarnation);
- CDEBUG(D_NETERROR, "*** deadline= %ld ***\n", rx->mxc_deadline);
- CDEBUG(D_NETERROR, "*** state= %s ***\n", mxlnd_ctxstate_to_str(rx->mxc_state));
- CDEBUG(D_NETERROR, "*** listed?= %d ***\n", !list_empty(&rx->mxc_list));
- CDEBUG(D_NETERROR, "*** nid= 0x%llx ***\n", rx->mxc_nid);
- CDEBUG(D_NETERROR, "*** peer= 0x%p ***\n", rx->mxc_peer);
- CDEBUG(D_NETERROR, "*** msg_type= %s ***\n", mxlnd_msgtype_to_str(rx->mxc_msg_type));
- CDEBUG(D_NETERROR, "*** cookie= 0x%llx ***\n", rx->mxc_cookie);
- CDEBUG(D_NETERROR, "*** nob= %d ***\n", rx->mxc_nob);
- }
-#endif
- LASSERT (rx->mxc_get == rx->mxc_put);
-
- rx->mxc_get++;
-
- LASSERT (rx->mxc_state == MXLND_CTX_IDLE);
- rx->mxc_state = MXLND_CTX_PREP;
-
- return rx;
-}
-
-int
-mxlnd_put_idle_rx(struct kmx_ctx *rx)
-{
- if (rx == NULL) {
- CDEBUG(D_NETERROR, "called with NULL pointer\n");
- return -EINVAL;
- } else if (rx->mxc_type != MXLND_REQ_RX) {
- CDEBUG(D_NETERROR, "called with tx\n");
- return -EINVAL;
- }
- LASSERT(rx->mxc_get == rx->mxc_put + 1);
- mxlnd_ctx_init(rx);
- rx->mxc_put++;
- spin_lock(&kmxlnd_data.kmx_rx_idle_lock);
- list_add_tail(&rx->mxc_list, &kmxlnd_data.kmx_rx_idle);
- spin_unlock(&kmxlnd_data.kmx_rx_idle_lock);
- return 0;
-}
-
-int
-mxlnd_reduce_idle_rxs(__u32 count)
-{
- __u32 i = 0;
- struct kmx_ctx *rx = NULL;
-
- spin_lock(&kmxlnd_data.kmx_rxs_lock);
- for (i = 0; i < count; i++) {
- rx = mxlnd_get_idle_rx();
- if (rx != NULL) {
- struct list_head *tmp = &rx->mxc_global_list;
- list_del_init(tmp);
- mxlnd_ctx_free(rx);
- } else {
- CDEBUG(D_NETERROR, "only reduced %d out of %d rxs\n", i, count);
- break;
- }
- }
- spin_unlock(&kmxlnd_data.kmx_rxs_lock);
- return 0;
-}
-
-struct kmx_ctx *
-mxlnd_get_idle_tx(void)
-{
- struct list_head *tmp = NULL;
- struct kmx_ctx *tx = NULL;
-
- spin_lock(&kmxlnd_data.kmx_tx_idle_lock);
-
- if (list_empty (&kmxlnd_data.kmx_tx_idle)) {
- CDEBUG(D_NETERROR, "%d txs in use\n", kmxlnd_data.kmx_tx_used);
- spin_unlock(&kmxlnd_data.kmx_tx_idle_lock);
- return NULL;
- }
-
- tmp = &kmxlnd_data.kmx_tx_idle;
- tx = list_entry (tmp->next, struct kmx_ctx, mxc_list);
- list_del_init(&tx->mxc_list);
-
- /* Allocate a new completion cookie. It might not be needed,
- * but we've got a lock right now and we're unlikely to
- * wrap... */
- tx->mxc_cookie = kmxlnd_data.kmx_tx_next_cookie++;
- if (kmxlnd_data.kmx_tx_next_cookie > MXLND_MAX_COOKIE) {
- kmxlnd_data.kmx_tx_next_cookie = 1;
- }
- kmxlnd_data.kmx_tx_used++;
- spin_unlock(&kmxlnd_data.kmx_tx_idle_lock);
-
- LASSERT (tx->mxc_get == tx->mxc_put);
-
- tx->mxc_get++;
-
- LASSERT (tx->mxc_state == MXLND_CTX_IDLE);
- LASSERT (tx->mxc_lntmsg[0] == NULL);
- LASSERT (tx->mxc_lntmsg[1] == NULL);
-
- tx->mxc_state = MXLND_CTX_PREP;
-
- return tx;
-}
-
-int
-mxlnd_put_idle_tx(struct kmx_ctx *tx)
-{
- //int failed = (tx->mxc_status.code != MX_STATUS_SUCCESS && tx->mxc_status.code != MX_STATUS_TRUNCATED);
- int result = 0;
- lnet_msg_t *lntmsg[2];
-
- if (tx == NULL) {
- CDEBUG(D_NETERROR, "called with NULL pointer\n");
- return -EINVAL;
- } else if (tx->mxc_type != MXLND_REQ_TX) {
- CDEBUG(D_NETERROR, "called with rx\n");
- return -EINVAL;
- }
- if (!(tx->mxc_status.code == MX_STATUS_SUCCESS ||
- tx->mxc_status.code == MX_STATUS_TRUNCATED))
- result = -EIO;
-
- lntmsg[0] = tx->mxc_lntmsg[0];
- lntmsg[1] = tx->mxc_lntmsg[1];
-
- LASSERT(tx->mxc_get == tx->mxc_put + 1);
- mxlnd_ctx_init(tx);
- tx->mxc_put++;
- spin_lock(&kmxlnd_data.kmx_tx_idle_lock);
- list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle);
- kmxlnd_data.kmx_tx_used--;
- spin_unlock(&kmxlnd_data.kmx_tx_idle_lock);
- if (lntmsg[0] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[0], result);
- if (lntmsg[1] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[1], result);
- return 0;
-}
-
-/**
- * mxlnd_conn_free - free the conn
- * @conn - a kmx_conn pointer
- *
- * The calling function should remove the conn from the conns list first
- * then destroy it.
- */
-void
-mxlnd_conn_free(struct kmx_conn *conn)
-{
- struct kmx_peer *peer = conn->mxk_peer;
-
- CDEBUG(D_NET, "freeing conn 0x%p *****\n", conn);
- LASSERT (list_empty (&conn->mxk_tx_credit_queue) &&
- list_empty (&conn->mxk_tx_free_queue) &&
- list_empty (&conn->mxk_pending));
- if (!list_empty(&conn->mxk_list)) {
- spin_lock(&peer->mxp_lock);
- list_del_init(&conn->mxk_list);
- if (peer->mxp_conn == conn) {
- peer->mxp_conn = NULL;
- if (!(conn->mxk_epa.stuff[0] == 0 && conn->mxk_epa.stuff[1] == 0)) {
- mx_set_endpoint_addr_context(conn->mxk_epa,
- (void *) NULL);
- }
- }
- spin_unlock(&peer->mxp_lock);
- }
- mxlnd_peer_decref(conn->mxk_peer); /* drop conn's ref to peer */
- MXLND_FREE (conn, sizeof (*conn));
- return;
-}
-
-
-void
-mxlnd_conn_cancel_pending_rxs(struct kmx_conn *conn)
-{
- int found = 0;
- struct kmx_ctx *ctx = NULL;
- struct kmx_ctx *next = NULL;
- mx_return_t mxret = MX_SUCCESS;
- u32 result = 0;
-
- do {
- found = 0;
- spin_lock(&conn->mxk_lock);
- list_for_each_entry_safe(ctx, next, &conn->mxk_pending, mxc_list) {
- /* we will delete all including txs */
- list_del_init(&ctx->mxc_list);
- if (ctx->mxc_type == MXLND_REQ_RX) {
- found = 1;
- mxret = mx_cancel(kmxlnd_data.kmx_endpt,
- &ctx->mxc_mxreq,
- &result);
- if (mxret != MX_SUCCESS) {
- CDEBUG(D_NETERROR, "mx_cancel() returned %s (%d)\n", mx_strerror(mxret), mxret);
- }
- if (result == 1) {
- ctx->mxc_status.code = -ECONNABORTED;
- ctx->mxc_state = MXLND_CTX_CANCELED;
- /* NOTE this calls lnet_finalize() and
- * we cannot hold any locks when calling it.
- * It also calls mxlnd_conn_decref(conn) */
- spin_unlock(&conn->mxk_lock);
- mxlnd_handle_rx_completion(ctx);
- spin_lock(&conn->mxk_lock);
- }
- break;
- }
- }
- spin_unlock(&conn->mxk_lock);
- }
- while (found);
-
- return;
-}
-
-/**
- * mxlnd_conn_disconnect - shutdown a connection
- * @conn - a kmx_conn pointer
- *
- * This function sets the status to DISCONNECT, completes queued
- * txs with failure, calls mx_disconnect, which will complete
- * pending txs and matched rxs with failure.
- */
-void
-mxlnd_conn_disconnect(struct kmx_conn *conn, int mx_dis, int notify)
-{
- struct list_head *tmp = NULL;
-
- spin_lock(&conn->mxk_lock);
- if (conn->mxk_status == MXLND_CONN_DISCONNECT) {
- spin_unlock(&conn->mxk_lock);
- return;
- }
- conn->mxk_status = MXLND_CONN_DISCONNECT;
- conn->mxk_timeout = 0;
-
- while (!list_empty(&conn->mxk_tx_free_queue) ||
- !list_empty(&conn->mxk_tx_credit_queue)) {
-
- struct kmx_ctx *tx = NULL;
-
- if (!list_empty(&conn->mxk_tx_free_queue)) {
- tmp = &conn->mxk_tx_free_queue;
- } else {
- tmp = &conn->mxk_tx_credit_queue;
- }
-
- tx = list_entry(tmp->next, struct kmx_ctx, mxc_list);
- list_del_init(&tx->mxc_list);
- tx->mxc_status.code = -ECONNABORTED;
- spin_unlock(&conn->mxk_lock);
- mxlnd_put_idle_tx(tx);
- mxlnd_conn_decref(conn); /* for this tx */
- spin_lock(&conn->mxk_lock);
- }
-
- spin_unlock(&conn->mxk_lock);
-
- /* cancel pending rxs */
- mxlnd_conn_cancel_pending_rxs(conn);
-
- if (kmxlnd_data.kmx_shutdown != 1) {
-
- if (mx_dis) mx_disconnect(kmxlnd_data.kmx_endpt, conn->mxk_epa);
-
- if (notify) {
- time_t last_alive = 0;
- unsigned long last_msg = 0;
-
- /* notify LNET that we are giving up on this peer */
- if (time_after(conn->mxk_last_rx, conn->mxk_last_tx)) {
- last_msg = conn->mxk_last_rx;
- } else {
- last_msg = conn->mxk_last_tx;
- }
- last_alive = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() - last_msg);
- lnet_notify(kmxlnd_data.kmx_ni, conn->mxk_peer->mxp_nid, 0, last_alive);
- }
- }
- mxlnd_conn_decref(conn); /* drop the owning peer's reference */
-
- return;
-}
-
-/**
- * mxlnd_conn_alloc - allocate and initialize a new conn struct
- * @connp - address of a kmx_conn pointer
- * @peer - owning kmx_peer
- *
- * Returns 0 on success and -ENOMEM on failure
- */
-int
-mxlnd_conn_alloc_locked(struct kmx_conn **connp, struct kmx_peer *peer)
-{
- struct kmx_conn *conn = NULL;
-
- LASSERT(peer != NULL);
-
- MXLND_ALLOC(conn, sizeof (*conn));
- if (conn == NULL) {
- CDEBUG(D_NETERROR, "Cannot allocate conn\n");
- return -ENOMEM;
- }
- CDEBUG(D_NET, "allocated conn 0x%p for peer 0x%p\n", conn, peer);
-
- memset(conn, 0, sizeof(*conn));
-
- /* conn->mxk_incarnation = 0 - will be set by peer */
- atomic_set(&conn->mxk_refcount, 2); /* ref for owning peer
- and one for the caller */
- conn->mxk_peer = peer;
- /* mxk_epa - to be set after mx_iconnect() */
- INIT_LIST_HEAD(&conn->mxk_list);
- spin_lock_init(&conn->mxk_lock);
- /* conn->mxk_timeout = 0 */
- conn->mxk_last_tx = jiffies;
- conn->mxk_last_rx = conn->mxk_last_tx;
- conn->mxk_credits = *kmxlnd_tunables.kmx_credits;
- /* mxk_outstanding = 0 */
- conn->mxk_status = MXLND_CONN_INIT;
- INIT_LIST_HEAD(&conn->mxk_tx_credit_queue);
- INIT_LIST_HEAD(&conn->mxk_tx_free_queue);
- /* conn->mxk_ntx_msgs = 0 */
- /* conn->mxk_ntx_data = 0 */
- /* conn->mxk_ntx_posted = 0 */
- /* conn->mxk_data_posted = 0 */
- INIT_LIST_HEAD(&conn->mxk_pending);
-
- *connp = conn;
-
- mxlnd_peer_addref(peer); /* add a ref for this conn */
-
- /* add to front of peer's conns list */
- list_add(&conn->mxk_list, &peer->mxp_conns);
- peer->mxp_conn = conn;
- return 0;
-}
-
-int
-mxlnd_conn_alloc(struct kmx_conn **connp, struct kmx_peer *peer)
-{
- int ret = 0;
- spin_lock(&peer->mxp_lock);
- ret = mxlnd_conn_alloc_locked(connp, peer);
- spin_unlock(&peer->mxp_lock);
- return ret;
-}
-
-int
-mxlnd_q_pending_ctx(struct kmx_ctx *ctx)
-{
- int ret = 0;
- struct kmx_conn *conn = ctx->mxc_conn;
-
- ctx->mxc_state = MXLND_CTX_PENDING;
- if (conn != NULL) {
- spin_lock(&conn->mxk_lock);
- if (conn->mxk_status >= MXLND_CONN_INIT) {
- list_add_tail(&ctx->mxc_list, &conn->mxk_pending);
- if (conn->mxk_timeout == 0 || ctx->mxc_deadline < conn->mxk_timeout) {
- conn->mxk_timeout = ctx->mxc_deadline;
- }
- } else {
- ctx->mxc_state = MXLND_CTX_COMPLETED;
- ret = -1;
- }
- spin_unlock(&conn->mxk_lock);
- }
- return ret;
-}
-
-int
-mxlnd_deq_pending_ctx(struct kmx_ctx *ctx)
-{
- LASSERT(ctx->mxc_state == MXLND_CTX_PENDING ||
- ctx->mxc_state == MXLND_CTX_COMPLETED);
- if (ctx->mxc_state != MXLND_CTX_PENDING &&
- ctx->mxc_state != MXLND_CTX_COMPLETED) {
- CDEBUG(D_NETERROR, "deq ctx->mxc_state = %s\n",
- mxlnd_ctxstate_to_str(ctx->mxc_state));
- }
- ctx->mxc_state = MXLND_CTX_COMPLETED;
- if (!list_empty(&ctx->mxc_list)) {
- struct kmx_conn *conn = ctx->mxc_conn;
- struct kmx_ctx *next = NULL;
- LASSERT(conn != NULL);
- spin_lock(&conn->mxk_lock);
- list_del_init(&ctx->mxc_list);
- conn->mxk_timeout = 0;
- if (!list_empty(&conn->mxk_pending)) {
- next = list_entry(conn->mxk_pending.next, struct kmx_ctx, mxc_list);
- conn->mxk_timeout = next->mxc_deadline;
- }
- spin_unlock(&conn->mxk_lock);
- }
- return 0;
-}
-
-/**
- * mxlnd_peer_free - free the peer
- * @peer - a kmx_peer pointer
- *
- * The calling function should decrement the rxs, drain the tx queues and
- * remove the peer from the peers list first then destroy it.
- */
-void
-mxlnd_peer_free(struct kmx_peer *peer)
-{
- CDEBUG(D_NET, "freeing peer 0x%p\n", peer);
-
- LASSERT (atomic_read(&peer->mxp_refcount) == 0);
-
- if (peer->mxp_host != NULL) {
- spin_lock(&peer->mxp_host->mxh_lock);
- peer->mxp_host->mxh_peer = NULL;
- spin_unlock(&peer->mxp_host->mxh_lock);
- }
- if (!list_empty(&peer->mxp_peers)) {
- /* assume we are locked */
- list_del_init(&peer->mxp_peers);
- }
-
- MXLND_FREE (peer, sizeof (*peer));
- atomic_dec(&kmxlnd_data.kmx_npeers);
- return;
-}
-
-void
-mxlnd_peer_hostname_to_nic_id(struct kmx_peer *peer)
-{
- u64 nic_id = 0LL;
- char name[MX_MAX_HOSTNAME_LEN + 1];
- mx_return_t mxret = MX_SUCCESS;
-
- memset(name, 0, sizeof(name));
- snprintf(name, sizeof(name), "%s:%d", peer->mxp_host->mxh_hostname, peer->mxp_host->mxh_board);
- mxret = mx_hostname_to_nic_id(name, &nic_id);
- if (mxret == MX_SUCCESS) {
- peer->mxp_nic_id = nic_id;
- } else {
- CDEBUG(D_NETERROR, "mx_hostname_to_nic_id() failed for %s "
- "with %s\n", name, mx_strerror(mxret));
- mxret = mx_hostname_to_nic_id(peer->mxp_host->mxh_hostname, &nic_id);
- if (mxret == MX_SUCCESS) {
- peer->mxp_nic_id = nic_id;
- } else {
- CDEBUG(D_NETERROR, "mx_hostname_to_nic_id() failed for %s "
- "with %s\n", peer->mxp_host->mxh_hostname,
- mx_strerror(mxret));
- }
- }
- return;
-}
-
-/**
- * mxlnd_peer_alloc - allocate and initialize a new peer struct
- * @peerp - address of a kmx_peer pointer
- * @nid - LNET node id
- *
- * Returns 0 on success and -ENOMEM on failure
- */
-int
-mxlnd_peer_alloc(struct kmx_peer **peerp, lnet_nid_t nid)
-{
- int i = 0;
- int ret = 0;
- u32 addr = LNET_NIDADDR(nid);
- struct kmx_peer *peer = NULL;
- struct kmx_host *host = NULL;
-
- LASSERT (nid != LNET_NID_ANY && nid != 0LL);
-
- MXLND_ALLOC(peer, sizeof (*peer));
- if (peer == NULL) {
- CDEBUG(D_NETERROR, "Cannot allocate peer for NID 0x%llx\n", nid);
- return -ENOMEM;
- }
- CDEBUG(D_NET, "allocated peer 0x%p for NID 0x%llx\n", peer, nid);
-
- memset(peer, 0, sizeof(*peer));
-
- list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) {
- if (addr == host->mxh_addr) {
- peer->mxp_host = host;
- spin_lock(&host->mxh_lock);
- host->mxh_peer = peer;
- spin_unlock(&host->mxh_lock);
- break;
- }
- }
- if (peer->mxp_host == NULL) {
- CDEBUG(D_NETERROR, "unknown host for NID 0x%llx\n", nid);
- MXLND_FREE(peer, sizeof(*peer));
- return -ENXIO;
- }
-
- peer->mxp_nid = nid;
- /* peer->mxp_incarnation */
- atomic_set(&peer->mxp_refcount, 1); /* ref for kmx_peers list */
- mxlnd_peer_hostname_to_nic_id(peer);
-
- INIT_LIST_HEAD(&peer->mxp_peers);
- spin_lock_init(&peer->mxp_lock);
- INIT_LIST_HEAD(&peer->mxp_conns);
- ret = mxlnd_conn_alloc(&peer->mxp_conn, peer); /* adds 2nd conn ref here... */
- if (ret != 0) {
- mxlnd_peer_decref(peer);
- return ret;
- }
-
- for (i = 0; i < *kmxlnd_tunables.kmx_credits - 1; i++) {
- struct kmx_ctx *rx = NULL;
- ret = mxlnd_ctx_alloc(&rx, MXLND_REQ_RX);
- if (ret != 0) {
- mxlnd_reduce_idle_rxs(i);
- mxlnd_conn_decref(peer->mxp_conn); /* drop peer's ref... */
- mxlnd_conn_decref(peer->mxp_conn); /* drop this function's ref */
- mxlnd_peer_decref(peer);
- return ret;
- }
- spin_lock(&kmxlnd_data.kmx_rxs_lock);
- list_add_tail(&rx->mxc_global_list, &kmxlnd_data.kmx_rxs);
- spin_unlock(&kmxlnd_data.kmx_rxs_lock);
- rx->mxc_put = -1;
- mxlnd_put_idle_rx(rx);
- }
- /* peer->mxp_reconnect_time = 0 */
- /* peer->mxp_incompatible = 0 */
-
- *peerp = peer;
- return 0;
-}
-
-/**
- * mxlnd_nid_to_hash - hash the nid
- * @nid - msg pointer
- *
- * Takes the u64 nid and XORs the lowest N bits by the next lowest N bits.
- */
-static inline int
-mxlnd_nid_to_hash(lnet_nid_t nid)
-{
- return (nid & MXLND_HASH_MASK) ^
- ((nid & (MXLND_HASH_MASK << MXLND_HASH_BITS)) >> MXLND_HASH_BITS);
-}
-
-static inline struct kmx_peer *
-mxlnd_find_peer_by_nid_locked(lnet_nid_t nid)
-{
- int found = 0;
- int hash = 0;
- struct kmx_peer *peer = NULL;
-
- hash = mxlnd_nid_to_hash(nid);
-
- list_for_each_entry(peer, &kmxlnd_data.kmx_peers[hash], mxp_peers) {
- if (peer->mxp_nid == nid) {
- found = 1;
- mxlnd_peer_addref(peer);
- break;
- }
- }
- return (found ? peer : NULL);
-}
-
-static inline struct kmx_peer *
-mxlnd_find_peer_by_nid(lnet_nid_t nid)
-{
- struct kmx_peer *peer = NULL;
-
- read_lock(&kmxlnd_data.kmx_peers_lock);
- peer = mxlnd_find_peer_by_nid_locked(nid);
- read_unlock(&kmxlnd_data.kmx_peers_lock);
- return peer;
-}
-
-static inline int
-mxlnd_tx_requires_credit(struct kmx_ctx *tx)
-{
- return (tx->mxc_msg_type == MXLND_MSG_EAGER ||
- tx->mxc_msg_type == MXLND_MSG_GET_REQ ||
- tx->mxc_msg_type == MXLND_MSG_PUT_REQ ||
- tx->mxc_msg_type == MXLND_MSG_NOOP);
-}
-
-/**
- * mxlnd_init_msg - set type and number of bytes
- * @msg - msg pointer
- * @type - of message
- * @body_nob - bytes in msg body
- */
-static inline void
-mxlnd_init_msg(kmx_msg_t *msg, u8 type, int body_nob)
-{
- msg->mxm_type = type;
- msg->mxm_nob = offsetof(kmx_msg_t, mxm_u) + body_nob;
-}
-
-static inline void
-mxlnd_init_tx_msg (struct kmx_ctx *tx, u8 type, int body_nob, lnet_nid_t nid)
-{
- int nob = offsetof (kmx_msg_t, mxm_u) + body_nob;
- struct kmx_msg *msg = NULL;
-
- LASSERT (tx != NULL);
- LASSERT (nob <= MXLND_EAGER_SIZE);
-
- tx->mxc_nid = nid;
- /* tx->mxc_peer should have already been set if we know it */
- tx->mxc_msg_type = type;
- tx->mxc_nseg = 1;
- /* tx->mxc_seg.segment_ptr is already pointing to mxc_page */
- tx->mxc_seg.segment_length = nob;
- tx->mxc_pin_type = MX_PIN_PHYSICAL;
- //tx->mxc_state = MXLND_CTX_PENDING;
-
- msg = tx->mxc_msg;
- msg->mxm_type = type;
- msg->mxm_nob = nob;
-
- return;
-}
-
-static inline __u32
-mxlnd_cksum (void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return (sum == 0) ? 1 : sum;
-}
-
-/**
- * mxlnd_pack_msg - complete msg info
- * @tx - msg to send
- */
-static inline void
-mxlnd_pack_msg(struct kmx_ctx *tx)
-{
- struct kmx_msg *msg = tx->mxc_msg;
-
- /* type and nob should already be set in init_msg() */
- msg->mxm_magic = MXLND_MSG_MAGIC;
- msg->mxm_version = MXLND_MSG_VERSION;
- /* mxm_type */
- /* don't use mxlnd_tx_requires_credit() since we want PUT_ACK to
- * return credits as well */
- if (tx->mxc_msg_type != MXLND_MSG_CONN_REQ &&
- tx->mxc_msg_type != MXLND_MSG_CONN_ACK) {
- spin_lock(&tx->mxc_conn->mxk_lock);
- msg->mxm_credits = tx->mxc_conn->mxk_outstanding;
- tx->mxc_conn->mxk_outstanding = 0;
- spin_unlock(&tx->mxc_conn->mxk_lock);
- } else {
- msg->mxm_credits = 0;
- }
- /* mxm_nob */
- msg->mxm_cksum = 0;
- msg->mxm_srcnid = lnet_ptlcompat_srcnid(kmxlnd_data.kmx_ni->ni_nid, tx->mxc_nid);
- msg->mxm_srcstamp = kmxlnd_data.kmx_incarnation;
- msg->mxm_dstnid = tx->mxc_nid;
- /* if it is a new peer, the dststamp will be 0 */
- msg->mxm_dststamp = tx->mxc_conn->mxk_incarnation;
- msg->mxm_seq = tx->mxc_cookie;
-
- if (*kmxlnd_tunables.kmx_cksum) {
- msg->mxm_cksum = mxlnd_cksum(msg, msg->mxm_nob);
- }
-}
-
-int
-mxlnd_unpack_msg(kmx_msg_t *msg, int nob)
-{
- const int hdr_size = offsetof(kmx_msg_t, mxm_u);
- __u32 msg_cksum = 0;
- int flip = 0;
- int msg_nob = 0;
-
- /* 6 bytes are enough to have received magic + version */
- if (nob < 6) {
- CDEBUG(D_NETERROR, "not enough bytes for magic + hdr: %d\n", nob);
- return -EPROTO;
- }
-
- if (msg->mxm_magic == MXLND_MSG_MAGIC) {
- flip = 0;
- } else if (msg->mxm_magic == __swab32(MXLND_MSG_MAGIC)) {
- flip = 1;
- } else {
- CDEBUG(D_NETERROR, "Bad magic: %08x\n", msg->mxm_magic);
- return -EPROTO;
- }
-
- if (msg->mxm_version !=
- (flip ? __swab16(MXLND_MSG_VERSION) : MXLND_MSG_VERSION)) {
- CDEBUG(D_NETERROR, "Bad version: %d\n", msg->mxm_version);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CDEBUG(D_NETERROR, "not enough for a header: %d\n", nob);
- return -EPROTO;
- }
-
- msg_nob = flip ? __swab32(msg->mxm_nob) : msg->mxm_nob;
- if (msg_nob > nob) {
- CDEBUG(D_NETERROR, "Short message: got %d, wanted %d\n", nob, msg_nob);
- return -EPROTO;
- }
-
- /* checksum must be computed with mxm_cksum zero and BEFORE anything
- * gets flipped */
- msg_cksum = flip ? __swab32(msg->mxm_cksum) : msg->mxm_cksum;
- msg->mxm_cksum = 0;
- if (msg_cksum != 0 && msg_cksum != mxlnd_cksum(msg, msg_nob)) {
- CDEBUG(D_NETERROR, "Bad checksum\n");
- return -EPROTO;
- }
- msg->mxm_cksum = msg_cksum;
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- __swab16s(&msg->mxm_version);
- CLASSERT (sizeof(msg->mxm_type) == 1);
- CLASSERT (sizeof(msg->mxm_credits) == 1);
- msg->mxm_nob = msg_nob;
- __swab64s(&msg->mxm_srcnid);
- __swab64s(&msg->mxm_srcstamp);
- __swab64s(&msg->mxm_dstnid);
- __swab64s(&msg->mxm_dststamp);
- __swab64s(&msg->mxm_seq);
- }
-
- if (msg->mxm_srcnid == LNET_NID_ANY) {
- CDEBUG(D_NETERROR, "Bad src nid: %s\n", libcfs_nid2str(msg->mxm_srcnid));
- return -EPROTO;
- }
-
- switch (msg->mxm_type) {
- default:
- CDEBUG(D_NETERROR, "Unknown message type %x\n", msg->mxm_type);
- return -EPROTO;
-
- case MXLND_MSG_NOOP:
- break;
-
- case MXLND_MSG_EAGER:
- if (msg_nob < offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[0])) {
- CDEBUG(D_NETERROR, "Short EAGER: %d(%d)\n", msg_nob,
- (int)offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[0]));
- return -EPROTO;
- }
- break;
-
- case MXLND_MSG_PUT_REQ:
- if (msg_nob < hdr_size + sizeof(msg->mxm_u.put_req)) {
- CDEBUG(D_NETERROR, "Short PUT_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->mxm_u.put_req)));
- return -EPROTO;
- }
- if (flip)
- __swab64s(&msg->mxm_u.put_req.mxprm_cookie);
- break;
-
- case MXLND_MSG_PUT_ACK:
- if (msg_nob < hdr_size + sizeof(msg->mxm_u.put_ack)) {
- CDEBUG(D_NETERROR, "Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->mxm_u.put_ack)));
- return -EPROTO;
- }
- if (flip) {
- __swab64s(&msg->mxm_u.put_ack.mxpam_src_cookie);
- __swab64s(&msg->mxm_u.put_ack.mxpam_dst_cookie);
- }
- break;
-
- case MXLND_MSG_GET_REQ:
- if (msg_nob < hdr_size + sizeof(msg->mxm_u.get_req)) {
- CDEBUG(D_NETERROR, "Short GET_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->mxm_u.get_req)));
- return -EPROTO;
- }
- if (flip) {
- __swab64s(&msg->mxm_u.get_req.mxgrm_cookie);
- }
- break;
-
- case MXLND_MSG_CONN_REQ:
- case MXLND_MSG_CONN_ACK:
- if (msg_nob < hdr_size + sizeof(msg->mxm_u.conn_req)) {
- CDEBUG(D_NETERROR, "Short connreq/ack: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->mxm_u.conn_req)));
- return -EPROTO;
- }
- if (flip) {
- __swab32s(&msg->mxm_u.conn_req.mxcrm_queue_depth);
- __swab32s(&msg->mxm_u.conn_req.mxcrm_eager_size);
- }
- break;
- }
- return 0;
-}
-
-/**
- * mxlnd_recv_msg
- * @lntmsg - the LNET msg that this is continuing. If EAGER, then NULL.
- * @rx
- * @msg_type
- * @cookie
- * @length - length of incoming message
- * @pending - add to kmx_pending (0 is NO and 1 is YES)
- *
- * The caller gets the rx and sets nid, peer and conn if known.
- *
- * Returns 0 on success and -1 on failure
- */
-int
-mxlnd_recv_msg(lnet_msg_t *lntmsg, struct kmx_ctx *rx, u8 msg_type, u64 cookie, u32 length)
-{
- int ret = 0;
- mx_return_t mxret = MX_SUCCESS;
- uint64_t mask = 0xF00FFFFFFFFFFFFFLL;
-
- rx->mxc_msg_type = msg_type;
- rx->mxc_lntmsg[0] = lntmsg; /* may be NULL if EAGER */
- rx->mxc_cookie = cookie;
- /* rx->mxc_match may already be set */
- /* rx->mxc_seg.segment_ptr is already set */
- rx->mxc_seg.segment_length = length;
- rx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT;
- ret = mxlnd_q_pending_ctx(rx);
- if (ret == -1) {
- /* the caller is responsible for calling conn_decref() if needed */
- return -1;
- }
- mxret = mx_kirecv(kmxlnd_data.kmx_endpt, &rx->mxc_seg, 1, MX_PIN_PHYSICAL,
- cookie, mask, (void *) rx, &rx->mxc_mxreq);
- if (mxret != MX_SUCCESS) {
- mxlnd_deq_pending_ctx(rx);
- CDEBUG(D_NETERROR, "mx_kirecv() failed with %s (%d)\n",
- mx_strerror(mxret), (int) mxret);
- return -1;
- }
- return 0;
-}
-
-
-/**
- * mxlnd_unexpected_recv - this is the callback function that will handle
- * unexpected receives
- * @context - NULL, ignore
- * @source - the peer's mx_endpoint_addr_t
- * @match_value - the msg's bit, should be MXLND_MASK_EAGER
- * @length - length of incoming message
- * @data_if_available - ignore
- *
- * If it is an eager-sized msg, we will call recv_msg() with the actual
- * length. If it is a large message, we will call recv_msg() with a
- * length of 0 bytes to drop it because we should never have a large,
- * unexpected message.
- *
- * NOTE - The MX library blocks until this function completes. Make it as fast as
- * possible. DO NOT allocate memory which can block!
- *
- * If we cannot get a rx or the conn is closed, drop the message on the floor
- * (i.e. recv 0 bytes and ignore).
- */
-mx_unexp_handler_action_t
-mxlnd_unexpected_recv(void *context, mx_endpoint_addr_t source,
- uint64_t match_value, uint32_t length, void *data_if_available)
-{
- int ret = 0;
- struct kmx_ctx *rx = NULL;
- mx_ksegment_t seg;
- u8 msg_type = 0;
- u8 error = 0;
- u64 cookie = 0LL;
-
- if (context != NULL) {
- CDEBUG(D_NETERROR, "unexpected receive with non-NULL context\n");
- }
-
-#if MXLND_DEBUG
- CDEBUG(D_NET, "unexpected_recv() bits=0x%llx length=%d\n", match_value, length);
-#endif
-
- rx = mxlnd_get_idle_rx();
- if (rx != NULL) {
- mxlnd_parse_match(match_value, &msg_type, &error, &cookie);
- if (length <= MXLND_EAGER_SIZE) {
- ret = mxlnd_recv_msg(NULL, rx, msg_type, match_value, length);
- } else {
- CDEBUG(D_NETERROR, "unexpected large receive with "
- "match_value=0x%llx length=%d\n",
- match_value, length);
- ret = mxlnd_recv_msg(NULL, rx, msg_type, match_value, 0);
- }
-
- if (ret == 0) {
- struct kmx_peer *peer = NULL;
- struct kmx_conn *conn = NULL;
-
- /* NOTE to avoid a peer disappearing out from under us,
- * read lock the peers lock first */
- read_lock(&kmxlnd_data.kmx_peers_lock);
- mx_get_endpoint_addr_context(source, (void **) &peer);
- if (peer != NULL) {
- mxlnd_peer_addref(peer); /* add a ref... */
- spin_lock(&peer->mxp_lock);
- conn = peer->mxp_conn;
- if (conn) {
- mxlnd_conn_addref(conn); /* add ref until rx completed */
- mxlnd_peer_decref(peer); /* and drop peer ref */
- rx->mxc_conn = conn;
- }
- spin_unlock(&peer->mxp_lock);
- rx->mxc_peer = peer;
- rx->mxc_nid = peer->mxp_nid;
- }
- read_unlock(&kmxlnd_data.kmx_peers_lock);
- } else {
- CDEBUG(D_NETERROR, "could not post receive\n");
- mxlnd_put_idle_rx(rx);
- }
- }
-
- if (rx == NULL || ret != 0) {
- if (rx == NULL) {
- CDEBUG(D_NETERROR, "no idle rxs available - dropping rx\n");
- } else {
- /* ret != 0 */
- CDEBUG(D_NETERROR, "disconnected peer - dropping rx\n");
- }
- seg.segment_ptr = 0LL;
- seg.segment_length = 0;
- mx_kirecv(kmxlnd_data.kmx_endpt, &seg, 1, MX_PIN_PHYSICAL,
- match_value, 0xFFFFFFFFFFFFFFFFLL, NULL, NULL);
- }
-
- return MX_RECV_CONTINUE;
-}
-
-
-int
-mxlnd_get_peer_info(int index, lnet_nid_t *nidp, int *count)
-{
- int i = 0;
- int ret = -ENOENT;
- struct kmx_peer *peer = NULL;
-
- read_lock(&kmxlnd_data.kmx_peers_lock);
- for (i = 0; i < MXLND_HASH_SIZE; i++) {
- list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) {
- if (index-- > 0)
- continue;
-
- *nidp = peer->mxp_nid;
- *count = atomic_read(&peer->mxp_refcount);
- ret = 0;
- break;
- }
- }
- read_unlock(&kmxlnd_data.kmx_peers_lock);
-
- return ret;
-}
-
-void
-mxlnd_del_peer_locked(struct kmx_peer *peer)
-{
- list_del_init(&peer->mxp_peers); /* remove from the global list */
- if (peer->mxp_conn) mxlnd_conn_disconnect(peer->mxp_conn, 1, 0);
- mxlnd_peer_decref(peer); /* drop global list ref */
- return;
-}
-
-int
-mxlnd_del_peer(lnet_nid_t nid)
-{
- int i = 0;
- int ret = 0;
- struct kmx_peer *peer = NULL;
- struct kmx_peer *next = NULL;
-
- if (nid != LNET_NID_ANY) {
- peer = mxlnd_find_peer_by_nid(nid); /* adds peer ref */
- }
- write_lock(&kmxlnd_data.kmx_peers_lock);
- if (nid != LNET_NID_ANY) {
- if (peer == NULL) {
- ret = -ENOENT;
- } else {
- mxlnd_peer_decref(peer); /* and drops it */
- mxlnd_del_peer_locked(peer);
- }
- } else { /* LNET_NID_ANY */
- for (i = 0; i < MXLND_HASH_SIZE; i++) {
- list_for_each_entry_safe(peer, next,
- &kmxlnd_data.kmx_peers[i], mxp_peers) {
- mxlnd_del_peer_locked(peer);
- }
- }
- }
- write_unlock(&kmxlnd_data.kmx_peers_lock);
-
- return ret;
-}
-
-struct kmx_conn *
-mxlnd_get_conn_by_idx(int index)
-{
- int i = 0;
- struct kmx_peer *peer = NULL;
- struct kmx_conn *conn = NULL;
-
- read_lock(&kmxlnd_data.kmx_peers_lock);
- for (i = 0; i < MXLND_HASH_SIZE; i++) {
- list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) {
- spin_lock(&peer->mxp_lock);
- list_for_each_entry(conn, &peer->mxp_conns, mxk_list) {
- if (index-- > 0) {
- continue;
- }
-
- mxlnd_conn_addref(conn); /* add ref here, dec in ctl() */
- spin_unlock(&peer->mxp_lock);
- read_unlock(&kmxlnd_data.kmx_peers_lock);
- return conn;
- }
- spin_unlock(&peer->mxp_lock);
- }
- }
- read_unlock(&kmxlnd_data.kmx_peers_lock);
-
- return NULL;
-}
-
-void
-mxlnd_close_matching_conns_locked(struct kmx_peer *peer)
-{
- struct kmx_conn *conn = NULL;
- struct kmx_conn *next = NULL;
-
- spin_lock(&peer->mxp_lock);
- list_for_each_entry_safe(conn, next, &peer->mxp_conns, mxk_list) {
- mxlnd_conn_disconnect(conn, 0 , 0);
- }
- spin_unlock(&peer->mxp_lock);
- return;
-}
-
-int
-mxlnd_close_matching_conns(lnet_nid_t nid)
-{
- int i = 0;
- int ret = 0;
- struct kmx_peer *peer = NULL;
-
- read_lock(&kmxlnd_data.kmx_peers_lock);
- if (nid != LNET_NID_ANY) {
- peer = mxlnd_find_peer_by_nid(nid); /* adds peer ref */
- if (peer == NULL) {
- ret = -ENOENT;
- } else {
- mxlnd_close_matching_conns_locked(peer);
- mxlnd_peer_decref(peer); /* and drops it here */
- }
- } else { /* LNET_NID_ANY */
- for (i = 0; i < MXLND_HASH_SIZE; i++) {
- list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers)
- mxlnd_close_matching_conns_locked(peer);
- }
- }
- read_unlock(&kmxlnd_data.kmx_peers_lock);
-
- return ret;
-}
-
-/**
- * mxlnd_ctl - modify MXLND parameters
- * @ni - LNET interface handle
- * @cmd - command to change
- * @arg - the ioctl data
- *
- * Not implemented yet.
- */
-int
-mxlnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int ret = -EINVAL;
-
- LASSERT (ni == kmxlnd_data.kmx_ni);
-
- switch (cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- int count = 0;
-
- ret = mxlnd_get_peer_info(data->ioc_count, &nid, &count);
- data->ioc_nid = nid;
- data->ioc_count = count;
- break;
- }
- case IOC_LIBCFS_DEL_PEER: {
- ret = mxlnd_del_peer(data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- struct kmx_conn *conn = NULL;
-
- conn = mxlnd_get_conn_by_idx(data->ioc_count);
- if (conn == NULL) {
- ret = -ENOENT;
- } else {
- ret = 0;
- data->ioc_nid = conn->mxk_peer->mxp_nid;
- mxlnd_conn_decref(conn); /* dec ref taken in get_conn_by_idx() */
- }
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- ret = mxlnd_close_matching_conns(data->ioc_nid);
- break;
- }
- default:
- CDEBUG(D_NETERROR, "unknown ctl(%d)\n", cmd);
- break;
- }
-
- return ret;
-}
-
-/**
- * mxlnd_peer_queue_tx_locked - add the tx to the global tx queue
- * @tx
- *
- * Add the tx to the peer's msg or data queue. The caller has locked the peer.
- */
-void
-mxlnd_peer_queue_tx_locked(struct kmx_ctx *tx)
-{
- u8 msg_type = tx->mxc_msg_type;
- //struct kmx_peer *peer = tx->mxc_peer;
- struct kmx_conn *conn = tx->mxc_conn;
-
- LASSERT (msg_type != 0);
- LASSERT (tx->mxc_nid != 0);
- LASSERT (tx->mxc_peer != NULL);
- LASSERT (tx->mxc_conn != NULL);
-
- tx->mxc_incarnation = conn->mxk_incarnation;
-
- if (msg_type != MXLND_MSG_PUT_DATA &&
- msg_type != MXLND_MSG_GET_DATA) {
- /* msg style tx */
- if (mxlnd_tx_requires_credit(tx)) {
- list_add_tail(&tx->mxc_list, &conn->mxk_tx_credit_queue);
- conn->mxk_ntx_msgs++;
- } else if (msg_type == MXLND_MSG_CONN_REQ ||
- msg_type == MXLND_MSG_CONN_ACK) {
- /* put conn msgs at the front of the queue */
- list_add(&tx->mxc_list, &conn->mxk_tx_free_queue);
- } else {
- /* PUT_ACK, PUT_NAK */
- list_add_tail(&tx->mxc_list, &conn->mxk_tx_free_queue);
- conn->mxk_ntx_msgs++;
- }
- } else {
- /* data style tx */
- list_add_tail(&tx->mxc_list, &conn->mxk_tx_free_queue);
- conn->mxk_ntx_data++;
- }
-
- return;
-}
-
-/**
- * mxlnd_peer_queue_tx - add the tx to the global tx queue
- * @tx
- *
- * Add the tx to the peer's msg or data queue
- */
-static inline void
-mxlnd_peer_queue_tx(struct kmx_ctx *tx)
-{
- LASSERT(tx->mxc_peer != NULL);
- LASSERT(tx->mxc_conn != NULL);
- spin_lock(&tx->mxc_conn->mxk_lock);
- mxlnd_peer_queue_tx_locked(tx);
- spin_unlock(&tx->mxc_conn->mxk_lock);
-
- return;
-}
-
-/**
- * mxlnd_queue_tx - add the tx to the global tx queue
- * @tx
- *
- * Add the tx to the global queue and up the tx_queue_sem
- */
-void
-mxlnd_queue_tx(struct kmx_ctx *tx)
-{
- struct kmx_peer *peer = tx->mxc_peer;
- LASSERT (tx->mxc_nid != 0);
-
- if (peer != NULL) {
- if (peer->mxp_incompatible &&
- tx->mxc_msg_type != MXLND_MSG_CONN_ACK) {
- /* let this fail now */
- tx->mxc_status.code = -ECONNABORTED;
- mxlnd_conn_decref(peer->mxp_conn);
- mxlnd_put_idle_tx(tx);
- return;
- }
- if (tx->mxc_conn == NULL) {
- int ret = 0;
- struct kmx_conn *conn = NULL;
-
- ret = mxlnd_conn_alloc(&conn, peer); /* adds 2nd ref for tx... */
- if (ret != 0) {
- tx->mxc_status.code = ret;
- mxlnd_put_idle_tx(tx);
- goto done;
- }
- tx->mxc_conn = conn;
- mxlnd_peer_decref(peer); /* and takes it from peer */
- }
- LASSERT(tx->mxc_conn != NULL);
- mxlnd_peer_queue_tx(tx);
- mxlnd_check_sends(peer);
- } else {
- spin_lock(&kmxlnd_data.kmx_tx_queue_lock);
- list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_queue);
- spin_unlock(&kmxlnd_data.kmx_tx_queue_lock);
- up(&kmxlnd_data.kmx_tx_queue_sem);
- }
-done:
- return;
-}
-
-int
-mxlnd_setup_iov(struct kmx_ctx *ctx, u32 niov, struct iovec *iov, u32 offset, u32 nob)
-{
- int i = 0;
- int sum = 0;
- int old_sum = 0;
- int nseg = 0;
- int first_iov = -1;
- int first_iov_offset = 0;
- int first_found = 0;
- int last_iov = -1;
- int last_iov_length = 0;
- mx_ksegment_t *seg = NULL;
-
- if (niov == 0) return 0;
- LASSERT(iov != NULL);
-
- for (i = 0; i < niov; i++) {
- sum = old_sum + (u32) iov[i].iov_len;
- if (!first_found && (sum > offset)) {
- first_iov = i;
- first_iov_offset = offset - old_sum;
- first_found = 1;
- sum = (u32) iov[i].iov_len - first_iov_offset;
- old_sum = 0;
- }
- if (sum >= nob) {
- last_iov = i;
- last_iov_length = (u32) iov[i].iov_len - (sum - nob);
- if (first_iov == last_iov) last_iov_length -= first_iov_offset;
- break;
- }
- old_sum = sum;
- }
- LASSERT(first_iov >= 0 && last_iov >= first_iov);
- nseg = last_iov - first_iov + 1;
- LASSERT(nseg > 0);
-
- MXLND_ALLOC (seg, nseg * sizeof(*seg));
- if (seg == NULL) {
- CDEBUG(D_NETERROR, "MXLND_ALLOC() failed\n");
- return -1;
- }
- memset(seg, 0, nseg * sizeof(*seg));
- ctx->mxc_nseg = nseg;
- sum = 0;
- for (i = 0; i < nseg; i++) {
- seg[i].segment_ptr = MX_KVA_TO_U64(iov[first_iov + i].iov_base);
- seg[i].segment_length = (u32) iov[first_iov + i].iov_len;
- if (i == 0) {
- seg[i].segment_ptr += (u64) first_iov_offset;
- seg[i].segment_length -= (u32) first_iov_offset;
- }
- if (i == (nseg - 1)) {
- seg[i].segment_length = (u32) last_iov_length;
- }
- sum += seg[i].segment_length;
- }
- ctx->mxc_seg_list = seg;
- ctx->mxc_pin_type = MX_PIN_KERNEL;
-#ifdef MX_PIN_FULLPAGES
- ctx->mxc_pin_type |= MX_PIN_FULLPAGES;
-#endif
- LASSERT(nob == sum);
- return 0;
-}
-
-int
-mxlnd_setup_kiov(struct kmx_ctx *ctx, u32 niov, lnet_kiov_t *kiov, u32 offset, u32 nob)
-{
- int i = 0;
- int sum = 0;
- int old_sum = 0;
- int nseg = 0;
- int first_kiov = -1;
- int first_kiov_offset = 0;
- int first_found = 0;
- int last_kiov = -1;
- int last_kiov_length = 0;
- mx_ksegment_t *seg = NULL;
-
- if (niov == 0) return 0;
- LASSERT(kiov != NULL);
-
- for (i = 0; i < niov; i++) {
- sum = old_sum + kiov[i].kiov_len;
- if (i == 0) sum -= kiov[i].kiov_offset;
- if (!first_found && (sum > offset)) {
- first_kiov = i;
- first_kiov_offset = offset - old_sum;
- //if (i == 0) first_kiov_offset + kiov[i].kiov_offset;
- if (i == 0) first_kiov_offset = kiov[i].kiov_offset;
- first_found = 1;
- sum = kiov[i].kiov_len - first_kiov_offset;
- old_sum = 0;
- }
- if (sum >= nob) {
- last_kiov = i;
- last_kiov_length = kiov[i].kiov_len - (sum - nob);
- if (first_kiov == last_kiov) last_kiov_length -= first_kiov_offset;
- break;
- }
- old_sum = sum;
- }
- LASSERT(first_kiov >= 0 && last_kiov >= first_kiov);
- nseg = last_kiov - first_kiov + 1;
- LASSERT(nseg > 0);
-
- MXLND_ALLOC (seg, nseg * sizeof(*seg));
- if (seg == NULL) {
- CDEBUG(D_NETERROR, "MXLND_ALLOC() failed\n");
- return -1;
- }
- memset(seg, 0, niov * sizeof(*seg));
- ctx->mxc_nseg = niov;
- sum = 0;
- for (i = 0; i < niov; i++) {
- seg[i].segment_ptr = lnet_page2phys(kiov[first_kiov + i].kiov_page);
- seg[i].segment_length = kiov[first_kiov + i].kiov_len;
- if (i == 0) {
- seg[i].segment_ptr += (u64) first_kiov_offset;
- /* we have to add back the original kiov_offset */
- seg[i].segment_length -= first_kiov_offset +
- kiov[first_kiov].kiov_offset;
- }
- if (i == (nseg - 1)) {
- seg[i].segment_length = last_kiov_length;
- }
- sum += seg[i].segment_length;
- }
- ctx->mxc_seg_list = seg;
- ctx->mxc_pin_type = MX_PIN_PHYSICAL;
-#ifdef MX_PIN_FULLPAGES
- ctx->mxc_pin_type |= MX_PIN_FULLPAGES;
-#endif
- LASSERT(nob == sum);
- return 0;
-}
-
-void
-mxlnd_send_nak(struct kmx_ctx *tx, lnet_nid_t nid, int type, int status, __u64 cookie)
-{
- LASSERT(type == MXLND_MSG_PUT_ACK);
- mxlnd_init_tx_msg(tx, type, sizeof(kmx_putack_msg_t), tx->mxc_nid);
- tx->mxc_cookie = cookie;
- tx->mxc_msg->mxm_u.put_ack.mxpam_src_cookie = cookie;
- tx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie = ((u64) status << 52); /* error code */
- tx->mxc_match = mxlnd_create_match(tx, status);
-
- mxlnd_queue_tx(tx);
-}
-
-
-/**
- * mxlnd_send_data - get tx, map [k]iov, queue tx
- * @ni
- * @lntmsg
- * @peer
- * @msg_type
- * @cookie
- *
- * This setups the DATA send for PUT or GET.
- *
- * On success, it queues the tx, on failure it calls lnet_finalize()
- */
-void
-mxlnd_send_data(lnet_ni_t *ni, lnet_msg_t *lntmsg, struct kmx_peer *peer, u8 msg_type, u64 cookie)
-{
- int ret = 0;
- lnet_process_id_t target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- struct kmx_ctx *tx = NULL;
-
- LASSERT(lntmsg != NULL);
- LASSERT(peer != NULL);
- LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA);
- LASSERT((cookie>>52) == 0);
-
- tx = mxlnd_get_idle_tx();
- if (tx == NULL) {
- CDEBUG(D_NETERROR, "Can't allocate %s tx for %s\n",
- msg_type == MXLND_MSG_PUT_DATA ? "PUT_DATA" : "GET_DATA",
- libcfs_nid2str(target.nid));
- goto failed_0;
- }
- tx->mxc_nid = target.nid;
- /* NOTE called when we have a ref on the conn, get one for this tx */
- mxlnd_conn_addref(peer->mxp_conn);
- tx->mxc_peer = peer;
- tx->mxc_conn = peer->mxp_conn;
- tx->mxc_msg_type = msg_type;
- tx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT;
- tx->mxc_state = MXLND_CTX_PENDING;
- tx->mxc_lntmsg[0] = lntmsg;
- tx->mxc_cookie = cookie;
- tx->mxc_match = mxlnd_create_match(tx, 0);
-
- /* This setups up the mx_ksegment_t to send the DATA payload */
- if (nob == 0) {
- /* do not setup the segments */
- CDEBUG(D_NETERROR, "nob = 0; why didn't we use an EAGER reply "
- "to %s?\n", libcfs_nid2str(target.nid));
- ret = 0;
- } else if (kiov == NULL) {
- ret = mxlnd_setup_iov(tx, niov, iov, offset, nob);
- } else {
- ret = mxlnd_setup_kiov(tx, niov, kiov, offset, nob);
- }
- if (ret != 0) {
- CDEBUG(D_NETERROR, "Can't setup send DATA for %s\n",
- libcfs_nid2str(target.nid));
- tx->mxc_status.code = -EIO;
- goto failed_1;
- }
- mxlnd_queue_tx(tx);
- return;
-
-failed_1:
- mxlnd_conn_decref(peer->mxp_conn);
- mxlnd_put_idle_tx(tx);
- return;
-
-failed_0:
- CDEBUG(D_NETERROR, "no tx avail\n");
- lnet_finalize(ni, lntmsg, -EIO);
- return;
-}
-
-/**
- * mxlnd_recv_data - map [k]iov, post rx
- * @ni
- * @lntmsg
- * @rx
- * @msg_type
- * @cookie
- *
- * This setups the DATA receive for PUT or GET.
- *
- * On success, it returns 0, on failure it returns -1
- */
-int
-mxlnd_recv_data(lnet_ni_t *ni, lnet_msg_t *lntmsg, struct kmx_ctx *rx, u8 msg_type, u64 cookie)
-{
- int ret = 0;
- lnet_process_id_t target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- mx_return_t mxret = MX_SUCCESS;
-
- /* above assumes MXLND_MSG_PUT_DATA */
- if (msg_type == MXLND_MSG_GET_DATA) {
- niov = lntmsg->msg_md->md_niov;
- iov = lntmsg->msg_md->md_iov.iov;
- kiov = lntmsg->msg_md->md_iov.kiov;
- offset = 0;
- nob = lntmsg->msg_md->md_length;
- }
-
- LASSERT(lntmsg != NULL);
- LASSERT(rx != NULL);
- LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA);
- LASSERT((cookie>>52) == 0); /* ensure top 12 bits are 0 */
-
- rx->mxc_msg_type = msg_type;
- rx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT;
- rx->mxc_state = MXLND_CTX_PENDING;
- rx->mxc_nid = target.nid;
- /* if posting a GET_DATA, we may not yet know the peer */
- if (rx->mxc_peer != NULL) {
- rx->mxc_conn = rx->mxc_peer->mxp_conn;
- }
- rx->mxc_lntmsg[0] = lntmsg;
- rx->mxc_cookie = cookie;
- rx->mxc_match = mxlnd_create_match(rx, 0);
- /* This setups up the mx_ksegment_t to receive the DATA payload */
- if (kiov == NULL) {
- ret = mxlnd_setup_iov(rx, niov, iov, offset, nob);
- } else {
- ret = mxlnd_setup_kiov(rx, niov, kiov, offset, nob);
- }
- if (msg_type == MXLND_MSG_GET_DATA) {
- rx->mxc_lntmsg[1] = lnet_create_reply_msg(kmxlnd_data.kmx_ni, lntmsg);
- if (rx->mxc_lntmsg[1] == NULL) {
- CDEBUG(D_NETERROR, "Can't create reply for GET -> %s\n",
- libcfs_nid2str(target.nid));
- ret = -1;
- }
- }
- if (ret != 0) {
- CDEBUG(D_NETERROR, "Can't setup %s rx for %s\n",
- msg_type == MXLND_MSG_PUT_DATA ? "PUT_DATA" : "GET_DATA",
- libcfs_nid2str(target.nid));
- return -1;
- }
- ret = mxlnd_q_pending_ctx(rx);
- if (ret == -1) {
- return -1;
- }
- CDEBUG(D_NET, "receiving %s 0x%llx\n", mxlnd_msgtype_to_str(msg_type), rx->mxc_cookie);
- mxret = mx_kirecv(kmxlnd_data.kmx_endpt,
- rx->mxc_seg_list, rx->mxc_nseg,
- rx->mxc_pin_type, rx->mxc_match,
- 0xF00FFFFFFFFFFFFFLL, (void *) rx,
- &rx->mxc_mxreq);
- if (mxret != MX_SUCCESS) {
- if (rx->mxc_conn != NULL) {
- mxlnd_deq_pending_ctx(rx);
- }
- CDEBUG(D_NETERROR, "mx_kirecv() failed with %d for %s\n",
- (int) mxret, libcfs_nid2str(target.nid));
- return -1;
- }
-
- return 0;
-}
-
-/**
- * mxlnd_send - the LND required send function
- * @ni
- * @private
- * @lntmsg
- *
- * This must not block. Since we may not have a peer struct for the receiver,
- * it will append send messages on a global tx list. We will then up the
- * tx_queued's semaphore to notify it of the new send.
- */
-int
-mxlnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- int ret = 0;
- int type = lntmsg->msg_type;
- lnet_hdr_t *hdr = &lntmsg->msg_hdr;
- lnet_process_id_t target = lntmsg->msg_target;
- lnet_nid_t nid = target.nid;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- struct kmx_ctx *tx = NULL;
- struct kmx_msg *txmsg = NULL;
- struct kmx_ctx *rx = (struct kmx_ctx *) private; /* for REPLY */
- struct kmx_ctx *rx_data = NULL;
- struct kmx_conn *conn = NULL;
- int nob = 0;
- uint32_t length = 0;
- struct kmx_peer *peer = NULL;
-
- CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT (payload_nob == 0 || payload_niov > 0);
- LASSERT (payload_niov <= LNET_MAX_IOV);
- /* payload is either all vaddrs or all pages */
- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
- /* private is used on LNET_GET_REPLY only, NULL for all other cases */
-
- /* NOTE we may not know the peer if it is the very first PUT_REQ or GET_REQ
- * to a new peer, use the nid */
- peer = mxlnd_find_peer_by_nid(nid); /* adds peer ref */
- if (peer != NULL) {
- if (unlikely(peer->mxp_incompatible)) {
- mxlnd_peer_decref(peer); /* drop ref taken above */
- } else {
- spin_lock(&peer->mxp_lock);
- conn = peer->mxp_conn;
- if (conn) {
- mxlnd_conn_addref(conn);
- mxlnd_peer_decref(peer); /* drop peer ref taken above */
- }
- spin_unlock(&peer->mxp_lock);
- }
- }
- if (conn == NULL && peer != NULL) {
- CDEBUG(D_NETERROR, "conn==NULL peer=0x%p nid=0x%llx payload_nob=%d type=%s\n",
- peer, nid, payload_nob, mxlnd_lnetmsg_to_str(type));
- }
-
- switch (type) {
- case LNET_MSG_ACK:
- LASSERT (payload_nob == 0);
- break;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- /* Is the payload small enough not to need DATA? */
- nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[payload_nob]);
- if (nob <= MXLND_EAGER_SIZE)
- break; /* send EAGER */
-
- tx = mxlnd_get_idle_tx();
- if (unlikely(tx == NULL)) {
- CDEBUG(D_NETERROR, "Can't allocate %s tx for %s\n",
- type == LNET_MSG_PUT ? "PUT" : "REPLY",
- libcfs_nid2str(nid));
- if (conn) mxlnd_conn_decref(conn);
- return -ENOMEM;
- }
-
- /* the peer may be NULL */
- tx->mxc_peer = peer;
- tx->mxc_conn = conn; /* may be NULL */
- /* we added a conn ref above */
- mxlnd_init_tx_msg (tx, MXLND_MSG_PUT_REQ, sizeof(kmx_putreq_msg_t), nid);
- txmsg = tx->mxc_msg;
- txmsg->mxm_u.put_req.mxprm_hdr = *hdr;
- txmsg->mxm_u.put_req.mxprm_cookie = tx->mxc_cookie;
- tx->mxc_match = mxlnd_create_match(tx, 0);
-
- /* we must post a receive _before_ sending the request.
- * we need to determine how much to receive, it will be either
- * a put_ack or a put_nak. The put_ack is larger, so use it. */
-
- rx = mxlnd_get_idle_rx();
- if (unlikely(rx == NULL)) {
- CDEBUG(D_NETERROR, "Can't allocate rx for PUT_ACK for %s\n",
- libcfs_nid2str(nid));
- mxlnd_put_idle_tx(tx);
- if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */
- return -ENOMEM;
- }
- rx->mxc_nid = nid;
- rx->mxc_peer = peer;
- /* conn may be NULL but unlikely since the first msg is always small */
- /* NOTE no need to lock peer before adding conn ref since we took
- * a conn ref for the tx (it cannot be freed between there and here ) */
- if (conn) mxlnd_conn_addref(conn); /* for this rx */
- rx->mxc_conn = conn;
- rx->mxc_msg_type = MXLND_MSG_PUT_ACK;
- rx->mxc_cookie = tx->mxc_cookie;
- rx->mxc_match = mxlnd_create_match(rx, 0);
-
- length = offsetof(kmx_msg_t, mxm_u) + sizeof(kmx_putack_msg_t);
- ret = mxlnd_recv_msg(lntmsg, rx, MXLND_MSG_PUT_ACK, rx->mxc_match, length);
- if (unlikely(ret != 0)) {
- CDEBUG(D_NETERROR, "recv_msg() failed for PUT_ACK for %s\n",
- libcfs_nid2str(nid));
- rx->mxc_lntmsg[0] = NULL;
- mxlnd_put_idle_rx(rx);
- mxlnd_put_idle_tx(tx);
- if (conn) {
- mxlnd_conn_decref(conn); /* for the rx... */
- mxlnd_conn_decref(conn); /* and for the tx */
- }
- return -EHOSTUNREACH;
- }
-
- mxlnd_queue_tx(tx);
- return 0;
-
- case LNET_MSG_GET:
- if (routing || target_is_router)
- break; /* send EAGER */
-
- /* is the REPLY message too small for DATA? */
- nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[lntmsg->msg_md->md_length]);
- if (nob <= MXLND_EAGER_SIZE)
- break; /* send EAGER */
-
- /* get tx (we need the cookie) , post rx for incoming DATA,
- * then post GET_REQ tx */
- tx = mxlnd_get_idle_tx();
- if (unlikely(tx == NULL)) {
- CDEBUG(D_NETERROR, "Can't allocate GET tx for %s\n",
- libcfs_nid2str(nid));
- if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */
- return -ENOMEM;
- }
- rx_data = mxlnd_get_idle_rx();
- if (unlikely(rx_data == NULL)) {
- CDEBUG(D_NETERROR, "Can't allocate DATA rx for %s\n",
- libcfs_nid2str(nid));
- mxlnd_put_idle_tx(tx);
- if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */
- return -ENOMEM;
- }
- rx_data->mxc_peer = peer;
- /* NOTE no need to lock peer before adding conn ref since we took
- * a conn ref for the tx (it cannot be freed between there and here ) */
- if (conn) mxlnd_conn_addref(conn); /* for the rx_data */
- rx_data->mxc_conn = conn; /* may be NULL */
-
- ret = mxlnd_recv_data(ni, lntmsg, rx_data, MXLND_MSG_GET_DATA, tx->mxc_cookie);
- if (unlikely(ret != 0)) {
- CDEBUG(D_NETERROR, "Can't setup GET sink for %s\n",
- libcfs_nid2str(nid));
- mxlnd_put_idle_rx(rx_data);
- mxlnd_put_idle_tx(tx);
- if (conn) {
- mxlnd_conn_decref(conn); /* for the rx_data... */
- mxlnd_conn_decref(conn); /* and for the tx */
- }
- return -EIO;
- }
-
- tx->mxc_peer = peer;
- tx->mxc_conn = conn; /* may be NULL */
- /* conn ref taken above */
- mxlnd_init_tx_msg(tx, MXLND_MSG_GET_REQ, sizeof(kmx_getreq_msg_t), nid);
- txmsg = tx->mxc_msg;
- txmsg->mxm_u.get_req.mxgrm_hdr = *hdr;
- txmsg->mxm_u.get_req.mxgrm_cookie = tx->mxc_cookie;
- tx->mxc_match = mxlnd_create_match(tx, 0);
-
- mxlnd_queue_tx(tx);
- return 0;
-
- default:
- LBUG();
- if (conn) mxlnd_conn_decref(conn); /* drop ref taken above */
- return -EIO;
- }
-
- /* send EAGER */
-
- LASSERT (offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[payload_nob])
- <= MXLND_EAGER_SIZE);
-
- tx = mxlnd_get_idle_tx();
- if (unlikely(tx == NULL)) {
- CDEBUG(D_NETERROR, "Can't send %s to %s: tx descs exhausted\n",
- mxlnd_lnetmsg_to_str(type), libcfs_nid2str(nid));
- if (conn) mxlnd_conn_decref(conn); /* drop ref taken above */
- return -ENOMEM;
- }
-
- tx->mxc_peer = peer;
- tx->mxc_conn = conn; /* may be NULL */
- /* conn ref taken above */
- nob = offsetof(kmx_eager_msg_t, mxem_payload[payload_nob]);
- mxlnd_init_tx_msg (tx, MXLND_MSG_EAGER, nob, nid);
- tx->mxc_match = mxlnd_create_match(tx, 0);
-
- txmsg = tx->mxc_msg;
- txmsg->mxm_u.eager.mxem_hdr = *hdr;
-
- if (payload_kiov != NULL)
- lnet_copy_kiov2flat(MXLND_EAGER_SIZE, txmsg,
- offsetof(kmx_msg_t, mxm_u.eager.mxem_payload),
- payload_niov, payload_kiov, payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(MXLND_EAGER_SIZE, txmsg,
- offsetof(kmx_msg_t, mxm_u.eager.mxem_payload),
- payload_niov, payload_iov, payload_offset, payload_nob);
-
- tx->mxc_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- mxlnd_queue_tx(tx);
- return 0;
-}
-
-/**
- * mxlnd_recv - the LND required recv function
- * @ni
- * @private
- * @lntmsg
- * @delayed
- * @niov
- * @kiov
- * @offset
- * @mlen
- * @rlen
- *
- * This must not block.
- */
-int
-mxlnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- int ret = 0;
- int nob = 0;
- int len = 0;
- struct kmx_ctx *rx = private;
- struct kmx_msg *rxmsg = rx->mxc_msg;
- lnet_nid_t nid = rx->mxc_nid;
- struct kmx_ctx *tx = NULL;
- struct kmx_msg *txmsg = NULL;
- struct kmx_peer *peer = rx->mxc_peer;
- struct kmx_conn *conn = peer->mxp_conn;
- u64 cookie = 0LL;
- int msg_type = rxmsg->mxm_type;
- int repost = 1;
- int credit = 0;
- int finalize = 0;
-
- LASSERT (mlen <= rlen);
- /* Either all pages or all vaddrs */
- LASSERT (!(kiov != NULL && iov != NULL));
- LASSERT (peer != NULL);
-
- /* conn_addref(conn) already taken for the primary rx */
-
- switch (msg_type) {
- case MXLND_MSG_EAGER:
- nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[rlen]);
- len = rx->mxc_status.xfer_length;
- if (unlikely(nob > len)) {
- CDEBUG(D_NETERROR, "Eager message from %s too big: %d(%d)\n",
- libcfs_nid2str(nid), nob, len);
- ret = -EPROTO;
- break;
- }
-
- if (kiov != NULL)
- lnet_copy_flat2kiov(niov, kiov, offset,
- MXLND_EAGER_SIZE, rxmsg,
- offsetof(kmx_msg_t, mxm_u.eager.mxem_payload),
- mlen);
- else
- lnet_copy_flat2iov(niov, iov, offset,
- MXLND_EAGER_SIZE, rxmsg,
- offsetof(kmx_msg_t, mxm_u.eager.mxem_payload),
- mlen);
- finalize = 1;
- credit = 1;
- break;
-
- case MXLND_MSG_PUT_REQ:
- /* we are going to reuse the rx, store the needed info */
- cookie = rxmsg->mxm_u.put_req.mxprm_cookie;
-
- /* get tx, post rx, send PUT_ACK */
-
- tx = mxlnd_get_idle_tx();
- if (unlikely(tx == NULL)) {
- CDEBUG(D_NETERROR, "Can't allocate tx for %s\n", libcfs_nid2str(nid));
- /* Not replying will break the connection */
- ret = -ENOMEM;
- break;
- }
- if (unlikely(mlen == 0)) {
- finalize = 1;
- tx->mxc_peer = peer;
- tx->mxc_conn = conn;
- mxlnd_send_nak(tx, nid, MXLND_MSG_PUT_ACK, 0, cookie);
- /* repost = 1 */
- break;
- }
-
- mxlnd_init_tx_msg(tx, MXLND_MSG_PUT_ACK, sizeof(kmx_putack_msg_t), nid);
- tx->mxc_peer = peer;
- tx->mxc_conn = conn;
- /* no need to lock peer first since we already have a ref */
- mxlnd_conn_addref(conn); /* for the tx */
- txmsg = tx->mxc_msg;
- txmsg->mxm_u.put_ack.mxpam_src_cookie = cookie;
- txmsg->mxm_u.put_ack.mxpam_dst_cookie = tx->mxc_cookie;
- tx->mxc_cookie = cookie;
- tx->mxc_match = mxlnd_create_match(tx, 0);
-
- /* we must post a receive _before_ sending the PUT_ACK */
- mxlnd_ctx_init(rx);
- rx->mxc_state = MXLND_CTX_PREP;
- rx->mxc_peer = peer;
- rx->mxc_conn = conn;
- /* do not take another ref for this rx, it is already taken */
- rx->mxc_nid = peer->mxp_nid;
- ret = mxlnd_recv_data(ni, lntmsg, rx, MXLND_MSG_PUT_DATA,
- txmsg->mxm_u.put_ack.mxpam_dst_cookie);
-
- if (unlikely(ret != 0)) {
- /* Notify peer that it's over */
- CDEBUG(D_NETERROR, "Can't setup PUT_DATA rx for %s: %d\n",
- libcfs_nid2str(nid), ret);
- mxlnd_ctx_init(tx);
- tx->mxc_state = MXLND_CTX_PREP;
- tx->mxc_peer = peer;
- tx->mxc_conn = conn;
- /* finalize = 0, let the PUT_ACK tx finalize this */
- tx->mxc_lntmsg[0] = rx->mxc_lntmsg[0];
- tx->mxc_lntmsg[1] = rx->mxc_lntmsg[1];
- /* conn ref already taken above */
- mxlnd_send_nak(tx, nid, MXLND_MSG_PUT_ACK, ret, cookie);
- /* repost = 1 */
- break;
- }
-
- mxlnd_queue_tx(tx);
- /* do not return a credit until after PUT_DATA returns */
- repost = 0;
- break;
-
- case MXLND_MSG_GET_REQ:
- if (likely(lntmsg != NULL)) {
- mxlnd_send_data(ni, lntmsg, rx->mxc_peer, MXLND_MSG_GET_DATA,
- rx->mxc_msg->mxm_u.get_req.mxgrm_cookie);
- } else {
- /* GET didn't match anything */
- /* The initiator has a rx mapped to [k]iov. We cannot send a nak.
- * We have to embed the error code in the match bits.
- * Send the error in bits 52-59 and the cookie in bits 0-51 */
- u64 cookie = rxmsg->mxm_u.get_req.mxgrm_cookie;
-
- tx = mxlnd_get_idle_tx();
- if (unlikely(tx == NULL)) {
- CDEBUG(D_NETERROR, "Can't get tx for GET NAK for %s\n",
- libcfs_nid2str(nid));
- ret = -ENOMEM;
- break;
- }
- tx->mxc_msg_type = MXLND_MSG_GET_DATA;
- tx->mxc_state = MXLND_CTX_PENDING;
- tx->mxc_nid = nid;
- tx->mxc_peer = peer;
- tx->mxc_conn = conn;
- /* no need to lock peer first since we already have a ref */
- mxlnd_conn_addref(conn); /* for this tx */
- tx->mxc_cookie = cookie;
- tx->mxc_match = mxlnd_create_match(tx, ENODATA);
- tx->mxc_pin_type = MX_PIN_PHYSICAL;
- mxlnd_queue_tx(tx);
- }
- /* finalize lntmsg after tx completes */
- break;
-
- default:
- LBUG();
- }
-
- if (repost) {
- /* we received a message, increment peer's outstanding credits */
- if (credit == 1) {
- spin_lock(&conn->mxk_lock);
- conn->mxk_outstanding++;
- spin_unlock(&conn->mxk_lock);
- }
- /* we are done with the rx */
- mxlnd_put_idle_rx(rx);
- mxlnd_conn_decref(conn);
- }
-
- if (finalize == 1) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg, 0);
-
- /* we received a credit, see if we can use it to send a msg */
- if (credit) mxlnd_check_sends(peer);
-
- return ret;
-}
-
-void
-mxlnd_sleep(unsigned long timeout)
-{
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(timeout);
- return;
-}
-
-/**
- * mxlnd_tx_queued - the generic send queue thread
- * @arg - thread id (as a void *)
- *
- * This thread moves send messages from the global tx_queue to the owning
- * peer's tx_[msg|data]_queue. If the peer does not exist, it creates one and adds
- * it to the global peer list.
- */
-int
-mxlnd_tx_queued(void *arg)
-{
- long id = (long) arg;
- int ret = 0;
- int found = 0;
- struct kmx_ctx *tx = NULL;
- struct kmx_peer *peer = NULL;
- struct list_head *tmp_tx = NULL;
-
- cfs_daemonize("mxlnd_tx_queued");
- //cfs_block_allsigs();
-
- while (!kmxlnd_data.kmx_shutdown) {
- ret = down_interruptible(&kmxlnd_data.kmx_tx_queue_sem);
- if (kmxlnd_data.kmx_shutdown)
- break;
- if (ret != 0) // Should we check for -EINTR?
- continue;
- spin_lock(&kmxlnd_data.kmx_tx_queue_lock);
- if (list_empty (&kmxlnd_data.kmx_tx_queue)) {
- spin_unlock(&kmxlnd_data.kmx_tx_queue_lock);
- continue;
- }
- tmp_tx = &kmxlnd_data.kmx_tx_queue;
- tx = list_entry (tmp_tx->next, struct kmx_ctx, mxc_list);
- list_del_init(&tx->mxc_list);
- spin_unlock(&kmxlnd_data.kmx_tx_queue_lock);
-
- found = 0;
- peer = mxlnd_find_peer_by_nid(tx->mxc_nid); /* adds peer ref */
- if (peer != NULL) {
- tx->mxc_peer = peer;
- spin_lock(&peer->mxp_lock);
- if (peer->mxp_conn == NULL) {
- ret = mxlnd_conn_alloc_locked(&peer->mxp_conn, peer);
- if (ret != 0) {
- /* out of memory, give up and fail tx */
- tx->mxc_status.code = -ENOMEM;
- spin_unlock(&peer->mxp_lock);
- mxlnd_peer_decref(peer);
- mxlnd_put_idle_tx(tx);
- continue;
- }
- }
- tx->mxc_conn = peer->mxp_conn;
- mxlnd_conn_addref(tx->mxc_conn); /* for this tx */
- spin_unlock(&peer->mxp_lock);
- mxlnd_peer_decref(peer); /* drop peer ref taken above */
- mxlnd_queue_tx(tx);
- found = 1;
- }
- if (found == 0) {
- int hash = 0;
- struct kmx_peer *peer = NULL;
- struct kmx_peer *old = NULL;
-
- hash = mxlnd_nid_to_hash(tx->mxc_nid);
-
- LASSERT(tx->mxc_msg_type != MXLND_MSG_PUT_DATA &&
- tx->mxc_msg_type != MXLND_MSG_GET_DATA);
- /* create peer */
- /* adds conn ref for this function */
- ret = mxlnd_peer_alloc(&peer, tx->mxc_nid);
- if (ret != 0) {
- /* finalize message */
- tx->mxc_status.code = ret;
- mxlnd_put_idle_tx(tx);
- continue;
- }
- tx->mxc_peer = peer;
- tx->mxc_conn = peer->mxp_conn;
- /* this tx will keep the conn ref taken in peer_alloc() */
-
- /* add peer to global peer list, but look to see
- * if someone already created it after we released
- * the read lock */
- write_lock(&kmxlnd_data.kmx_peers_lock);
- list_for_each_entry(old, &kmxlnd_data.kmx_peers[hash], mxp_peers) {
- if (old->mxp_nid == peer->mxp_nid) {
- /* somebody beat us here, we created a duplicate */
- found = 1;
- break;
- }
- }
-
- if (found == 0) {
- list_add_tail(&peer->mxp_peers, &kmxlnd_data.kmx_peers[hash]);
- atomic_inc(&kmxlnd_data.kmx_npeers);
- } else {
- tx->mxc_peer = old;
- spin_lock(&old->mxp_lock);
- tx->mxc_conn = old->mxp_conn;
- /* FIXME can conn be NULL? */
- LASSERT(old->mxp_conn != NULL);
- mxlnd_conn_addref(old->mxp_conn);
- spin_unlock(&old->mxp_lock);
- mxlnd_reduce_idle_rxs(*kmxlnd_tunables.kmx_credits - 1);
- mxlnd_conn_decref(peer->mxp_conn); /* drop ref taken above.. */
- mxlnd_conn_decref(peer->mxp_conn); /* drop peer's ref */
- mxlnd_peer_decref(peer);
- }
- write_unlock(&kmxlnd_data.kmx_peers_lock);
-
- mxlnd_queue_tx(tx);
- }
- }
- mxlnd_thread_stop(id);
- return 0;
-}
-
-/* When calling this, we must not have the peer lock. */
-void
-mxlnd_iconnect(struct kmx_peer *peer, u64 mask)
-{
- mx_return_t mxret = MX_SUCCESS;
- mx_request_t request;
- struct kmx_conn *conn = peer->mxp_conn;
-
- /* NOTE we are holding a conn ref every time we call this function,
- * we do not need to lock the peer before taking another ref */
- mxlnd_conn_addref(conn); /* hold until CONN_REQ or CONN_ACK completes */
-
- LASSERT(mask == MXLND_MASK_ICON_REQ ||
- mask == MXLND_MASK_ICON_ACK);
-
- if (peer->mxp_reconnect_time == 0) {
- peer->mxp_reconnect_time = jiffies;
- }
-
- if (peer->mxp_nic_id == 0LL) {
- mxlnd_peer_hostname_to_nic_id(peer);
- if (peer->mxp_nic_id == 0LL) {
- /* not mapped yet, return */
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_INIT;
- spin_unlock(&conn->mxk_lock);
- if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) {
- /* give up and notify LNET */
- mxlnd_conn_disconnect(conn, 0, 1);
- mxlnd_conn_alloc(&peer->mxp_conn, peer); /* adds ref for this
- function... */
- mxlnd_conn_decref(peer->mxp_conn); /* which we no
- longer need */
- }
- mxlnd_conn_decref(conn);
- return;
- }
- }
-
- mxret = mx_iconnect(kmxlnd_data.kmx_endpt, peer->mxp_nic_id,
- peer->mxp_host->mxh_ep_id, MXLND_MSG_MAGIC, mask,
- (void *) peer, &request);
- if (unlikely(mxret != MX_SUCCESS)) {
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_FAIL;
- spin_unlock(&conn->mxk_lock);
- CDEBUG(D_NETERROR, "mx_iconnect() failed with %s (%d) to %s\n",
- mx_strerror(mxret), mxret, libcfs_nid2str(peer->mxp_nid));
- mxlnd_conn_decref(conn);
- }
- return;
-}
-
-#define MXLND_STATS 0
-
-int
-mxlnd_check_sends(struct kmx_peer *peer)
-{
- int ret = 0;
- int found = 0;
- mx_return_t mxret = MX_SUCCESS;
- struct kmx_ctx *tx = NULL;
- struct kmx_conn *conn = NULL;
- u8 msg_type = 0;
- int credit = 0;
- int status = 0;
- int ntx_posted = 0;
- int credits = 0;
-#if MXLND_STATS
- static unsigned long last = 0;
-#endif
-
- if (unlikely(peer == NULL)) {
- LASSERT(peer != NULL);
- return -1;
- }
- spin_lock(&peer->mxp_lock);
- conn = peer->mxp_conn;
- /* NOTE take a ref for the duration of this function since it is called
- * when there might not be any queued txs for this peer */
- if (conn) mxlnd_conn_addref(conn); /* for duration of this function */
- spin_unlock(&peer->mxp_lock);
-
- /* do not add another ref for this tx */
-
- if (conn == NULL) {
- /* we do not have any conns */
- return -1;
- }
-
-#if MXLND_STATS
- if (time_after(jiffies, last)) {
- last = jiffies + HZ;
- CDEBUG(D_NET, "status= %s credits= %d outstanding= %d ntx_msgs= %d "
- "ntx_posted= %d ntx_data= %d data_posted= %d\n",
- mxlnd_connstatus_to_str(conn->mxk_status), conn->mxk_credits,
- conn->mxk_outstanding, conn->mxk_ntx_msgs, conn->mxk_ntx_posted,
- conn->mxk_ntx_data, conn->mxk_data_posted);
- }
-#endif
-
- /* cache peer state for asserts */
- spin_lock(&conn->mxk_lock);
- ntx_posted = conn->mxk_ntx_posted;
- credits = conn->mxk_credits;
- spin_unlock(&conn->mxk_lock);
-
- LASSERT(ntx_posted <= *kmxlnd_tunables.kmx_credits);
- LASSERT(ntx_posted >= 0);
-
- LASSERT(credits <= *kmxlnd_tunables.kmx_credits);
- LASSERT(credits >= 0);
-
- /* check number of queued msgs, ignore data */
- spin_lock(&conn->mxk_lock);
- if (conn->mxk_outstanding >= MXLND_CREDIT_HIGHWATER) {
- /* check if any txs queued that could return credits... */
- if (list_empty(&conn->mxk_tx_credit_queue) || conn->mxk_ntx_msgs == 0) {
- /* if not, send a NOOP */
- tx = mxlnd_get_idle_tx();
- if (likely(tx != NULL)) {
- tx->mxc_peer = peer;
- tx->mxc_conn = peer->mxp_conn;
- mxlnd_conn_addref(conn); /* for this tx */
- mxlnd_init_tx_msg (tx, MXLND_MSG_NOOP, 0, peer->mxp_nid);
- tx->mxc_match = mxlnd_create_match(tx, 0);
- mxlnd_peer_queue_tx_locked(tx);
- found = 1;
- goto done_locked;
- }
- }
- }
- spin_unlock(&conn->mxk_lock);
-
- /* if the peer is not ready, try to connect */
- spin_lock(&conn->mxk_lock);
- if (unlikely(conn->mxk_status == MXLND_CONN_INIT ||
- conn->mxk_status == MXLND_CONN_FAIL ||
- conn->mxk_status == MXLND_CONN_REQ)) {
- CDEBUG(D_NET, "status=%s\n", mxlnd_connstatus_to_str(conn->mxk_status));
- conn->mxk_status = MXLND_CONN_WAIT;
- spin_unlock(&conn->mxk_lock);
- mxlnd_iconnect(peer, MXLND_MASK_ICON_REQ);
- goto done;
- }
- spin_unlock(&conn->mxk_lock);
-
- spin_lock(&conn->mxk_lock);
- while (!list_empty(&conn->mxk_tx_free_queue) ||
- !list_empty(&conn->mxk_tx_credit_queue)) {
- /* We have something to send. If we have a queued tx that does not
- * require a credit (free), choose it since its completion will
- * return a credit (here or at the peer), complete a DATA or
- * CONN_REQ or CONN_ACK. */
- struct list_head *tmp_tx = NULL;
- if (!list_empty(&conn->mxk_tx_free_queue)) {
- tmp_tx = &conn->mxk_tx_free_queue;
- } else {
- tmp_tx = &conn->mxk_tx_credit_queue;
- }
- tx = list_entry(tmp_tx->next, struct kmx_ctx, mxc_list);
-
- msg_type = tx->mxc_msg_type;
-
- /* don't try to send a rx */
- LASSERT(tx->mxc_type == MXLND_REQ_TX);
-
- /* ensure that it is a valid msg type */
- LASSERT(msg_type == MXLND_MSG_CONN_REQ ||
- msg_type == MXLND_MSG_CONN_ACK ||
- msg_type == MXLND_MSG_NOOP ||
- msg_type == MXLND_MSG_EAGER ||
- msg_type == MXLND_MSG_PUT_REQ ||
- msg_type == MXLND_MSG_PUT_ACK ||
- msg_type == MXLND_MSG_PUT_DATA ||
- msg_type == MXLND_MSG_GET_REQ ||
- msg_type == MXLND_MSG_GET_DATA);
- LASSERT(tx->mxc_peer == peer);
- LASSERT(tx->mxc_nid == peer->mxp_nid);
-
- credit = mxlnd_tx_requires_credit(tx);
- if (credit) {
-
- if (conn->mxk_ntx_posted == *kmxlnd_tunables.kmx_credits) {
- CDEBUG(D_NET, "%s: posted enough\n",
- libcfs_nid2str(peer->mxp_nid));
- goto done_locked;
- }
-
- if (conn->mxk_credits == 0) {
- CDEBUG(D_NET, "%s: no credits\n",
- libcfs_nid2str(peer->mxp_nid));
- goto done_locked;
- }
-
- if (conn->mxk_credits == 1 && /* last credit reserved for */
- conn->mxk_outstanding == 0) { /* giving back credits */
- CDEBUG(D_NET, "%s: not using last credit\n",
- libcfs_nid2str(peer->mxp_nid));
- goto done_locked;
- }
- }
-
- if (unlikely(conn->mxk_status != MXLND_CONN_READY)) {
- if ( ! (msg_type == MXLND_MSG_CONN_REQ ||
- msg_type == MXLND_MSG_CONN_ACK)) {
- CDEBUG(D_NET, "peer status is %s for tx 0x%llx (%s)\n",
- mxlnd_connstatus_to_str(conn->mxk_status),
- tx->mxc_cookie,
- mxlnd_msgtype_to_str(tx->mxc_msg_type));
- if (conn->mxk_status == MXLND_CONN_DISCONNECT) {
- list_del_init(&tx->mxc_list);
- tx->mxc_status.code = -ECONNABORTED;
- mxlnd_put_idle_tx(tx);
- mxlnd_conn_decref(conn);
- }
- goto done_locked;
- }
- }
-
- list_del_init(&tx->mxc_list);
-
- /* handle credits, etc now while we have the lock to avoid races */
- if (credit) {
- conn->mxk_credits--;
- conn->mxk_ntx_posted++;
- }
- if (msg_type != MXLND_MSG_PUT_DATA &&
- msg_type != MXLND_MSG_GET_DATA) {
- if (msg_type != MXLND_MSG_CONN_REQ &&
- msg_type != MXLND_MSG_CONN_ACK) {
- conn->mxk_ntx_msgs--;
- }
- }
- if (tx->mxc_incarnation == 0 &&
- conn->mxk_incarnation != 0) {
- tx->mxc_incarnation = conn->mxk_incarnation;
- }
- spin_unlock(&conn->mxk_lock);
-
- /* if this is a NOOP and (1) mxp_conn->mxk_outstanding < CREDIT_HIGHWATER
- * or (2) there is a non-DATA msg that can return credits in the
- * queue, then drop this duplicate NOOP */
- if (unlikely(msg_type == MXLND_MSG_NOOP)) {
- spin_lock(&conn->mxk_lock);
- if ((conn->mxk_outstanding < MXLND_CREDIT_HIGHWATER) ||
- (conn->mxk_ntx_msgs >= 1)) {
- conn->mxk_credits++;
- conn->mxk_ntx_posted--;
- spin_unlock(&conn->mxk_lock);
- /* redundant NOOP */
- mxlnd_put_idle_tx(tx);
- mxlnd_conn_decref(conn);
- CDEBUG(D_NET, "%s: redundant noop\n",
- libcfs_nid2str(peer->mxp_nid));
- found = 1;
- goto done;
- }
- spin_unlock(&conn->mxk_lock);
- }
-
- found = 1;
- if (likely((msg_type != MXLND_MSG_PUT_DATA) &&
- (msg_type != MXLND_MSG_GET_DATA))) {
- mxlnd_pack_msg(tx);
- }
-
- //ret = -ECONNABORTED;
- mxret = MX_SUCCESS;
-
- spin_lock(&conn->mxk_lock);
- status = conn->mxk_status;
- spin_unlock(&conn->mxk_lock);
-
- if (likely((status == MXLND_CONN_READY) ||
- (msg_type == MXLND_MSG_CONN_REQ) ||
- (msg_type == MXLND_MSG_CONN_ACK))) {
- ret = 0;
- if (msg_type != MXLND_MSG_CONN_REQ &&
- msg_type != MXLND_MSG_CONN_ACK) {
- /* add to the pending list */
- ret = mxlnd_q_pending_ctx(tx);
- if (ret == -1) {
- /* FIXME the conn is disconnected, now what? */
- }
- } else {
- /* CONN_REQ/ACK */
- tx->mxc_state = MXLND_CTX_PENDING;
- }
-
- if (ret == 0) {
- if (likely(msg_type != MXLND_MSG_PUT_DATA &&
- msg_type != MXLND_MSG_GET_DATA)) {
- /* send a msg style tx */
- LASSERT(tx->mxc_nseg == 1);
- LASSERT(tx->mxc_pin_type == MX_PIN_PHYSICAL);
- CDEBUG(D_NET, "sending %s 0x%llx\n",
- mxlnd_msgtype_to_str(msg_type),
- tx->mxc_cookie);
- mxret = mx_kisend(kmxlnd_data.kmx_endpt,
- &tx->mxc_seg,
- tx->mxc_nseg,
- tx->mxc_pin_type,
- conn->mxk_epa,
- tx->mxc_match,
- (void *) tx,
- &tx->mxc_mxreq);
- } else {
- /* send a DATA tx */
- spin_lock(&conn->mxk_lock);
- conn->mxk_ntx_data--;
- conn->mxk_data_posted++;
- spin_unlock(&conn->mxk_lock);
- CDEBUG(D_NET, "sending %s 0x%llx\n",
- mxlnd_msgtype_to_str(msg_type),
- tx->mxc_cookie);
- mxret = mx_kisend(kmxlnd_data.kmx_endpt,
- tx->mxc_seg_list,
- tx->mxc_nseg,
- tx->mxc_pin_type,
- conn->mxk_epa,
- tx->mxc_match,
- (void *) tx,
- &tx->mxc_mxreq);
- }
- } else {
- mxret = MX_CONNECTION_FAILED;
- }
- if (likely(mxret == MX_SUCCESS)) {
- ret = 0;
- } else {
- CDEBUG(D_NETERROR, "mx_kisend() failed with %s (%d) "
- "sending to %s\n", mx_strerror(mxret), (int) mxret,
- libcfs_nid2str(peer->mxp_nid));
- /* NOTE mx_kisend() only fails if there are not enough
- * resources. Do not change the connection status. */
- if (mxret == MX_NO_RESOURCES) {
- tx->mxc_status.code = -ENOMEM;
- } else {
- tx->mxc_status.code = -ECONNABORTED;
- }
- if (credit) {
- spin_lock(&conn->mxk_lock);
- conn->mxk_ntx_posted--;
- conn->mxk_credits++;
- spin_unlock(&conn->mxk_lock);
- } else if (msg_type == MXLND_MSG_PUT_DATA ||
- msg_type == MXLND_MSG_GET_DATA) {
- spin_lock(&conn->mxk_lock);
- conn->mxk_data_posted--;
- spin_unlock(&conn->mxk_lock);
- }
- if (msg_type != MXLND_MSG_PUT_DATA &&
- msg_type != MXLND_MSG_GET_DATA &&
- msg_type != MXLND_MSG_CONN_REQ &&
- msg_type != MXLND_MSG_CONN_ACK) {
- spin_lock(&conn->mxk_lock);
- conn->mxk_outstanding += tx->mxc_msg->mxm_credits;
- spin_unlock(&conn->mxk_lock);
- }
- if (msg_type != MXLND_MSG_CONN_REQ &&
- msg_type != MXLND_MSG_CONN_ACK) {
- /* remove from the pending list */
- mxlnd_deq_pending_ctx(tx);
- }
- mxlnd_put_idle_tx(tx);
- mxlnd_conn_decref(conn);
- }
- }
- spin_lock(&conn->mxk_lock);
- }
-done_locked:
- spin_unlock(&conn->mxk_lock);
-done:
- mxlnd_conn_decref(conn); /* drop ref taken at start of function */
- return found;
-}
-
-
-/**
- * mxlnd_handle_tx_completion - a tx completed, progress or complete the msg
- * @ctx - the tx descriptor
- *
- * Determine which type of send request it was and start the next step, if needed,
- * or, if done, signal completion to LNET. After we are done, put back on the
- * idle tx list.
- */
-void
-mxlnd_handle_tx_completion(struct kmx_ctx *tx)
-{
- int failed = (tx->mxc_status.code != MX_STATUS_SUCCESS);
- struct kmx_msg *msg = tx->mxc_msg;
- struct kmx_peer *peer = tx->mxc_peer;
- struct kmx_conn *conn = tx->mxc_conn;
- u8 type = tx->mxc_msg_type;
- int credit = mxlnd_tx_requires_credit(tx);
- u64 cookie = tx->mxc_cookie;
-
- CDEBUG(D_NET, "entering %s (0x%llx):\n",
- mxlnd_msgtype_to_str(tx->mxc_msg_type), cookie);
-
- if (unlikely(conn == NULL)) {
- mx_get_endpoint_addr_context(tx->mxc_status.source, (void **) &peer);
- conn = peer->mxp_conn;
- if (conn != NULL) {
- /* do not add a ref for the tx, it was set before sending */
- tx->mxc_conn = conn;
- tx->mxc_peer = conn->mxk_peer;
- }
- }
- LASSERT (peer != NULL);
- LASSERT (conn != NULL);
-
- if (type != MXLND_MSG_PUT_DATA && type != MXLND_MSG_GET_DATA) {
- LASSERT (type == msg->mxm_type);
- }
-
- if (failed) {
- tx->mxc_status.code = -EIO;
- } else {
- spin_lock(&conn->mxk_lock);
- conn->mxk_last_tx = jiffies;
- spin_unlock(&conn->mxk_lock);
- }
-
- switch (type) {
-
- case MXLND_MSG_GET_DATA:
- spin_lock(&conn->mxk_lock);
- if (conn->mxk_incarnation == tx->mxc_incarnation) {
- conn->mxk_outstanding++;
- conn->mxk_data_posted--;
- }
- spin_unlock(&conn->mxk_lock);
- break;
-
- case MXLND_MSG_PUT_DATA:
- spin_lock(&conn->mxk_lock);
- if (conn->mxk_incarnation == tx->mxc_incarnation) {
- conn->mxk_data_posted--;
- }
- spin_unlock(&conn->mxk_lock);
- break;
-
- case MXLND_MSG_NOOP:
- case MXLND_MSG_PUT_REQ:
- case MXLND_MSG_PUT_ACK:
- case MXLND_MSG_GET_REQ:
- case MXLND_MSG_EAGER:
- //case MXLND_MSG_NAK:
- break;
-
- case MXLND_MSG_CONN_ACK:
- if (peer->mxp_incompatible) {
- /* we sent our params, now close this conn */
- mxlnd_conn_disconnect(conn, 0, 1);
- }
- case MXLND_MSG_CONN_REQ:
- if (failed) {
- CDEBUG(D_NETERROR, "handle_tx_completion(): %s "
- "failed with %s (%d) to %s\n",
- type == MXLND_MSG_CONN_REQ ? "CONN_REQ" : "CONN_ACK",
- mx_strstatus(tx->mxc_status.code),
- tx->mxc_status.code,
- libcfs_nid2str(tx->mxc_nid));
- if (!peer->mxp_incompatible) {
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_FAIL;
- spin_unlock(&conn->mxk_lock);
- }
- }
- break;
-
- default:
- CDEBUG(D_NETERROR, "Unknown msg type of %d\n", type);
- LBUG();
- }
-
- if (credit) {
- spin_lock(&conn->mxk_lock);
- if (conn->mxk_incarnation == tx->mxc_incarnation) {
- conn->mxk_ntx_posted--;
- }
- spin_unlock(&conn->mxk_lock);
- }
-
- CDEBUG(D_NET, "leaving mxlnd_handle_tx_completion()\n");
- mxlnd_put_idle_tx(tx);
- mxlnd_conn_decref(conn);
-
- mxlnd_check_sends(peer);
-
- return;
-}
-
-void
-mxlnd_handle_rx_completion(struct kmx_ctx *rx)
-{
- int ret = 0;
- int repost = 1;
- int credit = 1;
- u32 nob = rx->mxc_status.xfer_length;
- u64 bits = rx->mxc_status.match_info;
- struct kmx_msg *msg = rx->mxc_msg;
- struct kmx_peer *peer = rx->mxc_peer;
- struct kmx_conn *conn = rx->mxc_conn;
- u8 type = rx->mxc_msg_type;
- u64 seq = 0LL;
- lnet_msg_t *lntmsg[2];
- int result = 0;
- u64 nic_id = 0LL;
- u32 ep_id = 0;
- int peer_ref = 0;
- int conn_ref = 0;
- int incompatible = 0;
-
- /* NOTE We may only know the peer's nid if it is a PUT_REQ, GET_REQ,
- * failed GET reply, CONN_REQ, or a CONN_ACK */
-
- /* NOTE peer may still be NULL if it is a new peer and
- * conn may be NULL if this is a re-connect */
- if (likely(peer != NULL && conn != NULL)) {
- /* we have a reference on the conn */
- conn_ref = 1;
- } else if (peer != NULL && conn == NULL) {
- /* we have a reference on the peer */
- peer_ref = 1;
- } else if (peer == NULL && conn != NULL) {
- /* fatal error */
- CDEBUG(D_NETERROR, "rx has conn but no peer\n");
- LBUG();
- } /* else peer and conn == NULL */
-
-#if 0
- if (peer == NULL || conn == NULL) {
- /* if the peer was disconnected, the peer may exist but
- * not have any valid conns */
- decref = 0; /* no peer means no ref was taken for this rx */
- }
-#endif
-
- if (conn == NULL && peer != NULL) {
- spin_lock(&peer->mxp_lock);
- conn = peer->mxp_conn;
- if (conn) {
- mxlnd_conn_addref(conn); /* conn takes ref... */
- mxlnd_peer_decref(peer); /* from peer */
- conn_ref = 1;
- peer_ref = 0;
- }
- spin_unlock(&peer->mxp_lock);
- rx->mxc_conn = conn;
- }
-
-#if MXLND_DEBUG
- CDEBUG(D_NET, "receiving msg bits=0x%llx nob=%d peer=0x%p\n", bits, nob, peer);
-#endif
-
- lntmsg[0] = NULL;
- lntmsg[1] = NULL;
-
- if (rx->mxc_status.code != MX_STATUS_SUCCESS) {
- CDEBUG(D_NETERROR, "rx from %s failed with %s (%d)\n",
- libcfs_nid2str(rx->mxc_nid),
- mx_strstatus(rx->mxc_status.code),
- (int) rx->mxc_status.code);
- credit = 0;
- goto cleanup;
- }
-
- if (nob == 0) {
- /* this may be a failed GET reply */
- if (type == MXLND_MSG_GET_DATA) {
- bits = rx->mxc_status.match_info & 0x0FF0000000000000LL;
- ret = (u32) (bits>>52);
- lntmsg[0] = rx->mxc_lntmsg[0];
- result = -ret;
- goto cleanup;
- } else {
- /* we had a rx complete with 0 bytes (no hdr, nothing) */
- CDEBUG(D_NETERROR, "rx from %s returned with 0 bytes\n",
- libcfs_nid2str(rx->mxc_nid));
- goto cleanup;
- }
- }
-
- /* NOTE PUT_DATA and GET_DATA do not have mxc_msg, do not call unpack() */
- if (type == MXLND_MSG_PUT_DATA) {
- result = rx->mxc_status.code;
- lntmsg[0] = rx->mxc_lntmsg[0];
- goto cleanup;
- } else if (type == MXLND_MSG_GET_DATA) {
- result = rx->mxc_status.code;
- lntmsg[0] = rx->mxc_lntmsg[0];
- lntmsg[1] = rx->mxc_lntmsg[1];
- goto cleanup;
- }
-
- ret = mxlnd_unpack_msg(msg, nob);
- if (ret != 0) {
- CDEBUG(D_NETERROR, "Error %d unpacking rx from %s\n",
- ret, libcfs_nid2str(rx->mxc_nid));
- goto cleanup;
- }
- rx->mxc_nob = nob;
- type = msg->mxm_type;
- seq = msg->mxm_seq;
-
- if (type != MXLND_MSG_CONN_REQ &&
- (!lnet_ptlcompat_matchnid(rx->mxc_nid, msg->mxm_srcnid) ||
- !lnet_ptlcompat_matchnid(kmxlnd_data.kmx_ni->ni_nid, msg->mxm_dstnid))) {
- CDEBUG(D_NETERROR, "rx with mismatched NID (type %s) (my nid is "
- "0x%llx and rx msg dst is 0x%llx)\n",
- mxlnd_msgtype_to_str(type), kmxlnd_data.kmx_ni->ni_nid,
- msg->mxm_dstnid);
- goto cleanup;
- }
-
- if (type != MXLND_MSG_CONN_REQ && type != MXLND_MSG_CONN_ACK) {
- if ((conn != NULL && msg->mxm_srcstamp != conn->mxk_incarnation) ||
- msg->mxm_dststamp != kmxlnd_data.kmx_incarnation) {
- if (conn != NULL) {
- CDEBUG(D_NETERROR, "Stale rx from %s with type %s "
- "(mxm_srcstamp (%lld) != mxk_incarnation (%lld) "
- "|| mxm_dststamp (%lld) != kmx_incarnation (%lld))\n",
- libcfs_nid2str(rx->mxc_nid), mxlnd_msgtype_to_str(type),
- msg->mxm_srcstamp, conn->mxk_incarnation,
- msg->mxm_dststamp, kmxlnd_data.kmx_incarnation);
- } else {
- CDEBUG(D_NETERROR, "Stale rx from %s with type %s "
- "mxm_dststamp (%lld) != kmx_incarnation (%lld))\n",
- libcfs_nid2str(rx->mxc_nid), mxlnd_msgtype_to_str(type),
- msg->mxm_dststamp, kmxlnd_data.kmx_incarnation);
- }
- credit = 0;
- goto cleanup;
- }
- }
-
- CDEBUG(D_NET, "Received %s with %d credits\n",
- mxlnd_msgtype_to_str(type), msg->mxm_credits);
-
- if (msg->mxm_type != MXLND_MSG_CONN_REQ &&
- msg->mxm_type != MXLND_MSG_CONN_ACK) {
- LASSERT(peer != NULL);
- LASSERT(conn != NULL);
- if (msg->mxm_credits != 0) {
- spin_lock(&conn->mxk_lock);
- if (msg->mxm_srcstamp == conn->mxk_incarnation) {
- if ((conn->mxk_credits + msg->mxm_credits) >
- *kmxlnd_tunables.kmx_credits) {
- CDEBUG(D_NETERROR, "mxk_credits %d mxm_credits %d\n",
- conn->mxk_credits, msg->mxm_credits);
- }
- conn->mxk_credits += msg->mxm_credits;
- LASSERT(conn->mxk_credits >= 0);
- LASSERT(conn->mxk_credits <= *kmxlnd_tunables.kmx_credits);
- }
- spin_unlock(&conn->mxk_lock);
- }
- }
-
- CDEBUG(D_NET, "switch %s for rx (0x%llx)\n", mxlnd_msgtype_to_str(type), seq);
- switch (type) {
- case MXLND_MSG_NOOP:
- break;
-
- case MXLND_MSG_EAGER:
- ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.eager.mxem_hdr,
- msg->mxm_srcnid, rx, 0);
- repost = ret < 0;
- break;
-
- case MXLND_MSG_PUT_REQ:
- ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.put_req.mxprm_hdr,
- msg->mxm_srcnid, rx, 1);
- repost = ret < 0;
- break;
-
- case MXLND_MSG_PUT_ACK: {
- u64 cookie = (u64) msg->mxm_u.put_ack.mxpam_dst_cookie;
- if (cookie > MXLND_MAX_COOKIE) {
- CDEBUG(D_NETERROR, "NAK for msg_type %d from %s\n", rx->mxc_msg_type,
- libcfs_nid2str(rx->mxc_nid));
- result = -((cookie >> 52) & 0xff);
- lntmsg[0] = rx->mxc_lntmsg[0];
- } else {
- mxlnd_send_data(kmxlnd_data.kmx_ni, rx->mxc_lntmsg[0],
- rx->mxc_peer, MXLND_MSG_PUT_DATA,
- rx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie);
- }
- /* repost == 1 */
- break;
- }
- case MXLND_MSG_GET_REQ:
- ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.get_req.mxgrm_hdr,
- msg->mxm_srcnid, rx, 1);
- repost = ret < 0;
- break;
-
- case MXLND_MSG_CONN_REQ:
- if (!lnet_ptlcompat_matchnid(kmxlnd_data.kmx_ni->ni_nid, msg->mxm_dstnid)) {
- CDEBUG(D_NETERROR, "Can't accept %s: bad dst nid %s\n",
- libcfs_nid2str(msg->mxm_srcnid),
- libcfs_nid2str(msg->mxm_dstnid));
- goto cleanup;
- }
- if (msg->mxm_u.conn_req.mxcrm_queue_depth != *kmxlnd_tunables.kmx_credits) {
- CDEBUG(D_NETERROR, "Can't accept %s: incompatible queue depth "
- "%d (%d wanted)\n",
- libcfs_nid2str(msg->mxm_srcnid),
- msg->mxm_u.conn_req.mxcrm_queue_depth,
- *kmxlnd_tunables.kmx_credits);
- incompatible = 1;
- }
- if (msg->mxm_u.conn_req.mxcrm_eager_size != MXLND_EAGER_SIZE) {
- CDEBUG(D_NETERROR, "Can't accept %s: incompatible EAGER size "
- "%d (%d wanted)\n",
- libcfs_nid2str(msg->mxm_srcnid),
- msg->mxm_u.conn_req.mxcrm_eager_size,
- (int) MXLND_EAGER_SIZE);
- incompatible = 1;
- }
- if (peer == NULL) {
- peer = mxlnd_find_peer_by_nid(msg->mxm_srcnid); /* adds peer ref */
- if (peer == NULL) {
- int hash = 0;
- struct kmx_peer *existing_peer = NULL;
- hash = mxlnd_nid_to_hash(msg->mxm_srcnid);
-
- mx_decompose_endpoint_addr(rx->mxc_status.source,
- &nic_id, &ep_id);
- rx->mxc_nid = msg->mxm_srcnid;
-
- /* adds conn ref for peer and one for this function */
- ret = mxlnd_peer_alloc(&peer, msg->mxm_srcnid);
- if (ret != 0) {
- goto cleanup;
- }
- LASSERT(peer->mxp_host->mxh_ep_id == ep_id);
- write_lock(&kmxlnd_data.kmx_peers_lock);
- existing_peer = mxlnd_find_peer_by_nid_locked(msg->mxm_srcnid);
- if (existing_peer) {
- mxlnd_conn_decref(peer->mxp_conn);
- mxlnd_peer_decref(peer);
- peer = existing_peer;
- mxlnd_conn_addref(peer->mxp_conn);
- } else {
- list_add_tail(&peer->mxp_peers,
- &kmxlnd_data.kmx_peers[hash]);
- write_unlock(&kmxlnd_data.kmx_peers_lock);
- atomic_inc(&kmxlnd_data.kmx_npeers);
- }
- } else {
- ret = mxlnd_conn_alloc(&conn, peer); /* adds 2nd ref */
- mxlnd_peer_decref(peer); /* drop ref taken above */
- if (ret != 0) {
- CDEBUG(D_NETERROR, "Cannot allocate mxp_conn\n");
- goto cleanup;
- }
- }
- conn_ref = 1; /* peer/conn_alloc() added ref for this function */
- conn = peer->mxp_conn;
- } else {
- struct kmx_conn *old_conn = conn;
-
- /* do not call mx_disconnect() */
- mxlnd_conn_disconnect(old_conn, 0, 0);
-
- /* the ref for this rx was taken on the old_conn */
- mxlnd_conn_decref(old_conn);
-
- /* This allocs a conn, points peer->mxp_conn to this one.
- * The old conn is still on the peer->mxp_conns list.
- * As the pending requests complete, they will call
- * conn_decref() which will eventually free it. */
- ret = mxlnd_conn_alloc(&conn, peer);
- if (ret != 0) {
- CDEBUG(D_NETERROR, "Cannot allocate peer->mxp_conn\n");
- goto cleanup;
- }
- /* conn_alloc() adds one ref for the peer and one for this function */
- conn_ref = 1;
- }
- spin_lock(&peer->mxp_lock);
- peer->mxp_incarnation = msg->mxm_srcstamp;
- peer->mxp_incompatible = incompatible;
- spin_unlock(&peer->mxp_lock);
- spin_lock(&conn->mxk_lock);
- conn->mxk_incarnation = msg->mxm_srcstamp;
- conn->mxk_status = MXLND_CONN_WAIT;
- spin_unlock(&conn->mxk_lock);
-
- /* handle_conn_ack() will create the CONN_ACK msg */
- mxlnd_iconnect(peer, MXLND_MASK_ICON_ACK);
-
- break;
-
- case MXLND_MSG_CONN_ACK:
- if (!lnet_ptlcompat_matchnid(kmxlnd_data.kmx_ni->ni_nid, msg->mxm_dstnid)) {
- CDEBUG(D_NETERROR, "Can't accept CONN_ACK from %s: "
- "bad dst nid %s\n", libcfs_nid2str(msg->mxm_srcnid),
- libcfs_nid2str(msg->mxm_dstnid));
- ret = -1;
- goto failed;
- }
- if (msg->mxm_u.conn_req.mxcrm_queue_depth != *kmxlnd_tunables.kmx_credits) {
- CDEBUG(D_NETERROR, "Can't accept CONN_ACK from %s: "
- "incompatible queue depth %d (%d wanted)\n",
- libcfs_nid2str(msg->mxm_srcnid),
- msg->mxm_u.conn_req.mxcrm_queue_depth,
- *kmxlnd_tunables.kmx_credits);
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_FAIL;
- spin_unlock(&conn->mxk_lock);
- incompatible = 1;
- ret = -1;
- }
- if (msg->mxm_u.conn_req.mxcrm_eager_size != MXLND_EAGER_SIZE) {
- CDEBUG(D_NETERROR, "Can't accept CONN_ACK from %s: "
- "incompatible EAGER size %d (%d wanted)\n",
- libcfs_nid2str(msg->mxm_srcnid),
- msg->mxm_u.conn_req.mxcrm_eager_size,
- (int) MXLND_EAGER_SIZE);
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_FAIL;
- spin_unlock(&conn->mxk_lock);
- incompatible = 1;
- ret = -1;
- }
- spin_lock(&peer->mxp_lock);
- peer->mxp_incarnation = msg->mxm_srcstamp;
- peer->mxp_incompatible = incompatible;
- spin_unlock(&peer->mxp_lock);
- spin_lock(&conn->mxk_lock);
- conn->mxk_credits = *kmxlnd_tunables.kmx_credits;
- conn->mxk_outstanding = 0;
- conn->mxk_incarnation = msg->mxm_srcstamp;
- conn->mxk_timeout = 0;
- if (!incompatible) {
- conn->mxk_status = MXLND_CONN_READY;
- }
- spin_unlock(&conn->mxk_lock);
- if (incompatible) mxlnd_conn_disconnect(conn, 0, 1);
- break;
-
- default:
- CDEBUG(D_NETERROR, "Bad MXLND message type %x from %s\n", msg->mxm_type,
- libcfs_nid2str(rx->mxc_nid));
- ret = -EPROTO;
- break;
- }
-
-failed:
- if (ret < 0) {
- MXLND_PRINT("setting PEER_CONN_FAILED\n");
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_FAIL;
- spin_unlock(&conn->mxk_lock);
- }
-
-cleanup:
- if (conn != NULL) {
- spin_lock(&conn->mxk_lock);
- conn->mxk_last_rx = cfs_time_current(); /* jiffies */
- spin_unlock(&conn->mxk_lock);
- }
-
- if (repost) {
- /* lnet_parse() failed, etc., repost now */
- mxlnd_put_idle_rx(rx);
- if (conn != NULL && credit == 1) {
- if (type == MXLND_MSG_PUT_DATA) {
- spin_lock(&conn->mxk_lock);
- conn->mxk_outstanding++;
- spin_unlock(&conn->mxk_lock);
- } else if (type != MXLND_MSG_GET_DATA &&
- (type == MXLND_MSG_EAGER ||
- type == MXLND_MSG_PUT_REQ ||
- type == MXLND_MSG_NOOP)) {
- spin_lock(&conn->mxk_lock);
- conn->mxk_outstanding++;
- spin_unlock(&conn->mxk_lock);
- }
- }
- if (conn_ref) mxlnd_conn_decref(conn);
- LASSERT(peer_ref == 0);
- }
-
- if (type == MXLND_MSG_PUT_DATA || type == MXLND_MSG_GET_DATA) {
- CDEBUG(D_NET, "leaving for rx (0x%llx)\n", bits);
- } else {
- CDEBUG(D_NET, "leaving for rx (0x%llx)\n", seq);
- }
-
- if (lntmsg[0] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[0], result);
- if (lntmsg[1] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[1], result);
-
- if (conn != NULL && credit == 1) mxlnd_check_sends(peer);
-
- return;
-}
-
-
-
-void
-mxlnd_handle_conn_req(struct kmx_peer *peer, mx_status_t status)
-{
- struct kmx_ctx *tx = NULL;
- struct kmx_msg *txmsg = NULL;
- struct kmx_conn *conn = peer->mxp_conn;
-
- /* a conn ref was taken when calling mx_iconnect(),
- * hold it until CONN_REQ or CONN_ACK completes */
-
- CDEBUG(D_NET, "entering\n");
- if (status.code != MX_STATUS_SUCCESS) {
- CDEBUG(D_NETERROR, "mx_iconnect() failed with %s (%d) to %s\n",
- mx_strstatus(status.code), status.code,
- libcfs_nid2str(peer->mxp_nid));
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_FAIL;
- spin_unlock(&conn->mxk_lock);
-
- if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) {
- struct kmx_conn *new_conn = NULL;
- CDEBUG(D_NETERROR, "timeout, calling conn_disconnect()\n");
- mxlnd_conn_disconnect(conn, 0, 1);
- mxlnd_conn_alloc(&new_conn, peer); /* adds a ref for this function */
- mxlnd_conn_decref(new_conn); /* which we no longer need */
- spin_lock(&peer->mxp_lock);
- peer->mxp_reconnect_time = 0;
- spin_unlock(&peer->mxp_lock);
- }
-
- mxlnd_conn_decref(conn);
- return;
- }
-
- spin_lock(&conn->mxk_lock);
- conn->mxk_epa = status.source;
- spin_unlock(&conn->mxk_lock);
- /* NOTE we are holding a ref on the conn which has a ref on the peer,
- * we should not need to lock the peer */
- mx_set_endpoint_addr_context(conn->mxk_epa, (void *) peer);
-
- /* mx_iconnect() succeeded, reset delay to 0 */
- spin_lock(&peer->mxp_lock);
- peer->mxp_reconnect_time = 0;
- spin_unlock(&peer->mxp_lock);
-
- /* marshal CONN_REQ msg */
- /* we are still using the conn ref from iconnect() - do not take another */
- tx = mxlnd_get_idle_tx();
- if (tx == NULL) {
- CDEBUG(D_NETERROR, "Can't allocate CONN_REQ tx for %s\n",
- libcfs_nid2str(peer->mxp_nid));
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_FAIL;
- spin_unlock(&conn->mxk_lock);
- mxlnd_conn_decref(conn);
- return;
- }
-
- tx->mxc_peer = peer;
- tx->mxc_conn = conn;
- mxlnd_init_tx_msg (tx, MXLND_MSG_CONN_REQ, sizeof(kmx_connreq_msg_t), peer->mxp_nid);
- txmsg = tx->mxc_msg;
- txmsg->mxm_u.conn_req.mxcrm_queue_depth = *kmxlnd_tunables.kmx_credits;
- txmsg->mxm_u.conn_req.mxcrm_eager_size = MXLND_EAGER_SIZE;
- tx->mxc_match = mxlnd_create_match(tx, 0);
-
- CDEBUG(D_NET, "sending MXLND_MSG_CONN_REQ\n");
- mxlnd_queue_tx(tx);
- return;
-}
-
-void
-mxlnd_handle_conn_ack(struct kmx_peer *peer, mx_status_t status)
-{
- struct kmx_ctx *tx = NULL;
- struct kmx_msg *txmsg = NULL;
- struct kmx_conn *conn = peer->mxp_conn;
-
- /* a conn ref was taken when calling mx_iconnect(),
- * hold it until CONN_REQ or CONN_ACK completes */
-
- CDEBUG(D_NET, "entering\n");
- if (status.code != MX_STATUS_SUCCESS) {
- CDEBUG(D_NETERROR, "mx_iconnect() failed for CONN_ACK with %s (%d) "
- "to %s mxp_nid = 0x%llx mxp_nic_id = 0x%0llx mxh_ep_id = %d\n",
- mx_strstatus(status.code), status.code,
- libcfs_nid2str(peer->mxp_nid),
- peer->mxp_nid,
- peer->mxp_nic_id,
- peer->mxp_host->mxh_ep_id);
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_FAIL;
- spin_unlock(&conn->mxk_lock);
-
- if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) {
- struct kmx_conn *new_conn = NULL;
- CDEBUG(D_NETERROR, "timeout, calling conn_disconnect()\n");
- mxlnd_conn_disconnect(conn, 0, 1);
- mxlnd_conn_alloc(&new_conn, peer); /* adds ref for
- this function... */
- mxlnd_conn_decref(new_conn); /* which we no longer need */
- spin_lock(&peer->mxp_lock);
- peer->mxp_reconnect_time = 0;
- spin_unlock(&peer->mxp_lock);
- }
-
- mxlnd_conn_decref(conn);
- return;
- }
- spin_lock(&conn->mxk_lock);
- conn->mxk_epa = status.source;
- if (likely(!peer->mxp_incompatible)) {
- conn->mxk_status = MXLND_CONN_READY;
- }
- spin_unlock(&conn->mxk_lock);
- /* NOTE we are holding a ref on the conn which has a ref on the peer,
- * we should not have to lock the peer */
- mx_set_endpoint_addr_context(conn->mxk_epa, (void *) peer);
-
- /* mx_iconnect() succeeded, reset delay to 0 */
- spin_lock(&peer->mxp_lock);
- peer->mxp_reconnect_time = 0;
- spin_unlock(&peer->mxp_lock);
-
- /* marshal CONN_ACK msg */
- tx = mxlnd_get_idle_tx();
- if (tx == NULL) {
- CDEBUG(D_NETERROR, "Can't allocate CONN_ACK tx for %s\n",
- libcfs_nid2str(peer->mxp_nid));
- spin_lock(&conn->mxk_lock);
- conn->mxk_status = MXLND_CONN_FAIL;
- spin_unlock(&conn->mxk_lock);
- mxlnd_conn_decref(conn);
- return;
- }
-
- tx->mxc_peer = peer;
- tx->mxc_conn = conn;
- CDEBUG(D_NET, "sending MXLND_MSG_CONN_ACK\n");
- mxlnd_init_tx_msg (tx, MXLND_MSG_CONN_ACK, sizeof(kmx_connreq_msg_t), peer->mxp_nid);
- txmsg = tx->mxc_msg;
- txmsg->mxm_u.conn_req.mxcrm_queue_depth = *kmxlnd_tunables.kmx_credits;
- txmsg->mxm_u.conn_req.mxcrm_eager_size = MXLND_EAGER_SIZE;
- tx->mxc_match = mxlnd_create_match(tx, 0);
-
- mxlnd_queue_tx(tx);
- return;
-}
-
-/**
- * mxlnd_request_waitd - the MX request completion thread(s)
- * @arg - thread id (as a void *)
- *
- * This thread waits for a MX completion and then completes the request.
- * We will create one thread per CPU.
- */
-int
-mxlnd_request_waitd(void *arg)
-{
- long id = (long) arg;
- char name[24];
- __u32 result = 0;
- mx_return_t mxret = MX_SUCCESS;
- mx_status_t status;
- struct kmx_ctx *ctx = NULL;
- enum kmx_req_state req_type = MXLND_REQ_TX;
- struct kmx_peer *peer = NULL;
- struct kmx_conn *conn = NULL;
-#if MXLND_POLLING
- int count = 0;
-#endif
-
- memset(name, 0, sizeof(name));
- snprintf(name, sizeof(name), "mxlnd_request_waitd_%02ld", id);
- cfs_daemonize(name);
- //cfs_block_allsigs();
-
- memset(&status, 0, sizeof(status));
-
- CDEBUG(D_NET, "%s starting\n", name);
-
- while (!kmxlnd_data.kmx_shutdown) {
- mxret = MX_SUCCESS;
- result = 0;
-#if MXLND_POLLING
- if (id == 0 && count++ < *kmxlnd_tunables.kmx_polling) {
- mxret = mx_test_any(kmxlnd_data.kmx_endpt, 0LL, 0LL,
- &status, &result);
- } else {
- count = 0;
- mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT,
- 0LL, 0LL, &status, &result);
- }
-#else
- mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT,
- 0LL, 0LL, &status, &result);
-#endif
- if (unlikely(kmxlnd_data.kmx_shutdown))
- break;
-
- if (result != 1) {
- /* nothing completed... */
- continue;
- }
-
- if (status.code != MX_STATUS_SUCCESS) {
- CDEBUG(D_NETERROR, "wait_any() failed with %s (%d) with "
- "match_info 0x%llx and length %d\n",
- mx_strstatus(status.code), status.code,
- (u64) status.match_info, status.msg_length);
- }
-
- /* This may be a mx_iconnect() request completing,
- * check the bit mask for CONN_REQ and CONN_ACK */
- if (status.match_info == MXLND_MASK_ICON_REQ ||
- status.match_info == MXLND_MASK_ICON_ACK) {
- peer = (struct kmx_peer*) status.context;
- if (status.match_info == MXLND_MASK_ICON_REQ) {
- mxlnd_handle_conn_req(peer, status);
- } else {
- mxlnd_handle_conn_ack(peer, status);
- }
- continue;
- }
-
- /* This must be a tx or rx */
-
- /* NOTE: if this is a RX from the unexpected callback, it may
- * have very little info. If we dropped it in unexpected_recv(),
- * it will not have a context. If so, ignore it. */
- ctx = (struct kmx_ctx *) status.context;
- if (ctx != NULL) {
-
- req_type = ctx->mxc_type;
- conn = ctx->mxc_conn; /* this may be NULL */
- mxlnd_deq_pending_ctx(ctx);
-
- /* copy status to ctx->mxc_status */
- memcpy(&ctx->mxc_status, &status, sizeof(status));
-
- switch (req_type) {
- case MXLND_REQ_TX:
- mxlnd_handle_tx_completion(ctx);
- break;
- case MXLND_REQ_RX:
- mxlnd_handle_rx_completion(ctx);
- break;
- default:
- CDEBUG(D_NETERROR, "Unknown ctx type %d\n", req_type);
- LBUG();
- break;
- }
-
- /* FIXME may need to reconsider this */
- /* conn is always set except for the first CONN_REQ rx
- * from a new peer */
- if (!(status.code == MX_STATUS_SUCCESS ||
- status.code == MX_STATUS_TRUNCATED) &&
- conn != NULL) {
- mxlnd_conn_disconnect(conn, 1, 1);
- }
- }
- CDEBUG(D_NET, "waitd() completed task\n");
- }
- CDEBUG(D_NET, "%s stopping\n", name);
- mxlnd_thread_stop(id);
- return 0;
-}
-
-
-unsigned long
-mxlnd_check_timeouts(unsigned long now)
-{
- int i = 0;
- int disconnect = 0;
- unsigned long next = 0;
- struct kmx_peer *peer = NULL;
- struct kmx_conn *conn = NULL;
-
- read_lock(&kmxlnd_data.kmx_peers_lock);
- for (i = 0; i < MXLND_HASH_SIZE; i++) {
- list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) {
-
- if (unlikely(kmxlnd_data.kmx_shutdown)) {
- read_unlock(&kmxlnd_data.kmx_peers_lock);
- return next;
- }
-
- spin_lock(&peer->mxp_lock);
- conn = peer->mxp_conn;
- if (conn) {
- mxlnd_conn_addref(conn);
- spin_unlock(&peer->mxp_lock);
- } else {
- spin_unlock(&peer->mxp_lock);
- continue;
- }
-
- spin_lock(&conn->mxk_lock);
-
- /* if nothing pending (timeout == 0) or
- * if conn is already disconnected,
- * skip this conn */
- if (conn->mxk_timeout == 0 ||
- conn->mxk_status == MXLND_CONN_DISCONNECT) {
- spin_unlock(&conn->mxk_lock);
- mxlnd_conn_decref(conn);
- continue;
- }
-
- /* we want to find the timeout that will occur first.
- * if it is in the future, we will sleep until then.
- * if it is in the past, then we will sleep one
- * second and repeat the process. */
- if ((next == 0) || (conn->mxk_timeout < next)) {
- next = conn->mxk_timeout;
- }
-
- disconnect = 0;
-
- if (time_after_eq(now, conn->mxk_timeout)) {
- disconnect = 1;
- }
- spin_unlock(&conn->mxk_lock);
-
- if (disconnect) {
- mxlnd_conn_disconnect(conn, 1, 1);
- }
- mxlnd_conn_decref(conn);
- }
- }
- read_unlock(&kmxlnd_data.kmx_peers_lock);
- if (next == 0) next = now + MXLND_COMM_TIMEOUT;
-
- return next;
-}
-
-/**
- * mxlnd_timeoutd - enforces timeouts on messages
- * @arg - thread id (as a void *)
- *
- * This thread queries each peer for its earliest timeout. If a peer has timed out,
- * it calls mxlnd_conn_disconnect().
- *
- * After checking for timeouts, try progressing sends (call check_sends()).
- */
-int
-mxlnd_timeoutd(void *arg)
-{
- int i = 0;
- long id = (long) arg;
- unsigned long now = 0;
- unsigned long next = 0;
- unsigned long delay = HZ;
- struct kmx_peer *peer = NULL;
- struct kmx_conn *conn = NULL;
-
- cfs_daemonize("mxlnd_timeoutd");
- //cfs_block_allsigs();
-
- CDEBUG(D_NET, "timeoutd starting\n");
-
- while (!kmxlnd_data.kmx_shutdown) {
-
- now = jiffies;
- /* if the next timeout has not arrived, go back to sleep */
- if (time_after(now, next)) {
- next = mxlnd_check_timeouts(now);
- }
-
- read_lock(&kmxlnd_data.kmx_peers_lock);
- for (i = 0; i < MXLND_HASH_SIZE; i++) {
- list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) {
- spin_lock(&peer->mxp_lock);
- conn = peer->mxp_conn;
- if (conn) mxlnd_conn_addref(conn); /* take ref... */
- spin_unlock(&peer->mxp_lock);
-
- if (conn == NULL)
- continue;
-
- if (conn->mxk_status != MXLND_CONN_DISCONNECT &&
- time_after(now, conn->mxk_last_tx + HZ)) {
- mxlnd_check_sends(peer);
- }
- mxlnd_conn_decref(conn); /* until here */
- }
- }
- read_unlock(&kmxlnd_data.kmx_peers_lock);
-
- mxlnd_sleep(delay);
- }
- CDEBUG(D_NET, "timeoutd stopping\n");
- mxlnd_thread_stop(id);
- return 0;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Copyright (C) 2006 Myricom, Inc.
- * Author: Scott Atchley <atchley at myri.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "mxlnd.h"
-
-static int n_waitd = MXLND_N_SCHED;
-CFS_MODULE_PARM(n_waitd, "i", int, 0444,
- "# of completion daemons");
-
-static int max_peers = MXLND_MAX_PEERS;
-CFS_MODULE_PARM(max_peers, "i", int, 0444,
- "maximum number of peers that may connect");
-
-static int cksum = MXLND_CKSUM;
-CFS_MODULE_PARM(cksum, "i", int, 0644,
- "set non-zero to enable message (not data payload) checksums");
-
-static int ntx = MXLND_NTX;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# of total tx message descriptors");
-
-static int credits = MXLND_MSG_QUEUE_DEPTH;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int board = MXLND_MX_BOARD;
-CFS_MODULE_PARM(board, "i", int, 0444,
- "index value of the Myrinet board (NIC)");
-
-static int ep_id = MXLND_MX_EP_ID;
-CFS_MODULE_PARM(ep_id, "i", int, 0444, "MX endpoint ID");
-
-static int polling = MXLND_POLLING;
-CFS_MODULE_PARM(polling, "i", int, 0444,
- "Use 0 to block (wait). A value > 0 will poll that many times before blocking");
-
-static char *hosts = NULL;
-CFS_MODULE_PARM(hosts, "s", charp, 0444,
- "IP-to-hostname resolution file");
-
-kmx_tunables_t kmxlnd_tunables = {
- .kmx_n_waitd = &n_waitd,
- .kmx_max_peers = &max_peers,
- .kmx_cksum = &cksum,
- .kmx_ntx = &ntx,
- .kmx_credits = &credits,
- .kmx_board = &board,
- .kmx_ep_id = &ep_id,
- .kmx_polling = &polling,
- .kmx_hosts = &hosts
-};
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Copyright (C) 2006 Myricom, Inc.
- * Author: Scott Atchley <atchley at myri.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * MXLND wire format - sent in sender's byte order
- */
-
-typedef struct kmx_connreq_msg
-{
- u32 mxcrm_queue_depth; /* per peer max messages in flight */
- u32 mxcrm_eager_size; /* size of preposted eager messages */
-} WIRE_ATTR kmx_connreq_msg_t;
-
-typedef struct kmx_eager_msg
-{
- lnet_hdr_t mxem_hdr; /* lnet header */
- char mxem_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR kmx_eager_msg_t;
-
-typedef struct kmx_putreq_msg
-{
- lnet_hdr_t mxprm_hdr; /* lnet header */
- u64 mxprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR kmx_putreq_msg_t;
-
-typedef struct kmx_putack_msg
-{
- u64 mxpam_src_cookie; /* reflected completion cookie */
- u64 mxpam_dst_cookie; /* opaque completion cookie */
-} WIRE_ATTR kmx_putack_msg_t;
-
-typedef struct kmx_getreq_msg
-{
- lnet_hdr_t mxgrm_hdr; /* lnet header */
- u64 mxgrm_cookie; /* opaque completion cookie */
-} WIRE_ATTR kmx_getreq_msg_t;
-
-typedef struct kmx_msg
-{
- /* First two fields fixed for all time */
- u32 mxm_magic; /* MXLND message */
- u16 mxm_version; /* version number */
-
- u8 mxm_type; /* message type */
- u8 mxm_credits; /* returned credits */
- u32 mxm_nob; /* # of bytes in whole message */
- u32 mxm_cksum; /* checksum (0 == no checksum) */
- u64 mxm_srcnid; /* sender's NID */
- u64 mxm_srcstamp; /* sender's incarnation */
- u64 mxm_dstnid; /* destination's NID */
- u64 mxm_dststamp; /* destination's incarnation */
- u64 mxm_seq; /* sequence number */
-
- union {
- kmx_connreq_msg_t conn_req;
- kmx_eager_msg_t eager;
- kmx_putreq_msg_t put_req;
- kmx_putack_msg_t put_ack;
- kmx_getreq_msg_t get_req;
- } WIRE_ATTR mxm_u;
-} WIRE_ATTR kmx_msg_t;
-
-#define MXLND_MSG_MAGIC 0x4d583130 /* unique magic 'MX10' */
-#define MXLND_MSG_VERSION 0x01
-
-#define MXLND_MSG_CONN_REQ 0xc /* connection request */
-#define MXLND_MSG_CONN_ACK 0xa /* connection request response */
-#define MXLND_MSG_EAGER 0xe /* eager message */
-#define MXLND_MSG_NOOP 0x1 /* no msg, return credits */
-#define MXLND_MSG_PUT_REQ 0x2 /* put request src->sink */
-#define MXLND_MSG_PUT_ACK 0x3 /* put ack src<-sink */
-#define MXLND_MSG_PUT_DATA 0x4 /* put payload src->sink */
-#define MXLND_MSG_GET_REQ 0x5 /* get request sink->src */
-#define MXLND_MSG_GET_DATA 0x6 /* get payload sink<-src */
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
-wirecheck
+++ /dev/null
-MODULES := ko2iblnd
-ko2iblnd-objs := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o
-
-# Need to make sure we use PRE, not POST here so that an external OFED
-# source pool overrides any in-kernel OFED sources
-EXTRA_PRE_CFLAGS := @O2IBCPPFLAGS@
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_O2IBLND
-modulenet_DATA = ko2iblnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(ko2iblnd-objs:%.o=%.c) o2iblnd.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2006 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "o2iblnd.h"
-
-lnd_t the_kiblnd = {
- .lnd_type = O2IBLND,
- .lnd_startup = kiblnd_startup,
- .lnd_shutdown = kiblnd_shutdown,
- .lnd_ctl = kiblnd_ctl,
- .lnd_send = kiblnd_send,
- .lnd_recv = kiblnd_recv,
-};
-
-kib_data_t kiblnd_data;
-
-__u32
-kiblnd_cksum (void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return (sum == 0) ? 1 : sum;
-}
-
-void
-kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob)
-{
- msg->ibm_type = type;
- msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
-}
-
-void
-kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg,
- int credits, lnet_nid_t dstnid, __u64 dststamp)
-{
- kib_net_t *net = ni->ni_data;
-
- /* CAVEAT EMPTOR! all message fields not set here should have been
- * initialised previously. */
- msg->ibm_magic = IBLND_MSG_MAGIC;
- msg->ibm_version = IBLND_MSG_VERSION;
- /* ibm_type */
- msg->ibm_credits = credits;
- /* ibm_nob */
- msg->ibm_cksum = 0;
- msg->ibm_srcnid = lnet_ptlcompat_srcnid(ni->ni_nid, dstnid);
- msg->ibm_srcstamp = net->ibn_incarnation;
- msg->ibm_dstnid = dstnid;
- msg->ibm_dststamp = dststamp;
-
- if (*kiblnd_tunables.kib_cksum) {
- /* NB ibm_cksum zero while computing cksum */
- msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
- }
-}
-
-int
-kiblnd_unpack_msg(kib_msg_t *msg, int nob)
-{
- const int hdr_size = offsetof(kib_msg_t, ibm_u);
- __u32 msg_cksum;
- int flip;
- int msg_nob;
-#if !IBLND_MAP_ON_DEMAND
- int i;
- int n;
-#endif
- /* 6 bytes are enough to have received magic + version */
- if (nob < 6) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- if (msg->ibm_magic == IBLND_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) {
- flip = 1;
- } else {
- CERROR("Bad magic: %08x\n", msg->ibm_magic);
- return -EPROTO;
- }
-
- if (msg->ibm_version !=
- (flip ? __swab16(IBLND_MSG_VERSION) : IBLND_MSG_VERSION)) {
- CERROR("Bad version: %d\n", msg->ibm_version);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
- if (msg_nob > nob) {
- CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
- return -EPROTO;
- }
-
- /* checksum must be computed with ibm_cksum zero and BEFORE anything
- * gets flipped */
- msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
- msg->ibm_cksum = 0;
- if (msg_cksum != 0 &&
- msg_cksum != kiblnd_cksum(msg, msg_nob)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
- msg->ibm_cksum = msg_cksum;
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- __swab16s(&msg->ibm_version);
- CLASSERT (sizeof(msg->ibm_type) == 1);
- CLASSERT (sizeof(msg->ibm_credits) == 1);
- msg->ibm_nob = msg_nob;
- __swab64s(&msg->ibm_srcnid);
- __swab64s(&msg->ibm_srcstamp);
- __swab64s(&msg->ibm_dstnid);
- __swab64s(&msg->ibm_dststamp);
- }
-
- if (msg->ibm_srcnid == LNET_NID_ANY) {
- CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
- return -EPROTO;
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Unknown message type %x\n", msg->ibm_type);
- return -EPROTO;
-
- case IBLND_MSG_NOOP:
- break;
-
- case IBLND_MSG_IMMEDIATE:
- if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
- CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
- return -EPROTO;
- }
- break;
-
- case IBLND_MSG_PUT_REQ:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
- CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
- return -EPROTO;
- }
- break;
-
- case IBLND_MSG_PUT_ACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
- CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.putack)));
- return -EPROTO;
- }
-#if IBLND_MAP_ON_DEMAND
- if (flip) {
- __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
- }
-#else
- if (flip) {
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrags);
- }
-
- n = msg->ibm_u.putack.ibpam_rd.rd_nfrags;
- if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
- CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
- n, IBLND_MAX_RDMA_FRAGS);
- return -EPROTO;
- }
-
- if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
- CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
- return -EPROTO;
- }
-
- if (flip) {
- for (i = 0; i < n; i++) {
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
- __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr);
- }
- }
-#endif
- break;
-
- case IBLND_MSG_GET_REQ:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
- CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.get)));
- return -EPROTO;
- }
-#if IBLND_MAP_ON_DEMAND
- if (flip) {
- __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
- }
-#else
- if (flip) {
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrags);
- }
-
- n = msg->ibm_u.get.ibgm_rd.rd_nfrags;
- if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
- CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
- n, IBLND_MAX_RDMA_FRAGS);
- return -EPROTO;
- }
-
- if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
- CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
- return -EPROTO;
- }
-
- if (flip)
- for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrags; i++) {
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
- __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr);
- }
-#endif
- break;
-
- case IBLND_MSG_PUT_NAK:
- case IBLND_MSG_PUT_DONE:
- case IBLND_MSG_GET_DONE:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
- CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.completion)));
- return -EPROTO;
- }
- if (flip)
- __swab32s(&msg->ibm_u.completion.ibcm_status);
- break;
-
- case IBLND_MSG_CONNREQ:
- case IBLND_MSG_CONNACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
- CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
- return -EPROTO;
- }
- if (flip) {
- __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth);
- __swab16s(&msg->ibm_u.connparams.ibcp_max_frags);
- __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
- }
- break;
- }
- return 0;
-}
-
-int
-kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- kib_net_t *net = ni->ni_data;
- unsigned long flags;
-
- LASSERT (net != NULL);
- LASSERT (nid != LNET_NID_ANY);
-
- LIBCFS_ALLOC(peer, sizeof(*peer));
- if (peer == NULL) {
- CERROR("Cannot allocate peer\n");
- return -ENOMEM;
- }
-
- memset(peer, 0, sizeof(*peer)); /* zero flags etc */
-
- peer->ibp_ni = ni;
- peer->ibp_nid = nid;
- peer->ibp_error = 0;
- peer->ibp_last_alive = cfs_time_current();
- atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */
-
- INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */
- INIT_LIST_HEAD(&peer->ibp_conns);
- INIT_LIST_HEAD(&peer->ibp_tx_queue);
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- /* always called with a ref on ni, which prevents ni being shutdown */
- LASSERT (net->ibn_shutdown == 0);
-
- /* npeers only grows with the global lock held */
- atomic_inc(&net->ibn_npeers);
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- *peerp = peer;
- return 0;
-}
-
-void
-kiblnd_destroy_peer (kib_peer_t *peer)
-{
- kib_net_t *net = peer->ibp_ni->ni_data;
-
- LASSERT (net != NULL);
- LASSERT (atomic_read(&peer->ibp_refcount) == 0);
- LASSERT (!kiblnd_peer_active(peer));
- LASSERT (peer->ibp_connecting == 0);
- LASSERT (peer->ibp_accepting == 0);
- LASSERT (list_empty(&peer->ibp_conns));
- LASSERT (list_empty(&peer->ibp_tx_queue));
-
- LIBCFS_FREE(peer, sizeof(*peer));
-
- /* NB a peer's connections keep a reference on their peer until
- * they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
- * zero. */
- atomic_dec(&net->ibn_npeers);
-}
-
-void
-kiblnd_destroy_dev (kib_dev_t *dev)
-{
- LASSERT (dev->ibd_nnets == 0);
-
- if (!list_empty(&dev->ibd_list)) /* on kib_devs? */
- list_del_init(&dev->ibd_list);
-
- if (dev->ibd_mr != NULL)
- ib_dereg_mr(dev->ibd_mr);
-
- if (dev->ibd_pd != NULL)
- ib_dealloc_pd(dev->ibd_pd);
-
- if (dev->ibd_cmid != NULL)
- rdma_destroy_id(dev->ibd_cmid);
-
- LIBCFS_FREE(dev, sizeof(*dev));
-}
-
-kib_peer_t *
-kiblnd_find_peer_locked (lnet_nid_t nid)
-{
- /* the caller is responsible for accounting the additional reference
- * that this creates */
- struct list_head *peer_list = kiblnd_nid2peerlist(nid);
- struct list_head *tmp;
- kib_peer_t *peer;
-
- list_for_each (tmp, peer_list) {
-
- peer = list_entry(tmp, kib_peer_t, ibp_list);
-
- LASSERT (peer->ibp_connecting > 0 || /* creating conns */
- peer->ibp_accepting > 0 ||
- !list_empty(&peer->ibp_conns)); /* active conn */
-
- if (peer->ibp_nid != nid)
- continue;
-
- CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
- peer, libcfs_nid2str(nid),
- atomic_read(&peer->ibp_refcount));
- return peer;
- }
- return NULL;
-}
-
-void
-kiblnd_unlink_peer_locked (kib_peer_t *peer)
-{
- LASSERT (list_empty(&peer->ibp_conns));
-
- LASSERT (kiblnd_peer_active(peer));
- list_del_init(&peer->ibp_list);
- /* lose peerlist's ref */
- kiblnd_peer_decref(peer);
-}
-
-int
-kiblnd_get_peer_info (lnet_ni_t *ni, int index,
- lnet_nid_t *nidp, int *count)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- int i;
- unsigned long flags;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
-
- list_for_each (ptmp, &kiblnd_data.kib_peers[i]) {
-
- peer = list_entry(ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_connecting > 0 ||
- peer->ibp_accepting > 0 ||
- !list_empty(&peer->ibp_conns));
-
- if (peer->ibp_ni != ni)
- continue;
-
- if (index-- > 0)
- continue;
-
- *nidp = peer->ibp_nid;
- *count = atomic_read(&peer->ibp_refcount);
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
- flags);
- return 0;
- }
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- return -ENOENT;
-}
-
-void
-kiblnd_del_peer_locked (kib_peer_t *peer)
-{
- struct list_head *ctmp;
- struct list_head *cnxt;
- kib_conn_t *conn;
-
- if (list_empty(&peer->ibp_conns)) {
- kiblnd_unlink_peer_locked(peer);
- } else {
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, kib_conn_t, ibc_list);
-
- kiblnd_close_conn_locked(conn, 0);
- }
- /* NB closing peer's last conn unlinked it. */
- }
- /* NB peer now unlinked; might even be freed if the peer table had the
- * last ref on it. */
-}
-
-int
-kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid)
-{
- CFS_LIST_HEAD (zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- kib_peer_t *peer;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int rc = -ENOENT;
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY) {
- lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
- } else {
- lo = 0;
- hi = kiblnd_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_connecting > 0 ||
- peer->ibp_accepting > 0 ||
- !list_empty(&peer->ibp_conns));
-
- if (peer->ibp_ni != ni)
- continue;
-
- if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
- continue;
-
- if (!list_empty(&peer->ibp_tx_queue)) {
- LASSERT (list_empty(&peer->ibp_conns));
-
- list_splice_init(&peer->ibp_tx_queue, &zombies);
- }
-
- kiblnd_del_peer_locked(peer);
- rc = 0; /* matched something */
- }
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_txlist_done(ni, &zombies, -EIO);
-
- return rc;
-}
-
-kib_conn_t *
-kiblnd_get_conn_by_idx (lnet_ni_t *ni, int index)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- kib_conn_t *conn;
- struct list_head *ctmp;
- int i;
- unsigned long flags;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
- list_for_each (ptmp, &kiblnd_data.kib_peers[i]) {
-
- peer = list_entry(ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_connecting > 0 ||
- peer->ibp_accepting > 0 ||
- !list_empty(&peer->ibp_conns));
-
- if (peer->ibp_ni != ni)
- continue;
-
- list_for_each (ctmp, &peer->ibp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry(ctmp, kib_conn_t, ibc_list);
- kiblnd_conn_addref(conn);
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
- flags);
- return conn;
- }
- }
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- return NULL;
-}
-
-void
-kiblnd_debug_rx (kib_rx_t *rx)
-{
- CDEBUG(D_CONSOLE, " %p status %d msg_type %x cred %d\n",
- rx, rx->rx_status, rx->rx_msg->ibm_type,
- rx->rx_msg->ibm_credits);
-}
-
-void
-kiblnd_debug_tx (kib_tx_t *tx)
-{
- CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lx "
- "cookie "LPX64" msg %s%s type %x cred %d\n",
- tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting,
- tx->tx_status, tx->tx_deadline, tx->tx_cookie,
- tx->tx_lntmsg[0] == NULL ? "-" : "!",
- tx->tx_lntmsg[1] == NULL ? "-" : "!",
- tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits);
-}
-
-void
-kiblnd_debug_conn (kib_conn_t *conn)
-{
- struct list_head *tmp;
- int i;
-
- spin_lock(&conn->ibc_lock);
-
- CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n",
- atomic_read(&conn->ibc_refcount), conn,
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- CDEBUG(D_CONSOLE, " state %d nposted %d cred %d o_cred %d r_cred %d\n",
- conn->ibc_state, conn->ibc_nsends_posted, conn->ibc_credits,
- conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
- CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error);
-
- CDEBUG(D_CONSOLE, " early_rxs:\n");
- list_for_each(tmp, &conn->ibc_early_rxs)
- kiblnd_debug_rx(list_entry(tmp, kib_rx_t, rx_list));
-
- CDEBUG(D_CONSOLE, " tx_noops:\n");
- list_for_each(tmp, &conn->ibc_tx_noops)
- kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
- CDEBUG(D_CONSOLE, " tx_queue_nocred:\n");
- list_for_each(tmp, &conn->ibc_tx_queue_nocred)
- kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
- CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n");
- list_for_each(tmp, &conn->ibc_tx_queue_rsrvd)
- kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
- CDEBUG(D_CONSOLE, " tx_queue:\n");
- list_for_each(tmp, &conn->ibc_tx_queue)
- kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
- CDEBUG(D_CONSOLE, " active_txs:\n");
- list_for_each(tmp, &conn->ibc_active_txs)
- kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
- CDEBUG(D_CONSOLE, " rxs:\n");
- for (i = 0; i < IBLND_RX_MSGS; i++)
- kiblnd_debug_rx(&conn->ibc_rxs[i]);
-
- spin_unlock(&conn->ibc_lock);
-}
-
-kib_conn_t *
-kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid, int state)
-{
- /* CAVEAT EMPTOR:
- * If the new conn is created successfully it takes over the caller's
- * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself
- * is destroyed. On failure, the caller's ref on 'peer' remains and
- * she must dispose of 'cmid'. (Actually I'd block forever if I tried
- * to destroy 'cmid' here since I'm called from the CM which still has
- * its ref on 'cmid'). */
- kib_conn_t *conn;
- kib_net_t *net = peer->ibp_ni->ni_data;
- int i;
- int page_offset;
- int ipage;
- int rc;
- struct ib_cq *cq;
- struct ib_qp_init_attr *init_qp_attr;
- unsigned long flags;
-
- LASSERT (net != NULL);
- LASSERT (!in_interrupt());
-
- LIBCFS_ALLOC(init_qp_attr, sizeof(*init_qp_attr));
- if (init_qp_attr == NULL) {
- CERROR("Can't allocate qp_attr for %s\n",
- libcfs_nid2str(peer->ibp_nid));
- goto failed_0;
- }
-
- LIBCFS_ALLOC(conn, sizeof(*conn));
- if (conn == NULL) {
- CERROR("Can't allocate connection for %s\n",
- libcfs_nid2str(peer->ibp_nid));
- goto failed_1;
- }
-
- memset(conn, 0, sizeof(*conn)); /* zero flags, NULL pointers etc... */
-
- conn->ibc_state = IBLND_CONN_INIT;
- conn->ibc_peer = peer; /* I take the caller's ref */
- cmid->context = conn; /* for future CM callbacks */
- conn->ibc_cmid = cmid;
-
- INIT_LIST_HEAD(&conn->ibc_early_rxs);
- INIT_LIST_HEAD(&conn->ibc_tx_noops);
- INIT_LIST_HEAD(&conn->ibc_tx_queue);
- INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd);
- INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred);
- INIT_LIST_HEAD(&conn->ibc_active_txs);
- spin_lock_init(&conn->ibc_lock);
-
- LIBCFS_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
- if (conn->ibc_connvars == NULL) {
- CERROR("Can't allocate in-progress connection state\n");
- goto failed_2;
- }
- memset(conn->ibc_connvars, 0, sizeof(*conn->ibc_connvars));
-
- LIBCFS_ALLOC(conn->ibc_rxs, IBLND_RX_MSGS * sizeof(kib_rx_t));
- if (conn->ibc_rxs == NULL) {
- CERROR("Cannot allocate RX buffers\n");
- goto failed_2;
- }
- memset(conn->ibc_rxs, 0, IBLND_RX_MSGS * sizeof(kib_rx_t));
-
- rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, IBLND_RX_MSG_PAGES);
- if (rc != 0)
- goto failed_2;
-
- for (i = ipage = page_offset = 0; i < IBLND_RX_MSGS; i++) {
- struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
- kib_rx_t *rx = &conn->ibc_rxs[i];
-
- rx->rx_conn = conn;
- rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
- page_offset);
- rx->rx_msgaddr = kiblnd_dma_map_single(cmid->device,
- rx->rx_msg, IBLND_MSG_SIZE,
- DMA_FROM_DEVICE);
- KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr);
-
- CDEBUG(D_NET,"rx %d: %p "LPX64"("LPX64")\n",
- i, rx->rx_msg, rx->rx_msgaddr,
- lnet_page2phys(page) + page_offset);
-
- page_offset += IBLND_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBLND_RX_MSG_PAGES);
- }
- }
-
-#if (IBLND_OFED_VERSION == 1025)
- cq = ib_create_cq(cmid->device,
- kiblnd_cq_completion, kiblnd_cq_event, conn,
- IBLND_CQ_ENTRIES(), 0);
-#else
- cq = ib_create_cq(cmid->device,
- kiblnd_cq_completion, kiblnd_cq_event, conn,
- IBLND_CQ_ENTRIES());
-#endif
- if (!IS_ERR(cq)) {
- conn->ibc_cq = cq;
- } else {
- CERROR("Can't create CQ: %ld\n", PTR_ERR(cq));
- goto failed_2;
- }
-
- rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- if (rc != 0) {
- CERROR("Can't request completion notificiation: %d\n", rc);
- goto failed_2;
- }
-
- memset(init_qp_attr, 0, sizeof(*init_qp_attr));
- init_qp_attr->event_handler = kiblnd_qp_event;
- init_qp_attr->qp_context = conn;
- init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS;
- init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS;
- init_qp_attr->cap.max_send_sge = 1;
- init_qp_attr->cap.max_recv_sge = 1;
- init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
- init_qp_attr->qp_type = IB_QPT_RC;
- init_qp_attr->send_cq = cq;
- init_qp_attr->recv_cq = cq;
-
- rc = 0;
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- switch (*kiblnd_tunables.kib_ib_mtu) {
- default:
- rc = *kiblnd_tunables.kib_ib_mtu;
- /* fall through to... */
- case 0: /* set tunable to the default
- * CAVEAT EMPTOR! this assumes the default is one of the MTUs
- * below, otherwise we'll WARN on the next QP create */
- *kiblnd_tunables.kib_ib_mtu =
- ib_mtu_enum_to_int(cmid->route.path_rec->mtu);
- break;
- case 256:
- cmid->route.path_rec->mtu = IB_MTU_256;
- break;
- case 512:
- cmid->route.path_rec->mtu = IB_MTU_512;
- break;
- case 1024:
- cmid->route.path_rec->mtu = IB_MTU_1024;
- break;
- case 2048:
- cmid->route.path_rec->mtu = IB_MTU_2048;
- break;
- case 4096:
- cmid->route.path_rec->mtu = IB_MTU_4096;
- break;
- }
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- if (rc != 0)
- CWARN("Invalid IB MTU value %d, using default value %d\n",
- rc, *kiblnd_tunables.kib_ib_mtu);
-
- rc = rdma_create_qp(cmid, net->ibn_dev->ibd_pd, init_qp_attr);
- if (rc != 0) {
- CERROR("Can't create QP: %d\n", rc);
- goto failed_2;
- }
-
- LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
-
- /* 1 ref for caller and each rxmsg */
- atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS);
- conn->ibc_nrx = IBLND_RX_MSGS;
-
- /* post receives */
- for (i = 0; i < IBLND_RX_MSGS; i++) {
- rc = kiblnd_post_rx(&conn->ibc_rxs[i],
- IBLND_POSTRX_NO_CREDIT);
- if (rc != 0) {
- CERROR("Can't post rxmsg: %d\n", rc);
-
- /* Make posted receives complete */
- kiblnd_abort_receives(conn);
-
- /* correct # of posted buffers
- * NB locking needed now I'm racing with completion */
- spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
- conn->ibc_nrx -= IBLND_RX_MSGS - i;
- spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock,
- flags);
-
- /* Drop my own and unused rxbuffer refcounts */
- while (i++ <= IBLND_RX_MSGS)
- kiblnd_conn_decref(conn);
-
- return NULL;
- }
- }
-
- /* Init successful! */
- LASSERT (state == IBLND_CONN_ACTIVE_CONNECT ||
- state == IBLND_CONN_PASSIVE_WAIT);
- conn->ibc_state = state;
-
- /* 1 more conn */
- atomic_inc(&net->ibn_nconns);
- return conn;
-
- failed_2:
- kiblnd_destroy_conn(conn);
- failed_1:
- LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
- failed_0:
- return NULL;
-}
-
-void
-kiblnd_destroy_conn (kib_conn_t *conn)
-{
- struct rdma_cm_id *cmid = conn->ibc_cmid;
- kib_peer_t *peer = conn->ibc_peer;
- int rc;
- int i;
-
- LASSERT (!in_interrupt());
- LASSERT (atomic_read(&conn->ibc_refcount) == 0);
- LASSERT (list_empty(&conn->ibc_early_rxs));
- LASSERT (list_empty(&conn->ibc_tx_noops));
- LASSERT (list_empty(&conn->ibc_tx_queue));
- LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
- LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
- LASSERT (list_empty(&conn->ibc_active_txs));
- LASSERT (conn->ibc_nsends_posted == 0);
-
- switch (conn->ibc_state) {
- default:
- /* conn must be completely disengaged from the network */
- LBUG();
-
- case IBLND_CONN_DISCONNECTED:
- /* connvars should have been freed already */
- LASSERT (conn->ibc_connvars == NULL);
- break;
-
- case IBLND_CONN_INIT:
- break;
- }
-
- if (conn->ibc_cmid->qp != NULL)
- rdma_destroy_qp(conn->ibc_cmid);
-
- if (conn->ibc_cq != NULL) {
- rc = ib_destroy_cq(conn->ibc_cq);
- if (rc != 0)
- CWARN("Error destroying CQ: %d\n", rc);
- }
-
- if (conn->ibc_rx_pages != NULL) {
- LASSERT (conn->ibc_rxs != NULL);
-
- for (i = 0; i < IBLND_RX_MSGS; i++) {
- kib_rx_t *rx = &conn->ibc_rxs[i];
-
- LASSERT (rx->rx_nob >= 0); /* not posted */
-
- kiblnd_dma_unmap_single(conn->ibc_cmid->device,
- KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
- rx->rx_msgaddr),
- IBLND_MSG_SIZE, DMA_FROM_DEVICE);
- }
-
- kiblnd_free_pages(conn->ibc_rx_pages);
- }
-
- if (conn->ibc_rxs != NULL) {
- LIBCFS_FREE(conn->ibc_rxs,
- IBLND_RX_MSGS * sizeof(kib_rx_t));
- }
-
- if (conn->ibc_connvars != NULL)
- LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
-
- /* See CAVEAT EMPTOR above in kiblnd_create_conn */
- if (conn->ibc_state != IBLND_CONN_INIT) {
- kib_net_t *net = peer->ibp_ni->ni_data;
-
- kiblnd_peer_decref(peer);
- rdma_destroy_id(cmid);
- atomic_dec(&net->ibn_nconns);
- }
-
- LIBCFS_FREE(conn, sizeof(*conn));
-}
-
-int
-kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, kib_conn_t, ibc_list);
-
- count++;
- kiblnd_close_conn_locked(conn, why);
- }
-
- return count;
-}
-
-int
-kiblnd_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, kib_conn_t, ibc_list);
-
- if (conn->ibc_incarnation == incarnation)
- continue;
-
- CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_incarnation, incarnation);
-
- count++;
- kiblnd_close_conn_locked(conn, -ESTALE);
- }
-
- return count;
-}
-
-int
-kiblnd_close_matching_conns (lnet_ni_t *ni, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int count = 0;
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
- else {
- lo = 0;
- hi = kiblnd_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
-
- peer = list_entry(ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_connecting > 0 ||
- peer->ibp_accepting > 0 ||
- !list_empty(&peer->ibp_conns));
-
- if (peer->ibp_ni != ni)
- continue;
-
- if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
- continue;
-
- count += kiblnd_close_peer_conns_locked(peer, 0);
- }
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- /* wildcards always succeed */
- if (nid == LNET_NID_ANY)
- return 0;
-
- return (count == 0) ? -ENOENT : 0;
-}
-
-int
-kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
-
- switch(cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- int count = 0;
-
- rc = kiblnd_get_peer_info(ni, data->ioc_count,
- &nid, &count);
- data->ioc_nid = nid;
- data->ioc_count = count;
- break;
- }
-
- case IOC_LIBCFS_DEL_PEER: {
- rc = kiblnd_del_peer(ni, data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- kib_conn_t *conn = kiblnd_get_conn_by_idx(ni, data->ioc_count);
-
- if (conn == NULL) {
- rc = -ENOENT;
- } else {
- // kiblnd_debug_conn(conn);
- rc = 0;
- data->ioc_nid = conn->ibc_peer->ibp_nid;
- kiblnd_conn_decref(conn);
- }
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- rc = kiblnd_close_matching_conns(ni, data->ioc_nid);
- break;
- }
-
- default:
- break;
- }
-
- return rc;
-}
-
-void
-kiblnd_free_pages (kib_pages_t *p)
-{
- int npages = p->ibp_npages;
- int i;
-
- for (i = 0; i < npages; i++)
- if (p->ibp_pages[i] != NULL)
- __free_page(p->ibp_pages[i]);
-
- LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
-}
-
-int
-kiblnd_alloc_pages (kib_pages_t **pp, int npages)
-{
- kib_pages_t *p;
- int i;
-
- LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
- if (p == NULL) {
- CERROR("Can't allocate descriptor for %d pages\n", npages);
- return -ENOMEM;
- }
-
- memset(p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
- p->ibp_npages = npages;
-
- for (i = 0; i < npages; i++) {
- p->ibp_pages[i] = alloc_page(GFP_KERNEL);
- if (p->ibp_pages[i] == NULL) {
- CERROR("Can't allocate page %d of %d\n", i, npages);
- kiblnd_free_pages(p);
- return -ENOMEM;
- }
- }
-
- *pp = p;
- return 0;
-}
-
-void
-kiblnd_free_tx_descs (lnet_ni_t *ni)
-{
- int i;
- kib_net_t *net = ni->ni_data;
-
- LASSERT (net != NULL);
-
- if (net->ibn_tx_descs != NULL) {
- for (i = 0; i < IBLND_TX_MSGS(); i++) {
- kib_tx_t *tx = &net->ibn_tx_descs[i];
-
-#if IBLND_MAP_ON_DEMAND
- if (tx->tx_pages != NULL)
- LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV *
- sizeof(*tx->tx_pages));
-#else
- if (tx->tx_wrq != NULL)
- LIBCFS_FREE(tx->tx_wrq,
- (1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq));
-
- if (tx->tx_sge != NULL)
- LIBCFS_FREE(tx->tx_sge,
- (1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_sge));
-
- if (tx->tx_rd != NULL)
- LIBCFS_FREE(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
- rd_frags[IBLND_MAX_RDMA_FRAGS]));
-
- if (tx->tx_frags != NULL)
- LIBCFS_FREE(tx->tx_frags,
- IBLND_MAX_RDMA_FRAGS *
- sizeof(*tx->tx_frags));
-#endif
- }
-
- LIBCFS_FREE(net->ibn_tx_descs,
- IBLND_TX_MSGS() * sizeof(kib_tx_t));
- }
-
- if (net->ibn_tx_pages != NULL)
- kiblnd_free_pages(net->ibn_tx_pages);
-}
-
-int
-kiblnd_alloc_tx_descs (lnet_ni_t *ni)
-{
- int i;
- int rc;
- kib_net_t *net = ni->ni_data;
-
- LASSERT (net != NULL);
-
- rc = kiblnd_alloc_pages(&net->ibn_tx_pages, IBLND_TX_MSG_PAGES());
-
- if (rc != 0) {
- CERROR("Can't allocate tx pages\n");
- return rc;
- }
-
- LIBCFS_ALLOC (net->ibn_tx_descs,
- IBLND_TX_MSGS() * sizeof(kib_tx_t));
- if (net->ibn_tx_descs == NULL) {
- CERROR("Can't allocate %d tx descriptors\n", IBLND_TX_MSGS());
- return -ENOMEM;
- }
-
- memset(net->ibn_tx_descs, 0,
- IBLND_TX_MSGS() * sizeof(kib_tx_t));
-
- for (i = 0; i < IBLND_TX_MSGS(); i++) {
- kib_tx_t *tx = &net->ibn_tx_descs[i];
-
-#if IBLND_MAP_ON_DEMAND
- LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV *
- sizeof(*tx->tx_pages));
- if (tx->tx_pages == NULL) {
- CERROR("Can't allocate phys page vector[%d]\n",
- LNET_MAX_IOV);
- return -ENOMEM;
- }
-#else
- LIBCFS_ALLOC(tx->tx_wrq,
- (1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq));
- if (tx->tx_wrq == NULL)
- return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_sge,
- (1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_sge));
- if (tx->tx_sge == NULL)
- return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
- rd_frags[IBLND_MAX_RDMA_FRAGS]));
- if (tx->tx_rd == NULL)
- return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_frags,
- IBLND_MAX_RDMA_FRAGS *
- sizeof(*tx->tx_frags));
- if (tx->tx_frags == NULL)
- return -ENOMEM;
-#endif
- }
-
- return 0;
-}
-
-void
-kiblnd_unmap_tx_descs (lnet_ni_t *ni)
-{
- int i;
- kib_tx_t *tx;
- kib_net_t *net = ni->ni_data;
-
- LASSERT (net != NULL);
-
- for (i = 0; i < IBLND_TX_MSGS(); i++) {
- tx = &net->ibn_tx_descs[i];
-
- kiblnd_dma_unmap_single(net->ibn_dev->ibd_cmid->device,
- KIBLND_UNMAP_ADDR(tx, tx_msgunmap,
- tx->tx_msgaddr),
- IBLND_MSG_SIZE, DMA_TO_DEVICE);
- }
-}
-
-void
-kiblnd_map_tx_descs (lnet_ni_t *ni)
-{
- int ipage = 0;
- int page_offset = 0;
- int i;
- struct page *page;
- kib_tx_t *tx;
- kib_net_t *net = ni->ni_data;
-
- LASSERT (net != NULL);
-
- /* pre-mapped messages are not bigger than 1 page */
- CLASSERT (IBLND_MSG_SIZE <= PAGE_SIZE);
-
- /* No fancy arithmetic when we do the buffer calculations */
- CLASSERT (PAGE_SIZE % IBLND_MSG_SIZE == 0);
-
- for (i = 0; i < IBLND_TX_MSGS(); i++) {
- page = net->ibn_tx_pages->ibp_pages[ipage];
- tx = &net->ibn_tx_descs[i];
-
- tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
- page_offset);
-
- tx->tx_msgaddr = kiblnd_dma_map_single(
- net->ibn_dev->ibd_cmid->device,
- tx->tx_msg, IBLND_MSG_SIZE, DMA_TO_DEVICE);
- KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr);
-
- list_add(&tx->tx_list, &net->ibn_idle_txs);
-
- page_offset += IBLND_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBLND_TX_MSG_PAGES());
- }
- }
-}
-
-void
-kiblnd_base_shutdown (void)
-{
- int i;
-
- LASSERT (list_empty(&kiblnd_data.kib_devs));
-
- CDEBUG(D_MALLOC, "before LND base cleanup: kmem %d\n",
- atomic_read(&libcfs_kmemory));
-
- switch (kiblnd_data.kib_init) {
- default:
- LBUG();
-
- case IBLND_INIT_ALL:
- case IBLND_INIT_DATA:
- LASSERT (kiblnd_data.kib_peers != NULL);
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
- LASSERT (list_empty(&kiblnd_data.kib_peers[i]));
- }
- LASSERT (list_empty(&kiblnd_data.kib_connd_zombies));
- LASSERT (list_empty(&kiblnd_data.kib_connd_conns));
-
- /* flag threads to terminate; wake and wait for them to die */
- kiblnd_data.kib_shutdown = 1;
- wake_up_all(&kiblnd_data.kib_sched_waitq);
- wake_up_all(&kiblnd_data.kib_connd_waitq);
-
- i = 2;
- while (atomic_read(&kiblnd_data.kib_nthreads) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d threads to terminate\n",
- atomic_read(&kiblnd_data.kib_nthreads));
- cfs_pause(cfs_time_seconds(1));
- }
-
- /* fall through */
-
- case IBLND_INIT_NOTHING:
- break;
- }
-
- if (kiblnd_data.kib_peers != NULL)
- LIBCFS_FREE(kiblnd_data.kib_peers,
- sizeof(struct list_head) *
- kiblnd_data.kib_peer_hash_size);
-
- CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n",
- atomic_read(&libcfs_kmemory));
-
- kiblnd_data.kib_init = IBLND_INIT_NOTHING;
- PORTAL_MODULE_UNUSE;
-}
-
-void
-kiblnd_shutdown (lnet_ni_t *ni)
-{
- kib_net_t *net = ni->ni_data;
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- int i;
- unsigned long flags;
-
- LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
-
- if (net == NULL)
- goto out;
-
- CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n",
- atomic_read(&libcfs_kmemory));
-
- write_lock_irqsave(g_lock, flags);
- net->ibn_shutdown = 1;
- write_unlock_irqrestore(g_lock, flags);
-
- switch (net->ibn_init) {
- default:
- LBUG();
-
- case IBLND_INIT_ALL:
- /* nuke all existing peers within this net */
- kiblnd_del_peer(ni, LNET_NID_ANY);
-
- /* Wait for all peer state to clean up */
- i = 2;
- while (atomic_read(&net->ibn_npeers) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
- "%s: waiting for %d peers to disconnect\n",
- libcfs_nid2str(ni->ni_nid),
- atomic_read(&net->ibn_npeers));
- cfs_pause(cfs_time_seconds(1));
- }
-
- kiblnd_unmap_tx_descs(ni);
-
- LASSERT (net->ibn_dev->ibd_nnets > 0);
- net->ibn_dev->ibd_nnets--;
-
- /* fall through */
-
- case IBLND_INIT_NOTHING:
- LASSERT (atomic_read(&net->ibn_nconns) == 0);
-
-#if IBLND_MAP_ON_DEMAND
- if (net->ibn_fmrpool != NULL)
- ib_destroy_fmr_pool(net->ibn_fmrpool);
-#endif
- if (net->ibn_dev != NULL &&
- net->ibn_dev->ibd_nnets == 0)
- kiblnd_destroy_dev(net->ibn_dev);
-
- break;
- }
-
- kiblnd_free_tx_descs(ni);
-
- CDEBUG(D_MALLOC, "after LND net cleanup: kmem %d\n",
- atomic_read(&libcfs_kmemory));
-
- net->ibn_init = IBLND_INIT_NOTHING;
- ni->ni_data = NULL;
-
- LIBCFS_FREE(net, sizeof(*net));
-
-out:
- if (list_empty(&kiblnd_data.kib_devs))
- kiblnd_base_shutdown();
- return;
-}
-
-int
-kiblnd_base_startup (void)
-{
- int rc;
- int i;
-
- LASSERT (kiblnd_data.kib_init == IBLND_INIT_NOTHING);
-
- if (*kiblnd_tunables.kib_credits > *kiblnd_tunables.kib_ntx) {
- CERROR("Can't set credits(%d) > ntx(%d)\n",
- *kiblnd_tunables.kib_credits,
- *kiblnd_tunables.kib_ntx);
- return -EINVAL;
- }
-
- PORTAL_MODULE_USE;
- memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */
-
- rwlock_init(&kiblnd_data.kib_global_lock);
-
- INIT_LIST_HEAD(&kiblnd_data.kib_devs);
-
- kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE;
- LIBCFS_ALLOC(kiblnd_data.kib_peers,
- sizeof(struct list_head) * kiblnd_data.kib_peer_hash_size);
- if (kiblnd_data.kib_peers == NULL) {
- goto failed;
- }
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
- INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]);
-
- spin_lock_init(&kiblnd_data.kib_connd_lock);
- INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
- INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
- init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
-
- spin_lock_init(&kiblnd_data.kib_sched_lock);
- INIT_LIST_HEAD(&kiblnd_data.kib_sched_conns);
- init_waitqueue_head(&kiblnd_data.kib_sched_waitq);
-
- kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR;
-
- /* lists/ptrs/locks initialised */
- kiblnd_data.kib_init = IBLND_INIT_DATA;
- /*****************************************************/
-
- for (i = 0; i < IBLND_N_SCHED; i++) {
- rc = kiblnd_thread_start(kiblnd_scheduler, (void *)((long)i));
- if (rc != 0) {
- CERROR("Can't spawn o2iblnd scheduler[%d]: %d\n",
- i, rc);
- goto failed;
- }
- }
-
- rc = kiblnd_thread_start(kiblnd_connd, NULL);
- if (rc != 0) {
- CERROR("Can't spawn o2iblnd connd: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- kiblnd_data.kib_init = IBLND_INIT_ALL;
- /*****************************************************/
-
- return 0;
-
- failed:
- kiblnd_base_shutdown();
- return -ENETDOWN;
-}
-
-int
-kiblnd_startup (lnet_ni_t *ni)
-{
- char *ifname;
- kib_net_t *net;
- kib_dev_t *ibdev;
- struct list_head *tmp;
- struct timeval tv;
- int rc;
-
- LASSERT (ni->ni_lnd == &the_kiblnd);
-
- if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
- rc = kiblnd_base_startup();
- if (rc != 0)
- return rc;
- }
-
- LIBCFS_ALLOC(net, sizeof(*net));
- ni->ni_data = net;
- if (net == NULL)
- goto failed;
-
- memset(net, 0, sizeof(*net));
-
- do_gettimeofday(&tv);
- net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
- ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits;
- ni->ni_peertxcredits = *kiblnd_tunables.kib_peercredits;
-
- spin_lock_init(&net->ibn_tx_lock);
- INIT_LIST_HEAD(&net->ibn_idle_txs);
-
- rc = kiblnd_alloc_tx_descs(ni);
- if (rc != 0) {
- CERROR("Can't allocate tx descs\n");
- goto failed;
- }
-
- if (ni->ni_interfaces[0] != NULL) {
- /* Use the IPoIB interface specified in 'networks=' */
-
- CLASSERT (LNET_MAX_INTERFACES > 1);
- if (ni->ni_interfaces[1] != NULL) {
- CERROR("Multiple interfaces not supported\n");
- goto failed;
- }
-
- ifname = ni->ni_interfaces[0];
- } else {
- ifname = *kiblnd_tunables.kib_default_ipif;
- }
-
- if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) {
- CERROR("IPoIB interface name too long: %s\n", ifname);
- goto failed;
- }
-
- ibdev = NULL;
- list_for_each (tmp, &kiblnd_data.kib_devs) {
- ibdev = list_entry(tmp, kib_dev_t, ibd_list);
-
- if (!strcmp(&ibdev->ibd_ifname[0], ifname))
- break;
-
- ibdev = NULL;
- }
-
- if (ibdev == NULL) {
- __u32 ip;
- __u32 netmask;
- int up;
- struct rdma_cm_id *id;
- struct ib_pd *pd;
- struct ib_mr *mr;
- struct sockaddr_in addr;
-
- rc = libcfs_ipif_query(ifname, &up, &ip, &netmask);
- if (rc != 0) {
- CERROR("Can't query IPoIB interface %s: %d\n",
- ifname, rc);
- goto failed;
- }
-
- if (!up) {
- CERROR("Can't query IPoIB interface %s: it's down\n",
- ifname);
- goto failed;
- }
-
- LIBCFS_ALLOC(ibdev, sizeof(*ibdev));
- if (ibdev == NULL)
- goto failed;
-
- memset(ibdev, 0, sizeof(*ibdev));
-
- INIT_LIST_HEAD(&ibdev->ibd_list); /* not yet in kib_devs */
- ibdev->ibd_ifip = ip;
- strcpy(&ibdev->ibd_ifname[0], ifname);
-
- id = rdma_create_id(kiblnd_cm_callback, ibdev, RDMA_PS_TCP);
- if (!IS_ERR(id)) {
- ibdev->ibd_cmid = id;
- } else {
- CERROR("Can't create listen ID: %ld\n", PTR_ERR(id));
- goto failed;
- }
-
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_port = htons(*kiblnd_tunables.kib_service);
- addr.sin_addr.s_addr = htonl(ip);
-
- rc = rdma_bind_addr(id, (struct sockaddr *)&addr);
- if (rc != 0) {
- CERROR("Can't bind to %s: %d\n", ifname, rc);
- goto failed;
- }
-
- /* Binding should have assigned me an IB device */
- LASSERT (id->device != NULL);
-
- pd = ib_alloc_pd(id->device);
- if (!IS_ERR(pd)) {
- ibdev->ibd_pd = pd;
- } else {
- CERROR("Can't allocate PD: %ld\n", PTR_ERR(pd));
- goto failed;
- }
-
-#if IBLND_MAP_ON_DEMAND
- /* MR for sends and receives */
- mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
-#else
- /* MR for sends, recieves _and_ RDMA...........v */
- mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE);
-#endif
- if (!IS_ERR(mr)) {
- ibdev->ibd_mr = mr;
- } else {
- CERROR("Can't get MR: %ld\n", PTR_ERR(mr));
- goto failed;
- }
-
- rc = rdma_listen(id, 0);
- if (rc != 0) {
- CERROR("Can't start listener: %d\n", rc);
- goto failed;
- }
-
- list_add_tail(&ibdev->ibd_list,
- &kiblnd_data.kib_devs);
- }
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
- net->ibn_dev = ibdev;
-
-#if IBLND_MAP_ON_DEMAND
- /* FMR pool for RDMA */
- {
- struct ib_fmr_pool *fmrpool;
- struct ib_fmr_pool_param param = {
- .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
- .page_shift = PAGE_SHIFT,
- .access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE),
- .pool_size = *kiblnd_tunables.kib_fmr_pool_size,
- .dirty_watermark = *kiblnd_tunables.kib_fmr_flush_trigger,
- .flush_function = NULL,
- .flush_arg = NULL,
- .cache = *kiblnd_tunables.kib_fmr_cache};
-
- if (*kiblnd_tunables.kib_fmr_pool_size <
- *kiblnd_tunables.kib_ntx) {
- CERROR("Can't set fmr pool size (%d) < ntx(%d)\n",
- *kiblnd_tunables.kib_fmr_pool_size,
- *kiblnd_tunables.kib_ntx);
- goto failed;
- }
-
- fmrpool = ib_create_fmr_pool(ibdev->ibd_pd, ¶m);
- if (!IS_ERR(fmrpool)) {
- net->ibn_fmrpool = fmrpool;
- } else {
- CERROR("Can't create FMR pool: %ld\n",
- PTR_ERR(fmrpool));
- goto failed;
- }
- }
-#endif
-
- kiblnd_map_tx_descs(ni);
-
- ibdev->ibd_nnets++;
- net->ibn_init = IBLND_INIT_ALL;
-
- return 0;
-
-failed:
- kiblnd_shutdown(ni);
-
- CDEBUG(D_NET, "kiblnd_startup failed\n");
- return -ENETDOWN;
-}
-
-void __exit
-kiblnd_module_fini (void)
-{
- lnet_unregister_lnd(&the_kiblnd);
- kiblnd_tunables_fini();
-}
-
-int __init
-kiblnd_module_init (void)
-{
- int rc;
-
- CLASSERT (sizeof(kib_msg_t) <= IBLND_MSG_SIZE);
-#if !IBLND_MAP_ON_DEMAND
- CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
- <= IBLND_MSG_SIZE);
- CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
- <= IBLND_MSG_SIZE);
-#endif
- rc = kiblnd_tunables_init();
- if (rc != 0)
- return rc;
-
- lnet_register_lnd(&the_kiblnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel OpenIB gen2 LND v1.00");
-MODULE_LICENSE("GPL");
-
-module_init(kiblnd_module_init);
-module_exit(kiblnd_module_fini);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2006 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#include <linux/pci.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-
-#if !HAVE_GFP_T
-typedef int gfp_t;
-#endif
-
-#include <rdma/rdma_cm.h>
-#include <rdma/ib_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_fmr_pool.h>
-
-/* tunables fixed at compile time */
-#ifdef CONFIG_SMP
-# define IBLND_N_SCHED num_online_cpus() /* # schedulers */
-#else
-# define IBLND_N_SCHED 1 /* # schedulers */
-#endif
-
-#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */
-#define IBLND_RESCHED 100 /* # scheduler loops before reschedule */
-#define IBLND_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */
-#define IBLND_CREDIT_HIGHWATER 7 /* when eagerly to return credits */
-#define IBLND_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */
-
-#define IBLND_MAP_ON_DEMAND 0
-#if IBLND_MAP_ON_DEMAND
-# define IBLND_MAX_RDMA_FRAGS 1
-#else
-# define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV
-#endif
-
-/************************/
-/* derived constants... */
-
-/* TX messages (shared by all connections) */
-#define IBLND_TX_MSGS() (*kiblnd_tunables.kib_ntx)
-#define IBLND_TX_MSG_BYTES() (IBLND_TX_MSGS() * IBLND_MSG_SIZE)
-#define IBLND_TX_MSG_PAGES() ((IBLND_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)
-
-/* RX messages (per connection) */
-#define IBLND_RX_MSGS (IBLND_MSG_QUEUE_SIZE * 2)
-#define IBLND_RX_MSG_BYTES (IBLND_RX_MSGS * IBLND_MSG_SIZE)
-#define IBLND_RX_MSG_PAGES ((IBLND_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
-
-/* WRs and CQEs (per connection) */
-#define IBLND_RECV_WRS IBLND_RX_MSGS
-#define IBLND_SEND_WRS ((*kiblnd_tunables.kib_concurrent_sends) * \
- (1 + IBLND_MAX_RDMA_FRAGS))
-#define IBLND_CQ_ENTRIES() (IBLND_RECV_WRS + IBLND_SEND_WRS)
-
-typedef struct
-{
- unsigned int *kib_service; /* IB service number */
- int *kib_min_reconnect_interval; /* first failed connection retry... */
- int *kib_max_reconnect_interval; /* ...exponentially increasing to this */
- int *kib_cksum; /* checksum kib_msg_t? */
- int *kib_timeout; /* comms timeout (seconds) */
- int *kib_keepalive; /* keepalive timeout (seconds) */
- int *kib_ntx; /* # tx descs */
- int *kib_credits; /* # concurrent sends */
- int *kib_peercredits; /* # concurrent sends to 1 peer */
- char **kib_default_ipif; /* default IPoIB interface */
- int *kib_retry_count;
- int *kib_rnr_retry_count;
- int *kib_concurrent_sends; /* send work queue sizing */
- int *kib_ib_mtu; /* IB MTU */
-#if IBLND_MAP_ON_DEMAND
- int *kib_fmr_pool_size; /* # FMRs in pool */
- int *kib_fmr_flush_trigger; /* When to trigger FMR flush */
- int *kib_fmr_cache; /* enable FMR pool cache? */
-#endif
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */
-#endif
-} kib_tunables_t;
-
-typedef struct
-{
- int ibp_npages; /* # pages */
- struct page *ibp_pages[0];
-} kib_pages_t;
-
-typedef struct
-{
- struct list_head ibd_list; /* chain on kib_devs */
- __u32 ibd_ifip; /* IPoIB interface IP */
- char ibd_ifname[32]; /* IPoIB interface name */
- int ibd_nnets; /* # nets extant */
-
- struct rdma_cm_id *ibd_cmid; /* IB listener (bound to 1 device) */
- struct ib_pd *ibd_pd; /* PD for the device */
- struct ib_mr *ibd_mr; /* MR for non RDMA I/O */
-} kib_dev_t;
-
-typedef struct
-{
- __u64 ibn_incarnation; /* my epoch */
- int ibn_init; /* initialisation state */
- int ibn_shutdown; /* shutting down? */
-
- atomic_t ibn_npeers; /* # peers extant */
- atomic_t ibn_nconns; /* # connections extant */
-
- struct kib_tx *ibn_tx_descs; /* all the tx descriptors */
- kib_pages_t *ibn_tx_pages; /* premapped tx msg pages */
- struct list_head ibn_idle_txs; /* idle tx descriptors */
- spinlock_t ibn_tx_lock; /* serialise */
-
-#if IBLND_MAP_ON_DEMAND
- struct ib_fmr_pool *ibn_fmrpool; /* FMR pool for RDMA I/O */
-#endif
-
- kib_dev_t *ibn_dev; /* underlying IB device */
-} kib_net_t;
-
-typedef struct
-{
- int kib_init; /* initialisation state */
- int kib_shutdown; /* shut down? */
- struct list_head kib_devs; /* IB devices extant */
- atomic_t kib_nthreads; /* # live threads */
- rwlock_t kib_global_lock; /* stabilize net/dev/peer/conn ops */
-
- struct list_head *kib_peers; /* hash table of all my known peers */
- int kib_peer_hash_size; /* size of kib_peers */
-
- void *kib_connd; /* the connd task (serialisation assertions) */
- struct list_head kib_connd_conns; /* connections to setup/teardown */
- struct list_head kib_connd_zombies; /* connections with zero refcount */
- wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */
- spinlock_t kib_connd_lock; /* serialise */
-
- wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */
- struct list_head kib_sched_conns; /* conns to check for rx completions */
- spinlock_t kib_sched_lock; /* serialise */
-
- __u64 kib_next_tx_cookie; /* RDMA completion cookie */
- struct ib_qp_attr kib_error_qpa; /* QP->ERROR */
-} kib_data_t;
-
-#define IBLND_INIT_NOTHING 0
-#define IBLND_INIT_DATA 1
-#define IBLND_INIT_ALL 2
-
-/************************************************************************
- * IB Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- */
-
-typedef struct kib_connparams
-{
- __u16 ibcp_queue_depth;
- __u16 ibcp_max_frags;
- __u32 ibcp_max_msg_size;
-} WIRE_ATTR kib_connparams_t;
-
-typedef struct
-{
- lnet_hdr_t ibim_hdr; /* portals header */
- char ibim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR kib_immediate_msg_t;
-
-#if IBLND_MAP_ON_DEMAND
-typedef struct
-{
- __u64 rd_addr; /* IO VMA address */
- __u32 rd_nob; /* # of bytes */
- __u32 rd_key; /* remote key */
-} WIRE_ATTR kib_rdma_desc_t;
-#else
-typedef struct
-{
- __u32 rf_nob; /* # bytes this frag */
- __u64 rf_addr; /* CAVEAT EMPTOR: misaligned!! */
-} WIRE_ATTR kib_rdma_frag_t;
-
-typedef struct
-{
- __u32 rd_key; /* local/remote key */
- __u32 rd_nfrags; /* # fragments */
- kib_rdma_frag_t rd_frags[0]; /* buffer frags */
-} WIRE_ATTR kib_rdma_desc_t;
-#endif
-
-typedef struct
-{
- lnet_hdr_t ibprm_hdr; /* portals header */
- __u64 ibprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR kib_putreq_msg_t;
-
-typedef struct
-{
- __u64 ibpam_src_cookie; /* reflected completion cookie */
- __u64 ibpam_dst_cookie; /* opaque completion cookie */
- kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */
-} WIRE_ATTR kib_putack_msg_t;
-
-typedef struct
-{
- lnet_hdr_t ibgm_hdr; /* portals header */
- __u64 ibgm_cookie; /* opaque completion cookie */
- kib_rdma_desc_t ibgm_rd; /* rdma descriptor */
-} WIRE_ATTR kib_get_msg_t;
-
-typedef struct
-{
- __u64 ibcm_cookie; /* opaque completion cookie */
- __s32 ibcm_status; /* < 0 failure: >= 0 length */
-} WIRE_ATTR kib_completion_msg_t;
-
-typedef struct
-{
- /* First 2 fields fixed FOR ALL TIME */
- __u32 ibm_magic; /* I'm an openibnal message */
- __u16 ibm_version; /* this is my version number */
-
- __u8 ibm_type; /* msg type */
- __u8 ibm_credits; /* returned credits */
- __u32 ibm_nob; /* # bytes in whole message */
- __u32 ibm_cksum; /* checksum (0 == no checksum) */
- __u64 ibm_srcnid; /* sender's NID */
- __u64 ibm_srcstamp; /* sender's incarnation */
- __u64 ibm_dstnid; /* destination's NID */
- __u64 ibm_dststamp; /* destination's incarnation */
-
- union {
- kib_connparams_t connparams;
- kib_immediate_msg_t immediate;
- kib_putreq_msg_t putreq;
- kib_putack_msg_t putack;
- kib_get_msg_t get;
- kib_completion_msg_t completion;
- } WIRE_ATTR ibm_u;
-} WIRE_ATTR kib_msg_t;
-
-#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC /* unique magic */
-
-#define IBLND_MSG_VERSION 0x11
-
-#define IBLND_MSG_CONNREQ 0xc0 /* connection request */
-#define IBLND_MSG_CONNACK 0xc1 /* connection acknowledge */
-#define IBLND_MSG_NOOP 0xd0 /* nothing (just credits) */
-#define IBLND_MSG_IMMEDIATE 0xd1 /* immediate */
-#define IBLND_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */
-#define IBLND_MSG_PUT_NAK 0xd3 /* completion (sink->src) */
-#define IBLND_MSG_PUT_ACK 0xd4 /* putack (sink->src) */
-#define IBLND_MSG_PUT_DONE 0xd5 /* completion (src->sink) */
-#define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */
-#define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */
-
-typedef struct {
- __u32 ibr_magic; /* sender's magic */
- __u16 ibr_version; /* sender's version */
- __u8 ibr_why; /* reject reason */
-} WIRE_ATTR kib_rej_t;
-
-
-/* connection rejection reasons */
-#define IBLND_REJECT_CONN_RACE 1 /* You lost connection race */
-#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */
-#define IBLND_REJECT_FATAL 3 /* Anything else */
-
-/***********************************************************************/
-
-typedef struct kib_rx /* receive message */
-{
- struct list_head rx_list; /* queue for attention */
- struct kib_conn *rx_conn; /* owning conn */
- int rx_nob; /* # bytes received (-1 while posted) */
- enum ib_wc_status rx_status; /* completion status */
- kib_msg_t *rx_msg; /* message buffer (host vaddr) */
- __u64 rx_msgaddr; /* message buffer (I/O addr) */
- DECLARE_PCI_UNMAP_ADDR (rx_msgunmap); /* for dma_unmap_single() */
- struct ib_recv_wr rx_wrq; /* receive work item... */
- struct ib_sge rx_sge; /* ...and its memory */
-} kib_rx_t;
-
-#define IBLND_POSTRX_DONT_POST 0 /* don't post */
-#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */
-#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */
-#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give myself back 1 reserved credit */
-
-typedef struct kib_tx /* transmit message */
-{
- struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
- struct kib_conn *tx_conn; /* owning conn */
- int tx_sending; /* # tx callbacks outstanding */
- int tx_queued; /* queued for sending */
- int tx_waiting; /* waiting for peer */
- int tx_status; /* LNET completion status */
- unsigned long tx_deadline; /* completion deadline */
- __u64 tx_cookie; /* completion cookie */
- lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */
- kib_msg_t *tx_msg; /* message buffer (host vaddr) */
- __u64 tx_msgaddr; /* message buffer (I/O addr) */
- DECLARE_PCI_UNMAP_ADDR (tx_msgunmap); /* for dma_unmap_single() */
- int tx_nwrq; /* # send work items */
-#if IBLND_MAP_ON_DEMAND
- struct ib_send_wr tx_wrq[2]; /* send work items... */
- struct ib_sge tx_sge[2]; /* ...and their memory */
- kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */
- __u64 *tx_pages; /* rdma phys page addrs */
- struct ib_pool_fmr *tx_fmr; /* rdma mapping (mapped if != NULL) */
-#else
- struct ib_send_wr *tx_wrq; /* send work items... */
- struct ib_sge *tx_sge; /* ...and their memory */
- kib_rdma_desc_t *tx_rd; /* rdma descriptor */
- int tx_nfrags; /* # entries in... */
- struct scatterlist *tx_frags; /* dma_map_sg descriptor */
- int tx_dmadir; /* dma direction */
-#endif
-} kib_tx_t;
-
-typedef struct kib_connvars
-{
- /* connection-in-progress variables */
- kib_msg_t cv_msg;
-} kib_connvars_t;
-
-typedef struct kib_conn
-{
- struct kib_peer *ibc_peer; /* owning peer */
- struct list_head ibc_list; /* stash on peer's conn list */
- struct list_head ibc_sched_list; /* schedule for attention */
- __u64 ibc_incarnation; /* which instance of the peer */
- atomic_t ibc_refcount; /* # users */
- int ibc_state; /* what's happening */
- int ibc_nsends_posted; /* # uncompleted sends */
- int ibc_credits; /* # credits I have */
- int ibc_outstanding_credits; /* # credits to return */
- int ibc_reserved_credits;/* # ACK/DONE msg credits */
- int ibc_comms_error; /* set on comms error */
- int ibc_nrx:8; /* receive buffers owned */
- int ibc_scheduled:1; /* scheduled for attention */
- int ibc_ready:1; /* CQ callback fired */
- unsigned long ibc_last_send; /* time of last send */
- struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */
- struct list_head ibc_tx_noops; /* IBLND_MSG_NOOPs */
- struct list_head ibc_tx_queue; /* sends that need a credit */
- struct list_head ibc_tx_queue_nocred;/* sends that don't need a credit */
- struct list_head ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */
- struct list_head ibc_active_txs; /* active tx awaiting completion */
- spinlock_t ibc_lock; /* serialise */
- kib_rx_t *ibc_rxs; /* the rx descs */
- kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */
-
- struct rdma_cm_id *ibc_cmid; /* CM id */
- struct ib_cq *ibc_cq; /* completion queue */
-
- kib_connvars_t *ibc_connvars; /* in-progress connection state */
-} kib_conn_t;
-
-#define IBLND_CONN_INIT 0 /* being intialised */
-#define IBLND_CONN_ACTIVE_CONNECT 1 /* active sending req */
-#define IBLND_CONN_PASSIVE_WAIT 2 /* passive waiting for rtu */
-#define IBLND_CONN_ESTABLISHED 3 /* connection established */
-#define IBLND_CONN_CLOSING 4 /* being closed */
-#define IBLND_CONN_DISCONNECTED 5 /* disconnected */
-
-typedef struct kib_peer
-{
- struct list_head ibp_list; /* stash on global peer list */
- lnet_nid_t ibp_nid; /* who's on the other end(s) */
- lnet_ni_t *ibp_ni; /* LNet interface */
- atomic_t ibp_refcount; /* # users */
- struct list_head ibp_conns; /* all active connections */
- struct list_head ibp_tx_queue; /* msgs waiting for a conn */
- int ibp_connecting; /* current active connection attempts */
- int ibp_accepting; /* current passive connection attempts */
- int ibp_error; /* errno on closing this peer */
- cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */
-} kib_peer_t;
-
-
-extern kib_data_t kiblnd_data;
-extern kib_tunables_t kiblnd_tunables;
-
-#define kiblnd_conn_addref(conn) \
-do { \
- CDEBUG(D_NET, "conn[%p] (%d)++\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- atomic_inc(&(conn)->ibc_refcount); \
-} while (0)
-
-#define kiblnd_conn_decref(conn) \
-do { \
- unsigned long flags; \
- \
- CDEBUG(D_NET, "conn[%p] (%d)--\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \
- spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); \
- list_add_tail(&(conn)->ibc_list, \
- &kiblnd_data.kib_connd_zombies); \
- wake_up(&kiblnd_data.kib_connd_waitq); \
- spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); \
- } \
-} while (0)
-
-#define kiblnd_peer_addref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- atomic_inc(&(peer)->ibp_refcount); \
-} while (0)
-
-#define kiblnd_peer_decref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
- kiblnd_destroy_peer(peer); \
-} while (0)
-
-static inline struct list_head *
-kiblnd_nid2peerlist (lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % kiblnd_data.kib_peer_hash_size;
-
- return (&kiblnd_data.kib_peers [hash]);
-}
-
-static inline int
-kiblnd_peer_active (kib_peer_t *peer)
-{
- /* Am I in the peer hash table? */
- return (!list_empty(&peer->ibp_list));
-}
-
-static inline kib_conn_t *
-kiblnd_get_conn_locked (kib_peer_t *peer)
-{
- LASSERT (!list_empty(&peer->ibp_conns));
-
- /* just return the first connection */
- return list_entry(peer->ibp_conns.next, kib_conn_t, ibc_list);
-}
-
-static inline int
-kiblnd_send_keepalive(kib_conn_t *conn)
-{
- return (*kiblnd_tunables.kib_keepalive > 0) &&
- time_after(jiffies, conn->ibc_last_send +
- *kiblnd_tunables.kib_keepalive*HZ);
-}
-
-static inline int
-kiblnd_send_noop(kib_conn_t *conn)
-{
- LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- if (conn->ibc_outstanding_credits < IBLND_CREDIT_HIGHWATER &&
- !kiblnd_send_keepalive(conn))
- return 0; /* No need to send NOOP */
-
- if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
- !list_empty(&conn->ibc_tx_queue_nocred) || /* can be piggybacked */
- conn->ibc_credits == 0) /* no credit */
- return 0;
-
- if (conn->ibc_credits == 1 && /* last credit reserved for */
- conn->ibc_outstanding_credits == 0) /* giving back credits */
- return 0;
-
- /* No tx to piggyback NOOP onto or no credit to send a tx */
- return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
-}
-
-static inline void
-kiblnd_abort_receives(kib_conn_t *conn)
-{
- ib_modify_qp(conn->ibc_cmid->qp,
- &kiblnd_data.kib_error_qpa, IB_QP_STATE);
-}
-
-/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the
- * lowest bits of the work request id to stash the work item type. */
-
-#define IBLND_WID_TX 0
-#define IBLND_WID_RDMA 1
-#define IBLND_WID_RX 2
-#define IBLND_WID_MASK 3UL
-
-static inline __u64
-kiblnd_ptr2wreqid (void *ptr, int type)
-{
- unsigned long lptr = (unsigned long)ptr;
-
- LASSERT ((lptr & IBLND_WID_MASK) == 0);
- LASSERT ((type & ~IBLND_WID_MASK) == 0);
- return (__u64)(lptr | type);
-}
-
-static inline void *
-kiblnd_wreqid2ptr (__u64 wreqid)
-{
- return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK);
-}
-
-static inline int
-kiblnd_wreqid2type (__u64 wreqid)
-{
- return (wreqid & IBLND_WID_MASK);
-}
-
-static inline void
-kiblnd_set_conn_state (kib_conn_t *conn, int state)
-{
- conn->ibc_state = state;
- mb();
-}
-
-#if IBLND_MAP_ON_DEMAND
-static inline int
-kiblnd_rd_size (kib_rdma_desc_t *rd)
-{
- return rd->rd_nob;
-}
-#else
-static inline int
-kiblnd_rd_size (kib_rdma_desc_t *rd)
-{
- int i;
- int size;
-
- for (i = size = 0; i < rd->rd_nfrags; i++)
- size += rd->rd_frags[i].rf_nob;
-
- return size;
-}
-#endif
-
-#if (IBLND_OFED_VERSION == 1020) || (IBLND_OFED_VERSION == 1025)
-
-static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
- void *msg, size_t size,
- enum dma_data_direction direction)
-{
- return ib_dma_map_single(dev, msg, size, direction);
-}
-
-static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
- __u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- ib_dma_unmap_single(dev, addr, size, direction);
-}
-
-#define KIBLND_UNMAP_ADDR_SET(p, m, a) do {} while (0)
-#define KIBLND_UNMAP_ADDR(p, m, a) (a)
-
-static inline int kiblnd_dma_map_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- return ib_dma_map_sg(dev, sg, nents, direction);
-}
-
-static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- ib_dma_unmap_sg(dev, sg, nents, direction);
-}
-
-static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return ib_sg_dma_address(dev, sg);
-}
-
-static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return ib_sg_dma_len(dev, sg);
-}
-
-/* XXX We use KIBLND_CONN_PARAM(e) as writable buffer, it's not strictly
- * right because OFED1.2 defines it as const, to use it we have to add
- * (void *) cast to overcome "const" */
-
-#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data)
-#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
-
-#elif (IBLND_OFED_VERSION == 1010)
-
-static inline dma_addr_t kiblnd_dma_map_single(struct ib_device *dev,
- void *msg, size_t size,
- enum dma_data_direction direction)
-{
- return dma_map_single(dev->dma_device, msg, size, direction);
-}
-
-static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
- dma_addr_t addr, size_t size,
- enum dma_data_direction direction)
-{
- dma_unmap_single(dev->dma_device, addr, size, direction);
-}
-
-#define KIBLND_UNMAP_ADDR_SET(p, m, a) pci_unmap_addr_set(p, m, a)
-#define KIBLND_UNMAP_ADDR(p, m, a) pci_unmap_addr(p, m)
-
-static inline int kiblnd_dma_map_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- return dma_map_sg(dev->dma_device, sg, nents, direction);
-}
-
-static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- return dma_unmap_sg(dev->dma_device, sg, nents, direction);
-}
-
-
-static inline dma_addr_t kiblnd_sg_dma_address(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg_dma_address(sg);
-}
-
-
-static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg_dma_len(sg);
-}
-
-#define KIBLND_CONN_PARAM(e) ((e)->private_data)
-#define KIBLND_CONN_PARAM_LEN(e) ((e)->private_data_len)
-
-#endif
-
-int kiblnd_startup (lnet_ni_t *ni);
-void kiblnd_shutdown (lnet_ni_t *ni);
-int kiblnd_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg);
-
-int kiblnd_tunables_init(void);
-void kiblnd_tunables_fini(void);
-
-int kiblnd_connd (void *arg);
-int kiblnd_scheduler(void *arg);
-int kiblnd_thread_start (int (*fn)(void *arg), void *arg);
-
-int kiblnd_alloc_pages (kib_pages_t **pp, int npages);
-void kiblnd_free_pages (kib_pages_t *p);
-
-int kiblnd_cm_callback(struct rdma_cm_id *cmid,
- struct rdma_cm_event *event);
-
-int kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
-void kiblnd_destroy_peer (kib_peer_t *peer);
-void kiblnd_destroy_dev (kib_dev_t *dev);
-void kiblnd_unlink_peer_locked (kib_peer_t *peer);
-void kiblnd_peer_alive (kib_peer_t *peer);
-kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid);
-void kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error);
-int kiblnd_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation);
-
-void kiblnd_connreq_done(kib_conn_t *conn, int status);
-kib_conn_t *kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid,
- int state);
-void kiblnd_destroy_conn (kib_conn_t *conn);
-void kiblnd_close_conn (kib_conn_t *conn, int error);
-void kiblnd_close_conn_locked (kib_conn_t *conn, int error);
-
-int kiblnd_init_rdma (lnet_ni_t *ni, kib_tx_t *tx, int type,
- int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie);
-
-void kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn);
-void kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn);
-void kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob);
-void kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int status);
-void kiblnd_check_sends (kib_conn_t *conn);
-
-void kiblnd_qp_event(struct ib_event *event, void *arg);
-void kiblnd_cq_event(struct ib_event *event, void *arg);
-void kiblnd_cq_completion(struct ib_cq *cq, void *arg);
-
-void kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob);
-void kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg,
- int credits, lnet_nid_t dstnid, __u64 dststamp);
-int kiblnd_unpack_msg(kib_msg_t *msg, int nob);
-int kiblnd_post_rx (kib_rx_t *rx, int credit);
-
-int kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-
-
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2006 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "o2iblnd.h"
-
-char *
-kiblnd_msgtype2str(int type)
-{
- switch (type) {
- case IBLND_MSG_CONNREQ:
- return "CONNREQ";
-
- case IBLND_MSG_CONNACK:
- return "CONNACK";
-
- case IBLND_MSG_NOOP:
- return "NOOP";
-
- case IBLND_MSG_IMMEDIATE:
- return "IMMEDIATE";
-
- case IBLND_MSG_PUT_REQ:
- return "PUT_REQ";
-
- case IBLND_MSG_PUT_NAK:
- return "PUT_NAK";
-
- case IBLND_MSG_PUT_ACK:
- return "PUT_ACK";
-
- case IBLND_MSG_PUT_DONE:
- return "PUT_DONE";
-
- case IBLND_MSG_GET_REQ:
- return "GET_REQ";
-
- case IBLND_MSG_GET_DONE:
- return "GET_DONE";
-
- default:
- return "???";
- }
-}
-
-void
-kiblnd_tx_done (lnet_ni_t *ni, kib_tx_t *tx)
-{
- lnet_msg_t *lntmsg[2];
- kib_net_t *net = ni->ni_data;
- int rc;
- int i;
-
- LASSERT (net != NULL);
- LASSERT (!in_interrupt());
- LASSERT (!tx->tx_queued); /* mustn't be queued for sending */
- LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */
- LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */
-
-#if IBLND_MAP_ON_DEMAND
- if (tx->tx_fmr != NULL) {
- rc = ib_fmr_pool_unmap(tx->tx_fmr);
- LASSERT (rc == 0);
-
- if (tx->tx_status != 0) {
- rc = ib_flush_fmr_pool(net->ibn_fmrpool);
- LASSERT (rc == 0);
- }
-
- tx->tx_fmr = NULL;
- }
-#else
- if (tx->tx_nfrags != 0) {
- kiblnd_dma_unmap_sg(net->ibn_dev->ibd_cmid->device,
- tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
- tx->tx_nfrags = 0;
- }
-#endif
- /* tx may have up to 2 lnet msgs to finalise */
- lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
- lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
- rc = tx->tx_status;
-
- if (tx->tx_conn != NULL) {
- LASSERT (ni == tx->tx_conn->ibc_peer->ibp_ni);
-
- kiblnd_conn_decref(tx->tx_conn);
- tx->tx_conn = NULL;
- }
-
- tx->tx_nwrq = 0;
- tx->tx_status = 0;
-
- spin_lock(&net->ibn_tx_lock);
-
- list_add(&tx->tx_list, &net->ibn_idle_txs);
-
- spin_unlock(&net->ibn_tx_lock);
-
- /* delay finalize until my descs have been freed */
- for (i = 0; i < 2; i++) {
- if (lntmsg[i] == NULL)
- continue;
-
- lnet_finalize(ni, lntmsg[i], rc);
- }
-}
-
-void
-kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int status)
-{
- kib_tx_t *tx;
-
- while (!list_empty (txlist)) {
- tx = list_entry (txlist->next, kib_tx_t, tx_list);
-
- list_del (&tx->tx_list);
- /* complete now */
- tx->tx_waiting = 0;
- tx->tx_status = status;
- kiblnd_tx_done(ni, tx);
- }
-}
-
-kib_tx_t *
-kiblnd_get_idle_tx (lnet_ni_t *ni)
-{
- kib_net_t *net = ni->ni_data;
- kib_tx_t *tx;
-
- LASSERT (net != NULL);
-
- spin_lock(&net->ibn_tx_lock);
-
- if (list_empty(&net->ibn_idle_txs)) {
- spin_unlock(&net->ibn_tx_lock);
- return NULL;
- }
-
- tx = list_entry(net->ibn_idle_txs.next, kib_tx_t, tx_list);
- list_del(&tx->tx_list);
-
- /* Allocate a new completion cookie. It might not be needed,
- * but we've got a lock right now and we're unlikely to
- * wrap... */
- tx->tx_cookie = kiblnd_data.kib_next_tx_cookie++;
-
- spin_unlock(&net->ibn_tx_lock);
-
- LASSERT (tx->tx_nwrq == 0);
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_sending == 0);
- LASSERT (!tx->tx_waiting);
- LASSERT (tx->tx_status == 0);
- LASSERT (tx->tx_conn == NULL);
- LASSERT (tx->tx_lntmsg[0] == NULL);
- LASSERT (tx->tx_lntmsg[1] == NULL);
-#if IBLND_MAP_ON_DEMAND
- LASSERT (tx->tx_fmr == NULL);
-#else
- LASSERT (tx->tx_nfrags == 0);
-#endif
-
- return tx;
-}
-
-void
-kiblnd_drop_rx (kib_rx_t *rx)
-{
- kib_conn_t *conn = rx->rx_conn;
- unsigned long flags;
-
- spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
- LASSERT (conn->ibc_nrx > 0);
- conn->ibc_nrx--;
- spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags);
-
- kiblnd_conn_decref(conn);
-}
-
-int
-kiblnd_post_rx (kib_rx_t *rx, int credit)
-{
- kib_conn_t *conn = rx->rx_conn;
- kib_net_t *net = conn->ibc_peer->ibp_ni->ni_data;
- struct ib_recv_wr *bad_wrq;
- int rc;
-
- LASSERT (net != NULL);
- LASSERT (!in_interrupt());
- LASSERT (credit == IBLND_POSTRX_NO_CREDIT ||
- credit == IBLND_POSTRX_PEER_CREDIT ||
- credit == IBLND_POSTRX_RSRVD_CREDIT);
-
- rx->rx_sge.length = IBLND_MSG_SIZE;
- rx->rx_sge.lkey = net->ibn_dev->ibd_mr->lkey;
- rx->rx_sge.addr = rx->rx_msgaddr;
-
- rx->rx_wrq.next = NULL;
- rx->rx_wrq.sg_list = &rx->rx_sge;
- rx->rx_wrq.num_sge = 1;
- rx->rx_wrq.wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX);
-
- LASSERT (conn->ibc_state >= IBLND_CONN_INIT);
- LASSERT (rx->rx_nob >= 0); /* not posted */
-
- if (conn->ibc_state > IBLND_CONN_ESTABLISHED) {
- kiblnd_drop_rx(rx); /* No more posts for this rx */
- return 0;
- }
-
- rx->rx_nob = -1; /* flag posted */
-
- rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq);
-
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */
- return rc;
-
- if (rc != 0) {
- CERROR("Can't post rx for %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kiblnd_close_conn(conn, rc);
- kiblnd_drop_rx(rx); /* No more posts for this rx */
- return rc;
- }
-
- if (credit == IBLND_POSTRX_NO_CREDIT)
- return 0;
-
- spin_lock(&conn->ibc_lock);
- if (credit == IBLND_POSTRX_PEER_CREDIT)
- conn->ibc_outstanding_credits++;
- else
- conn->ibc_reserved_credits++;
- spin_unlock(&conn->ibc_lock);
-
- kiblnd_check_sends(conn);
- return 0;
-}
-
-kib_tx_t *
-kiblnd_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie)
-{
- struct list_head *tmp;
-
- list_for_each(tmp, &conn->ibc_active_txs) {
- kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list);
-
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_sending != 0 || tx->tx_waiting);
-
- if (tx->tx_cookie != cookie)
- continue;
-
- if (tx->tx_waiting &&
- tx->tx_msg->ibm_type == txtype)
- return tx;
-
- CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
- tx->tx_waiting ? "" : "NOT ",
- tx->tx_msg->ibm_type, txtype);
- }
- return NULL;
-}
-
-void
-kiblnd_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie)
-{
- kib_tx_t *tx;
- lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
- int idle;
-
- spin_lock(&conn->ibc_lock);
-
- tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie);
- if (tx == NULL) {
- spin_unlock(&conn->ibc_lock);
-
- CWARN("Unmatched completion type %x cookie "LPX64" from %s\n",
- txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_close_conn(conn, -EPROTO);
- return;
- }
-
- if (tx->tx_status == 0) { /* success so far */
- if (status < 0) { /* failed? */
- tx->tx_status = status;
- } else if (txtype == IBLND_MSG_GET_REQ) {
- lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status);
- }
- }
-
- tx->tx_waiting = 0;
-
- idle = !tx->tx_queued && (tx->tx_sending == 0);
- if (idle)
- list_del(&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kiblnd_tx_done(ni, tx);
-}
-
-void
-kiblnd_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie)
-{
- lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
- kib_tx_t *tx = kiblnd_get_idle_tx(ni);
-
- if (tx == NULL) {
- CERROR("Can't get tx for completion %x for %s\n",
- type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
- }
-
- tx->tx_msg->ibm_u.completion.ibcm_status = status;
- tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
- kiblnd_init_tx_msg(ni, tx, type, sizeof(kib_completion_msg_t));
-
- kiblnd_queue_tx(tx, conn);
-}
-
-void
-kiblnd_handle_rx (kib_rx_t *rx)
-{
- kib_msg_t *msg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
- int credits = msg->ibm_credits;
- kib_tx_t *tx;
- int rc = 0;
- int rc2;
- int post_credit;
-
- LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- CDEBUG (D_NET, "Received %x[%d] from %s\n",
- msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- if (credits != 0) {
- /* Have I received credits that will let me send? */
- spin_lock(&conn->ibc_lock);
-
- if (conn->ibc_credits + credits > IBLND_MSG_QUEUE_SIZE) {
- rc2 = conn->ibc_credits;
- spin_unlock(&conn->ibc_lock);
-
- CERROR("Bad credits from %s: %d + %d > %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- rc2, credits, IBLND_MSG_QUEUE_SIZE);
-
- kiblnd_close_conn(conn, -EPROTO);
- kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT);
- return;
- }
-
- conn->ibc_credits += credits;
-
- /* This ensures the credit taken by NOOP can be returned */
- if (msg->ibm_type == IBLND_MSG_NOOP)
- conn->ibc_outstanding_credits++;
-
- spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Bad IBLND message type %x from %s\n",
- msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- post_credit = IBLND_POSTRX_NO_CREDIT;
- rc = -EPROTO;
- break;
-
- case IBLND_MSG_NOOP:
- if (credits != 0) /* credit already posted */
- post_credit = IBLND_POSTRX_NO_CREDIT;
- else /* a keepalive NOOP */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_IMMEDIATE:
- post_credit = IBLND_POSTRX_DONT_POST;
- rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr,
- msg->ibm_srcnid, rx, 0);
- if (rc < 0) /* repost on error */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_PUT_REQ:
- post_credit = IBLND_POSTRX_DONT_POST;
- rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr,
- msg->ibm_srcnid, rx, 1);
- if (rc < 0) /* repost on error */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_PUT_NAK:
- CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid));
- post_credit = IBLND_POSTRX_RSRVD_CREDIT;
- kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBLND_MSG_PUT_ACK:
- post_credit = IBLND_POSTRX_RSRVD_CREDIT;
-
- spin_lock(&conn->ibc_lock);
- tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ,
- msg->ibm_u.putack.ibpam_src_cookie);
- if (tx != NULL)
- list_del(&tx->tx_list);
- spin_unlock(&conn->ibc_lock);
-
- if (tx == NULL) {
- CERROR("Unmatched PUT_ACK from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- rc = -EPROTO;
- break;
- }
-
- LASSERT (tx->tx_waiting);
- /* CAVEAT EMPTOR: I could be racing with tx_complete, but...
- * (a) I can overwrite tx_msg since my peer has received it!
- * (b) tx_waiting set tells tx_complete() it's not done. */
-
- tx->tx_nwrq = 0; /* overwrite PUT_REQ */
-
- rc2 = kiblnd_init_rdma(ni, tx, IBLND_MSG_PUT_DONE,
- kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd),
- &msg->ibm_u.putack.ibpam_rd,
- msg->ibm_u.putack.ibpam_dst_cookie);
- if (rc2 < 0)
- CERROR("Can't setup rdma for PUT to %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
-
- spin_lock(&conn->ibc_lock);
- tx->tx_waiting = 0; /* clear waiting and queue atomically */
- kiblnd_queue_tx_locked(tx, conn);
- spin_unlock(&conn->ibc_lock);
- break;
-
- case IBLND_MSG_PUT_DONE:
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBLND_MSG_GET_REQ:
- post_credit = IBLND_POSTRX_DONT_POST;
- rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr,
- msg->ibm_srcnid, rx, 1);
- if (rc < 0) /* repost on error */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_GET_DONE:
- post_credit = IBLND_POSTRX_RSRVD_CREDIT;
- kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
- }
-
- if (rc < 0) /* protocol error */
- kiblnd_close_conn(conn, rc);
-
- if (post_credit != IBLND_POSTRX_DONT_POST)
- kiblnd_post_rx(rx, post_credit);
-}
-
-void
-kiblnd_rx_complete (kib_rx_t *rx, int status, int nob)
-{
- kib_msg_t *msg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
- kib_net_t *net = ni->ni_data;
- unsigned long flags;
- int rc;
- int err = -EIO;
-
- LASSERT (net != NULL);
- LASSERT (rx->rx_nob < 0); /* was posted */
- rx->rx_nob = 0; /* isn't now */
-
- if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
- goto ignore;
-
- if (status != IB_WC_SUCCESS) {
- CDEBUG(D_NETERROR, "Rx from %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
- goto failed;
- }
-
- LASSERT (nob >= 0);
- rx->rx_nob = nob;
-
- rc = kiblnd_unpack_msg(msg, rx->rx_nob);
- if (rc != 0) {
- CERROR ("Error %d unpacking rx from %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- goto failed;
- }
-
- if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
- msg->ibm_dstnid != ni->ni_nid ||
- msg->ibm_srcstamp != conn->ibc_incarnation ||
- msg->ibm_dststamp != net->ibn_incarnation) {
- CERROR ("Stale rx from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- err = -ESTALE;
- goto failed;
- }
-
- /* set time last known alive */
- kiblnd_peer_alive(conn->ibc_peer);
-
- /* racing with connection establishment/teardown! */
-
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- /* must check holding global lock to eliminate race */
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
- flags);
- return;
- }
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
- flags);
- }
- kiblnd_handle_rx(rx);
- return;
-
- failed:
- CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
- kiblnd_close_conn(conn, err);
- ignore:
- kiblnd_drop_rx(rx); /* Don't re-post rx. */
-}
-
-struct page *
-kiblnd_kvaddr_to_page (unsigned long vaddr)
-{
- struct page *page;
-
- if (vaddr >= VMALLOC_START &&
- vaddr < VMALLOC_END) {
- page = vmalloc_to_page ((void *)vaddr);
- LASSERT (page != NULL);
- return page;
- }
-#ifdef CONFIG_HIGHMEM
- if (vaddr >= PKMAP_BASE &&
- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
- /* No highmem pages only used for bulk (kiov) I/O */
- CERROR("find page for address in highmem\n");
- LBUG();
- }
-#endif
- page = virt_to_page (vaddr);
- LASSERT (page != NULL);
- return page;
-}
-
-#if !IBLND_MAP_ON_DEMAND
-int
-kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
- unsigned int niov, struct iovec *iov, int offset, int nob)
-
-{
- struct scatterlist *sg;
- int i;
- int fragnob;
- unsigned long vaddr;
- struct page *page;
- int page_offset;
- kib_net_t *net = ni->ni_data;
-
- LASSERT (nob > 0);
- LASSERT (niov > 0);
- LASSERT (net != NULL);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT (niov > 0);
- }
-
- sg = tx->tx_frags;
- do {
- LASSERT (niov > 0);
-
- vaddr = ((unsigned long)iov->iov_base) + offset;
- page_offset = vaddr & (PAGE_SIZE - 1);
- page = kiblnd_kvaddr_to_page(vaddr);
- if (page == NULL) {
- CERROR ("Can't find page\n");
- return -EFAULT;
- }
-
- fragnob = min((int)(iov->iov_len - offset), nob);
- fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
- sg->page = page;
- sg->offset = page_offset;
- sg->length = fragnob;
- sg++;
-
- if (offset + fragnob < iov->iov_len) {
- offset += fragnob;
- } else {
- offset = 0;
- iov++;
- niov--;
- }
- nob -= fragnob;
- } while (nob > 0);
-
- /* If rd is not tx_rd, it's going to get sent to a peer and I'm the
- * RDMA sink */
- tx->tx_nfrags = sg - tx->tx_frags;
- tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-
- rd->rd_nfrags = kiblnd_dma_map_sg(net->ibn_dev->ibd_cmid->device,
- tx->tx_frags, tx->tx_nfrags,
- tx->tx_dmadir);
- rd->rd_key = (rd != tx->tx_rd) ?
- net->ibn_dev->ibd_mr->rkey : net->ibn_dev->ibd_mr->lkey;
-
- for (i = 0; i < rd->rd_nfrags; i++) {
- rd->rd_frags[i].rf_nob = kiblnd_sg_dma_len(
- net->ibn_dev->ibd_cmid->device, &tx->tx_frags[i]);
- rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address(
- net->ibn_dev->ibd_cmid->device, &tx->tx_frags[i]);
- }
-
- return 0;
-}
-
-int
-kiblnd_setup_rd_kiov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
- int nkiov, lnet_kiov_t *kiov, int offset, int nob)
-{
- struct scatterlist *sg;
- int i;
- int fragnob;
- kib_net_t *net = ni->ni_data;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT (nob > 0);
- LASSERT (nkiov > 0);
- LASSERT (net != NULL);
-
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- nkiov--;
- kiov++;
- LASSERT (nkiov > 0);
- }
-
- sg = tx->tx_frags;
- do {
- LASSERT (nkiov > 0);
-
- fragnob = min((int)(kiov->kiov_len - offset), nob);
-
- memset(sg, 0, sizeof(*sg));
- sg->page = kiov->kiov_page;
- sg->offset = kiov->kiov_offset + offset;
- sg->length = fragnob;
- sg++;
-
- offset = 0;
- kiov++;
- nkiov--;
- nob -= fragnob;
- } while (nob > 0);
-
- /* If rd is not tx_rd, it's going to get sent to a peer and I'm the
- * RDMA sink */
- tx->tx_nfrags = sg - tx->tx_frags;
- tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-
- rd->rd_nfrags = kiblnd_dma_map_sg(net->ibn_dev->ibd_cmid->device,
- tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
- rd->rd_key = (rd != tx->tx_rd) ?
- net->ibn_dev->ibd_mr->rkey : net->ibn_dev->ibd_mr->lkey;
-
- for (i = 0; i < tx->tx_nfrags; i++) {
- rd->rd_frags[i].rf_nob = kiblnd_sg_dma_len(
- net->ibn_dev->ibd_cmid->device, &tx->tx_frags[i]);
- rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address(
- net->ibn_dev->ibd_cmid->device, &tx->tx_frags[i]);
-#if 0
- CDEBUG(D_WARNING,"frag[%d]: "LPX64" for %d\n",
- i, rd->rd_frags[i].rf_addr, rd->rd_frags[i].rf_nob);
-#endif
- }
-
- return 0;
-}
-#else
-int
-kiblnd_map_tx (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
- int npages, unsigned long page_offset, int nob)
-{
- struct ib_pool_fmr *fmr;
- kib_net_t *net = ni->ni_data;
-
- LASSERT (net != NULL);
- LASSERT (tx->tx_fmr == NULL);
- LASSERT (page_offset < PAGE_SIZE);
- LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT)));
- LASSERT (npages <= LNET_MAX_IOV);
-
- rd->rd_addr = 0;
-
- fmr = ib_fmr_pool_map_phys(net->ibn_fmrpool, tx->tx_pages,
- npages, rd->rd_addr);
- if (IS_ERR(fmr)) {
- CERROR ("Can't map %d pages: %ld\n", npages, PTR_ERR(fmr));
- return PTR_ERR(fmr);
- }
-
- /* If rd is not tx_rd, it's going to get sent to a peer, who will need
- * the rkey */
-
- rd->rd_key = (rd != tx->tx_rd) ? fmr->fmr->rkey : fmr->fmr->lkey;
- rd->rd_nob = nob;
-
- tx->tx_fmr = fmr;
- return 0;
-}
-
-int
-kiblnd_setup_rd_iov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
- unsigned int niov, struct iovec *iov, int offset, int nob)
-
-{
- int resid;
- int fragnob;
- struct page *page;
- int npages;
- unsigned long page_offset;
- unsigned long vaddr;
-
- LASSERT (nob > 0);
- LASSERT (niov > 0);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT (niov > 0);
- }
-
- if (nob > iov->iov_len - offset) {
- CERROR ("Can't map multiple vaddr fragments\n");
- return (-EMSGSIZE);
- }
-
- vaddr = ((unsigned long)iov->iov_base) + offset;
-
- page_offset = vaddr & (PAGE_SIZE - 1);
- resid = nob;
- npages = 0;
-
- do {
- LASSERT (npages < LNET_MAX_IOV);
-
- page = kiblnd_kvaddr_to_page(vaddr);
- if (page == NULL) {
- CERROR("Can't find page for %lu\n", vaddr);
- return -EFAULT;
- }
-
- tx->tx_pages[npages++] = lnet_page2phys(page);
-
- fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1));
- vaddr += fragnob;
- resid -= fragnob;
-
- } while (resid > 0);
-
- return kiblnd_map_tx(ni, tx, rd, npages, page_offset, nob);
-}
-
-int
-kiblnd_setup_rd_kiov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
- int nkiov, lnet_kiov_t *kiov, int offset, int nob)
-{
- int resid;
- int npages;
- unsigned long page_offset;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT (nob > 0);
- LASSERT (nkiov > 0);
- LASSERT (nkiov <= LNET_MAX_IOV);
-
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- nkiov--;
- kiov++;
- LASSERT (nkiov > 0);
- }
-
- page_offset = kiov->kiov_offset + offset;
-
- resid = offset + nob;
- npages = 0;
-
- do {
- LASSERT (npages < LNET_MAX_IOV);
- LASSERT (nkiov > 0);
-
- if ((npages > 0 && kiov->kiov_offset != 0) ||
- (resid > kiov->kiov_len &&
- (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) {
- /* Can't have gaps */
- CERROR ("Can't make payload contiguous in I/O VM:"
- "page %d, offset %d, len %d \n",
- npages, kiov->kiov_offset, kiov->kiov_len);
-
- return -EINVAL;
- }
-
- tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page);
- resid -= kiov->kiov_len;
- kiov++;
- nkiov--;
- } while (resid > 0);
-
- return kiblnd_map_tx(ni, tx, rd, npages, page_offset, nob);
-}
-#endif
-
-void
-kiblnd_check_sends (kib_conn_t *conn)
-{
- kib_tx_t *tx;
- lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
- int rc;
- int consume_cred = 0;
- struct ib_send_wr *bad_wrq;
- int done;
-
- /* Don't send anything until after the connection is established */
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- CDEBUG(D_NET, "%s too soon\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
- }
-
- spin_lock(&conn->ibc_lock);
-
- LASSERT (conn->ibc_nsends_posted <=
- *kiblnd_tunables.kib_concurrent_sends);
- LASSERT (conn->ibc_reserved_credits >= 0);
-
- while (conn->ibc_reserved_credits > 0 &&
- !list_empty(&conn->ibc_tx_queue_rsrvd)) {
- tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
- kib_tx_t, tx_list);
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
- conn->ibc_reserved_credits--;
- }
-
- if (kiblnd_send_noop(conn)) {
- spin_unlock(&conn->ibc_lock);
-
- tx = kiblnd_get_idle_tx(ni);
- if (tx != NULL)
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0);
-
- spin_lock(&conn->ibc_lock);
-
- if (tx != NULL)
- kiblnd_queue_tx_locked(tx, conn);
- }
-
- for (;;) {
- if (!list_empty(&conn->ibc_tx_queue_nocred)) {
- tx = list_entry(conn->ibc_tx_queue_nocred.next,
- kib_tx_t, tx_list);
- consume_cred = 0;
- } else if (!list_empty(&conn->ibc_tx_noops)) {
- tx = list_entry(conn->ibc_tx_noops.next,
- kib_tx_t, tx_list);
- consume_cred = 1;
- } else if (!list_empty(&conn->ibc_tx_queue)) {
- tx = list_entry(conn->ibc_tx_queue.next,
- kib_tx_t, tx_list);
- consume_cred = 1;
- } else {
- /* nothing to send right now */
- break;
- }
-
- LASSERT (tx->tx_queued);
- /* We rely on this for QP sizing */
- LASSERT (tx->tx_nwrq > 0 &&
- tx->tx_nwrq <= 1 + IBLND_MAX_RDMA_FRAGS);
-
- LASSERT (conn->ibc_outstanding_credits >= 0);
- LASSERT (conn->ibc_outstanding_credits <= IBLND_MSG_QUEUE_SIZE);
- LASSERT (conn->ibc_credits >= 0);
- LASSERT (conn->ibc_credits <= IBLND_MSG_QUEUE_SIZE);
-
- if (conn->ibc_nsends_posted ==
- *kiblnd_tunables.kib_concurrent_sends) {
- /* tx completions outstanding... */
- CDEBUG(D_NET, "%s: posted enough\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
-
- if (consume_cred) {
- if (conn->ibc_credits == 0) { /* no credits */
- CDEBUG(D_NET, "%s: no credits\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break; /* NB ibc_tx_queue_nocred checked */
- }
-
- /* Last credit reserved for NOOP */
- if (conn->ibc_credits == 1 &&
- tx->tx_msg->ibm_type != IBLND_MSG_NOOP) {
- CDEBUG(D_NET, "%s: not using last credit\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break; /* NB ibc_tx_noops checked */
- }
- }
-
- list_del(&tx->tx_list);
- tx->tx_queued = 0;
-
- /* NB don't drop ibc_lock before bumping tx_sending */
-
- if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP &&
- !kiblnd_send_noop(conn)) {
- /* redundant NOOP */
- spin_unlock(&conn->ibc_lock);
- kiblnd_tx_done(ni, tx);
- spin_lock(&conn->ibc_lock);
- CDEBUG(D_NET, "%s: redundant noop\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- continue;
- }
-
- kiblnd_pack_msg(ni, tx->tx_msg, conn->ibc_outstanding_credits,
- conn->ibc_peer->ibp_nid, conn->ibc_incarnation);
-
- conn->ibc_outstanding_credits = 0;
- conn->ibc_nsends_posted++;
- if (consume_cred)
- conn->ibc_credits--;
-
- /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA
- * PUT. If so, it was first queued here as a PUT_REQ, sent and
- * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
- * and then re-queued here. It's (just) possible that
- * tx_sending is non-zero if we've not done the tx_complete() from
- * the first send; hence the ++ rather than = below. */
- tx->tx_sending++;
-
- list_add (&tx->tx_list, &conn->ibc_active_txs);
-#if 0
- {
- int i;
-
- for (i = 0; i < tx->tx_nwrq - 1; i++) {
- LASSERT (tx->tx_wrq[i].opcode == IB_WR_RDMA_WRITE);
- LASSERT (tx->tx_wrq[i].next == &tx->tx_wrq[i+1]);
- LASSERT (tx->tx_wrq[i].sg_list == &tx->tx_sge[i]);
-
- CDEBUG(D_WARNING, "WORK[%d]: RDMA "LPX64
- " for %d k %x -> "LPX64" k %x\n", i,
- tx->tx_wrq[i].sg_list->addr,
- tx->tx_wrq[i].sg_list->length,
- tx->tx_wrq[i].sg_list->lkey,
- tx->tx_wrq[i].wr.rdma.remote_addr,
- tx->tx_wrq[i].wr.rdma.rkey);
- }
-
- LASSERT (tx->tx_wrq[i].opcode == IB_WR_SEND);
- LASSERT (tx->tx_wrq[i].next == NULL);
- LASSERT (tx->tx_wrq[i].sg_list == &tx->tx_sge[i]);
-
- CDEBUG(D_WARNING, "WORK[%d]: SEND "LPX64" for %d k %x\n", i,
- tx->tx_wrq[i].sg_list->addr,
- tx->tx_wrq[i].sg_list->length,
- tx->tx_wrq[i].sg_list->lkey);
- }
-#endif
- /* I'm still holding ibc_lock! */
- if (conn->ibc_state != IBLND_CONN_ESTABLISHED)
- rc = -ECONNABORTED;
- else
- rc = ib_post_send(conn->ibc_cmid->qp, tx->tx_wrq, &bad_wrq);
-
- conn->ibc_last_send = jiffies;
-
- if (rc != 0) {
- /* NB credits are transferred in the actual
- * message, which can only be the last work item */
- conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits;
- if (consume_cred)
- conn->ibc_credits++;
- conn->ibc_nsends_posted--;
-
- tx->tx_status = rc;
- tx->tx_waiting = 0;
- tx->tx_sending--;
-
- done = (tx->tx_sending == 0);
- if (done)
- list_del (&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
- CERROR("Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- else
- CDEBUG(D_NET, "Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- kiblnd_close_conn(conn, rc);
-
- if (done)
- kiblnd_tx_done(ni, tx);
- return;
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-}
-
-void
-kiblnd_tx_complete (kib_tx_t *tx, int status)
-{
- int failed = (status != IB_WC_SUCCESS);
- kib_conn_t *conn = tx->tx_conn;
- int idle;
-
- LASSERT (tx->tx_sending > 0);
-
- if (failed) {
- if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
- CDEBUG(D_NETERROR, "Tx -> %s cookie "LPX64
- "sending %d waiting %d: failed %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
- status);
-
- kiblnd_close_conn(conn, -EIO);
- } else {
- kiblnd_peer_alive(conn->ibc_peer);
- }
-
- spin_lock(&conn->ibc_lock);
-
- /* I could be racing with rdma completion. Whoever makes 'tx' idle
- * gets to free it, which also drops its ref on 'conn'. */
-
- tx->tx_sending--;
- conn->ibc_nsends_posted--;
-
- if (failed) {
- tx->tx_waiting = 0; /* don't wait for peer */
- tx->tx_status = -EIO;
- }
-
- idle = (tx->tx_sending == 0) && /* This is the final callback */
- !tx->tx_waiting && /* Not waiting for peer */
- !tx->tx_queued; /* Not re-queued (PUT_DONE) */
- if (idle)
- list_del(&tx->tx_list);
-
- kiblnd_conn_addref(conn); /* 1 ref for me.... */
-
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
-
- kiblnd_check_sends(conn);
-
- kiblnd_conn_decref(conn); /* ...until here */
-}
-
-void
-kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob)
-{
- kib_net_t *net = ni->ni_data;
- struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
- struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
- int nob = offsetof (kib_msg_t, ibm_u) + body_nob;
-
- LASSERT (net != NULL);
- LASSERT (tx->tx_nwrq >= 0);
- LASSERT (tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
- LASSERT (nob <= IBLND_MSG_SIZE);
-
- kiblnd_init_msg(tx->tx_msg, type, body_nob);
-
- sge->addr = tx->tx_msgaddr;
- sge->lkey = net->ibn_dev->ibd_mr->lkey;
- sge->length = nob;
-
- memset(wrq, 0, sizeof(*wrq));
-
- wrq->next = NULL;
- wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
- wrq->sg_list = sge;
- wrq->num_sge = 1;
- wrq->opcode = IB_WR_SEND;
- wrq->send_flags = IB_SEND_SIGNALED;
-
- tx->tx_nwrq++;
-}
-
-int
-kiblnd_init_rdma (lnet_ni_t *ni, kib_tx_t *tx, int type,
- int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie)
-{
- kib_msg_t *ibmsg = tx->tx_msg;
- kib_rdma_desc_t *srcrd = tx->tx_rd;
- struct ib_sge *sge = &tx->tx_sge[0];
- struct ib_send_wr *wrq = &tx->tx_wrq[0];
- int rc = nob;
-
-#if IBLND_MAP_ON_DEMAND
- LASSERT (!in_interrupt());
- LASSERT (tx->tx_nwrq == 0);
- LASSERT (type == IBLND_MSG_GET_DONE ||
- type == IBLND_MSG_PUT_DONE);
-
- sge->addr = srcrd->rd_addr;
- sge->lkey = srcrd->rd_key;
- sge->length = nob;
-
- wrq = &tx->tx_wrq[0];
-
- wrq->next = &tx->tx_wrq[1];
- wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
- wrq->sg_list = sge;
- wrq->num_sge = 1;
- wrq->opcode = IB_WR_RDMA_WRITE;
- wrq->send_flags = 0;
-
- wrq->wr.rdma.remote_addr = dstrd->rd_addr;
- wrq->wr.rdma.rkey = dstrd->rd_key;
-
- tx->tx_nwrq = 1;
-#else
- /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */
- int resid = nob;
- kib_rdma_frag_t *srcfrag;
- int srcidx;
- kib_rdma_frag_t *dstfrag;
- int dstidx;
- int wrknob;
-
- LASSERT (!in_interrupt());
- LASSERT (tx->tx_nwrq == 0);
- LASSERT (type == IBLND_MSG_GET_DONE ||
- type == IBLND_MSG_PUT_DONE);
-
- srcidx = dstidx = 0;
- srcfrag = &srcrd->rd_frags[0];
- dstfrag = &dstrd->rd_frags[0];
-
- while (resid > 0) {
- if (srcidx >= srcrd->rd_nfrags) {
- CERROR("Src buffer exhausted: %d frags\n", srcidx);
- rc = -EPROTO;
- break;
- }
-
- if (dstidx == dstrd->rd_nfrags) {
- CERROR("Dst buffer exhausted: %d frags\n", dstidx);
- rc = -EPROTO;
- break;
- }
-
- if (tx->tx_nwrq == IBLND_MAX_RDMA_FRAGS) {
- CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n",
- srcidx, srcrd->rd_nfrags,
- dstidx, dstrd->rd_nfrags);
- rc = -EMSGSIZE;
- break;
- }
-
- wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid);
-
- sge = &tx->tx_sge[tx->tx_nwrq];
- sge->addr = srcfrag->rf_addr;
- sge->length = wrknob;
- sge->lkey = srcrd->rd_key;
-
- wrq = &tx->tx_wrq[tx->tx_nwrq];
-
- wrq->next = wrq + 1;
- wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
- wrq->sg_list = sge;
- wrq->num_sge = 1;
- wrq->opcode = IB_WR_RDMA_WRITE;
- wrq->send_flags = 0;
-
- wrq->wr.rdma.remote_addr = dstfrag->rf_addr;
- wrq->wr.rdma.rkey = dstrd->rd_key;
-
- wrq++;
- sge++;
-
- resid -= wrknob;
- if (wrknob < srcfrag->rf_nob) {
- srcfrag->rf_nob -= wrknob;
- srcfrag->rf_addr += wrknob;
- } else {
- srcfrag++;
- srcidx++;
- }
-
- if (wrknob < dstfrag->rf_nob) {
- dstfrag->rf_nob -= wrknob;
- dstfrag->rf_addr += wrknob;
- } else {
- dstfrag++;
- dstidx++;
- }
-
- tx->tx_nwrq++;
- }
-
- if (rc < 0) /* no RDMA if completing with failure */
- tx->tx_nwrq = 0;
-#endif
- ibmsg->ibm_u.completion.ibcm_status = rc;
- ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
- kiblnd_init_tx_msg(ni, tx, type, sizeof (kib_completion_msg_t));
-
- return rc;
-}
-
-void
-kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
-{
- struct list_head *q;
-
- LASSERT (tx->tx_nwrq > 0); /* work items set up */
- LASSERT (!tx->tx_queued); /* not queued for sending already */
-
- tx->tx_queued = 1;
- tx->tx_deadline = jiffies + (*kiblnd_tunables.kib_timeout * HZ);
-
- if (tx->tx_conn == NULL) {
- kiblnd_conn_addref(conn);
- tx->tx_conn = conn;
- LASSERT (tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE);
- } else {
- /* PUT_DONE first attached to conn as a PUT_REQ */
- LASSERT (tx->tx_conn == conn);
- LASSERT (tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE);
- }
-
- switch (tx->tx_msg->ibm_type) {
- default:
- LBUG();
-
- case IBLND_MSG_PUT_REQ:
- case IBLND_MSG_GET_REQ:
- q = &conn->ibc_tx_queue_rsrvd;
- break;
-
- case IBLND_MSG_PUT_NAK:
- case IBLND_MSG_PUT_ACK:
- case IBLND_MSG_PUT_DONE:
- case IBLND_MSG_GET_DONE:
- q = &conn->ibc_tx_queue_nocred;
- break;
-
- case IBLND_MSG_NOOP:
- q = &conn->ibc_tx_noops;
- break;
-
- case IBLND_MSG_IMMEDIATE:
- q = &conn->ibc_tx_queue;
- break;
- }
-
- list_add_tail(&tx->tx_list, q);
-}
-
-void
-kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
-{
- spin_lock(&conn->ibc_lock);
- kiblnd_queue_tx_locked(tx, conn);
- spin_unlock(&conn->ibc_lock);
-
- kiblnd_check_sends(conn);
-}
-
-void
-kiblnd_connect_peer (kib_peer_t *peer)
-{
- struct rdma_cm_id *cmid;
- kib_net_t *net = peer->ibp_ni->ni_data;
- struct sockaddr_in srcaddr;
- struct sockaddr_in dstaddr;
- int rc;
-
- LASSERT (net != NULL);
- LASSERT (peer->ibp_connecting > 0);
-
- cmid = rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP);
- if (IS_ERR(cmid)) {
- CERROR("Can't create CMID for %s: %ld\n",
- libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid));
- rc = PTR_ERR(cmid);
- goto failed;
- }
-
- memset(&srcaddr, 0, sizeof(srcaddr));
- srcaddr.sin_family = AF_INET;
- srcaddr.sin_addr.s_addr = htonl(net->ibn_dev->ibd_ifip);
-
- memset(&dstaddr, 0, sizeof(dstaddr));
- dstaddr.sin_family = AF_INET;
- dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
- dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
-
- kiblnd_peer_addref(peer); /* cmid's ref */
-
- rc = rdma_resolve_addr(cmid,
- (struct sockaddr *)&srcaddr,
- (struct sockaddr *)&dstaddr,
- *kiblnd_tunables.kib_timeout * 1000);
- if (rc == 0)
- return;
-
- /* Can't initiate address resolution: */
- CERROR("Can't resolve addr for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
-
- kiblnd_peer_decref(peer); /* cmid's ref */
- rdma_destroy_id(cmid);
- failed:
- kiblnd_peer_connect_failed(peer, 1, rc);
-}
-
-void
-kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- kib_peer_t *peer2;
- kib_conn_t *conn;
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- unsigned long flags;
- int rc;
-
- /* If I get here, I've committed to send, so I complete the tx with
- * failure on any problems */
-
- LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */
- LASSERT (tx->tx_nwrq > 0); /* work items have been set up */
-
- /* First time, just use a read lock since I expect to find my peer
- * connected */
- read_lock_irqsave(g_lock, flags);
-
- peer = kiblnd_find_peer_locked(nid);
- if (peer != NULL && !list_empty(&peer->ibp_conns)) {
- /* Found a peer with an established connection */
- conn = kiblnd_get_conn_locked(peer);
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- read_unlock_irqrestore(g_lock, flags);
-
- kiblnd_queue_tx(tx, conn);
- kiblnd_conn_decref(conn); /* ...to here */
- return;
- }
-
- read_unlock(g_lock);
- /* Re-try with a write lock */
- write_lock(g_lock);
-
- peer = kiblnd_find_peer_locked(nid);
- if (peer != NULL) {
- if (list_empty(&peer->ibp_conns)) {
- /* found a peer, but it's still connecting... */
- LASSERT (peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0);
- list_add_tail (&tx->tx_list, &peer->ibp_tx_queue);
- write_unlock_irqrestore(g_lock, flags);
- } else {
- conn = kiblnd_get_conn_locked(peer);
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- write_unlock_irqrestore(g_lock, flags);
-
- kiblnd_queue_tx(tx, conn);
- kiblnd_conn_decref(conn); /* ...to here */
- }
- return;
- }
-
- write_unlock_irqrestore(g_lock, flags);
-
- /* Allocate a peer ready to add to the peer table and retry */
- rc = kiblnd_create_peer(ni, &peer, nid);
- if (rc != 0) {
- CERROR("Can't create peer %s\n", libcfs_nid2str(nid));
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kiblnd_tx_done(ni, tx);
- return;
- }
-
- write_lock_irqsave(g_lock, flags);
-
- peer2 = kiblnd_find_peer_locked(nid);
- if (peer2 != NULL) {
- if (list_empty(&peer2->ibp_conns)) {
- /* found a peer, but it's still connecting... */
- LASSERT (peer2->ibp_connecting != 0 ||
- peer2->ibp_accepting != 0);
- list_add_tail (&tx->tx_list, &peer2->ibp_tx_queue);
- write_unlock_irqrestore(g_lock, flags);
- } else {
- conn = kiblnd_get_conn_locked(peer2);
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- write_unlock_irqrestore(g_lock, flags);
-
- kiblnd_queue_tx(tx, conn);
- kiblnd_conn_decref(conn); /* ...to here */
- }
-
- kiblnd_peer_decref(peer);
- return;
- }
-
- /* Brand new peer */
- LASSERT (peer->ibp_connecting == 0);
- peer->ibp_connecting = 1;
-
- /* always called with a ref on ni, which prevents ni being shutdown */
- LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0);
-
- list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
-
- kiblnd_peer_addref(peer);
- list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
- write_unlock_irqrestore(g_lock, flags);
-
- kiblnd_connect_peer(peer);
- kiblnd_peer_decref(peer);
-}
-
-int
-kiblnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- lnet_hdr_t *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- kib_msg_t *ibmsg;
- kib_tx_t *tx;
- int nob;
- int rc;
-
- /* NB 'private' is different depending on what we're sending.... */
-
- CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT (payload_nob == 0 || payload_niov > 0);
- LASSERT (payload_niov <= LNET_MAX_IOV);
-
- /* Thread context */
- LASSERT (!in_interrupt());
- /* payload is either all vaddrs or all pages */
- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
- switch (type) {
- default:
- LBUG();
- return (-EIO);
-
- case LNET_MSG_ACK:
- LASSERT (payload_nob == 0);
- break;
-
- case LNET_MSG_GET:
- if (routing || target_is_router)
- break; /* send IMMEDIATE */
-
- /* is the REPLY message too small for RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
- if (nob <= IBLND_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kiblnd_get_idle_tx(ni);
- if (tx == NULL) {
- CERROR("Can allocate txd for GET to %s: \n",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.get.ibgm_hdr = *hdr;
- ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
-
- if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
- rc = kiblnd_setup_rd_iov(ni, tx,
- &ibmsg->ibm_u.get.ibgm_rd,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.iov,
- 0, lntmsg->msg_md->md_length);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx,
- &ibmsg->ibm_u.get.ibgm_rd,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.kiov,
- 0, lntmsg->msg_md->md_length);
- if (rc != 0) {
- CERROR("Can't setup GET sink for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kiblnd_tx_done(ni, tx);
- return -EIO;
- }
-#if IBLND_MAP_ON_DEMAND
- nob = sizeof(kib_get_msg_t);
-#else
- nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[tx->tx_nfrags]);
-#endif
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob);
-
- tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg);
- if (tx->tx_lntmsg[1] == NULL) {
- CERROR("Can't create reply for GET -> %s\n",
- libcfs_nid2str(target.nid));
- kiblnd_tx_done(ni, tx);
- return -EIO;
- }
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */
- tx->tx_waiting = 1; /* waiting for GET_DONE */
- kiblnd_launch_tx(ni, tx, target.nid);
- return 0;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- /* Is the payload small enough not to need RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob <= IBLND_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kiblnd_get_idle_tx(ni);
- if (tx == NULL) {
- CERROR("Can't allocate %s txd for %s\n",
- type == LNET_MSG_PUT ? "PUT" : "REPLY",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- if (payload_kiov == NULL)
- rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- if (rc != 0) {
- CERROR("Can't setup PUT src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kiblnd_tx_done(ni, tx);
- return -EIO;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
- ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(kib_putreq_msg_t));
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */
- kiblnd_launch_tx(ni, tx, target.nid);
- return 0;
- }
-
- /* send IMMEDIATE */
-
- LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob])
- <= IBLND_MSG_SIZE);
-
- tx = kiblnd_get_idle_tx(ni);
- if (tx == NULL) {
- CERROR ("Can't send %d to %s: tx descs exhausted\n",
- type, libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
- if (payload_kiov != NULL)
- lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-
- nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]);
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- kiblnd_launch_tx(ni, tx, target.nid);
- return 0;
-}
-
-void
-kiblnd_reply (lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg)
-{
- lnet_process_id_t target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- kib_tx_t *tx;
- int rc;
-
- tx = kiblnd_get_idle_tx(ni);
- if (tx == NULL) {
- CERROR("Can't get tx for REPLY to %s\n",
- libcfs_nid2str(target.nid));
- goto failed_0;
- }
-
- if (nob == 0)
- rc = 0;
- else if (kiov == NULL)
- rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
- niov, iov, offset, nob);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
- niov, kiov, offset, nob);
-
- if (rc != 0) {
- CERROR("Can't setup GET src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- rc = kiblnd_init_rdma(ni, tx, IBLND_MSG_GET_DONE, nob,
- &rx->rx_msg->ibm_u.get.ibgm_rd,
- rx->rx_msg->ibm_u.get.ibgm_cookie);
- if (rc < 0) {
- CERROR("Can't setup rdma for GET from %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- if (nob == 0) {
- /* No RDMA: local completion may happen now! */
- lnet_finalize(ni, lntmsg, 0);
- } else {
- /* RDMA: lnet_finalize(lntmsg) when it
- * completes */
- tx->tx_lntmsg[0] = lntmsg;
- }
-
- kiblnd_queue_tx(tx, rx->rx_conn);
- return;
-
- failed_1:
- kiblnd_tx_done(ni, tx);
- failed_0:
- lnet_finalize(ni, lntmsg, -EIO);
-}
-
-int
-kiblnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- kib_rx_t *rx = private;
- kib_msg_t *rxmsg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- kib_tx_t *tx;
- kib_msg_t *txmsg;
- int nob;
- int post_credit = IBLND_POSTRX_PEER_CREDIT;
- int rc = 0;
-
- LASSERT (mlen <= rlen);
- LASSERT (!in_interrupt());
- /* Either all pages or all vaddrs */
- LASSERT (!(kiov != NULL && iov != NULL));
-
- switch (rxmsg->ibm_type) {
- default:
- LBUG();
-
- case IBLND_MSG_IMMEDIATE:
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]);
- if (nob > rx->rx_nob) {
- CERROR ("Immediate message from %s too big: %d(%d)\n",
- libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
- nob, rx->rx_nob);
- rc = -EPROTO;
- break;
- }
-
- if (kiov != NULL)
- lnet_copy_flat2kiov(niov, kiov, offset,
- IBLND_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
- else
- lnet_copy_flat2iov(niov, iov, offset,
- IBLND_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
- lnet_finalize (ni, lntmsg, 0);
- break;
-
- case IBLND_MSG_PUT_REQ:
- if (mlen == 0) {
- lnet_finalize(ni, lntmsg, 0);
- kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- tx = kiblnd_get_idle_tx(ni);
- if (tx == NULL) {
- CERROR("Can't allocate tx for %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /* Not replying will break the connection */
- rc = -ENOMEM;
- break;
- }
-
- txmsg = tx->tx_msg;
- if (kiov == NULL)
- rc = kiblnd_setup_rd_iov(ni, tx,
- &txmsg->ibm_u.putack.ibpam_rd,
- niov, iov, offset, mlen);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx,
- &txmsg->ibm_u.putack.ibpam_rd,
- niov, kiov, offset, mlen);
- if (rc != 0) {
- CERROR("Can't setup PUT sink for %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kiblnd_tx_done(ni, tx);
- /* tell peer it's over */
- kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
- txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
-#if IBLND_MAP_ON_DEMAND
- nob = sizeof(kib_putack_msg_t);
-#else
- nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[tx->tx_nfrags]);
-#endif
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_DONE */
- kiblnd_queue_tx(tx, conn);
-
- /* reposted buffer reserved for PUT_DONE */
- post_credit = IBLND_POSTRX_NO_CREDIT;
- break;
-
- case IBLND_MSG_GET_REQ:
- if (lntmsg != NULL) {
- /* Optimized GET; RDMA lntmsg's payload */
- kiblnd_reply(ni, rx, lntmsg);
- } else {
- /* GET didn't match anything */
- kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE,
- -ENODATA,
- rxmsg->ibm_u.get.ibgm_cookie);
- }
- break;
- }
-
- kiblnd_post_rx(rx, post_credit);
- return rc;
-}
-
-int
-kiblnd_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid = kernel_thread (fn, arg, 0);
-
- if (pid < 0)
- return ((int)pid);
-
- atomic_inc (&kiblnd_data.kib_nthreads);
- return (0);
-}
-
-void
-kiblnd_thread_fini (void)
-{
- atomic_dec (&kiblnd_data.kib_nthreads);
-}
-
-void
-kiblnd_peer_alive (kib_peer_t *peer)
-{
- /* This is racy, but everyone's only writing cfs_time_current() */
- peer->ibp_last_alive = cfs_time_current();
- mb();
-}
-
-void
-kiblnd_peer_notify (kib_peer_t *peer)
-{
- time_t last_alive = 0;
- int error = 0;
- unsigned long flags;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (list_empty(&peer->ibp_conns) &&
- peer->ibp_accepting == 0 &&
- peer->ibp_connecting == 0 &&
- peer->ibp_error != 0) {
- error = peer->ibp_error;
- peer->ibp_error = 0;
-
- last_alive = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() -
- peer->ibp_last_alive);
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- if (error != 0)
- lnet_notify(peer->ibp_ni,
- peer->ibp_nid, 0, last_alive);
-}
-
-void
-kiblnd_close_conn_locked (kib_conn_t *conn, int error)
-{
- /* This just does the immediate housekeeping. 'error' is zero for a
- * normal shutdown which can happen only after the connection has been
- * established. If the connection is established, schedule the
- * connection to be finished off by the connd. Otherwise the connd is
- * already dealing with it (either to set it up or tear it down).
- * Caller holds kib_global_lock exclusively in irq context */
- unsigned long flags;
- kib_peer_t *peer = conn->ibc_peer;
-
- LASSERT (error != 0 || conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- if (error != 0 && conn->ibc_comms_error == 0)
- conn->ibc_comms_error = error;
-
- if (conn->ibc_state != IBLND_CONN_ESTABLISHED)
- return; /* already being handled */
-
- if (error == 0 &&
- list_empty(&conn->ibc_tx_noops) &&
- list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_rsrvd) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- list_empty(&conn->ibc_active_txs)) {
- CDEBUG(D_NET, "closing conn to %s\n",
- libcfs_nid2str(peer->ibp_nid));
- } else {
- CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s%s\n",
- libcfs_nid2str(peer->ibp_nid), error,
- list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
- list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
- list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
- list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
- list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
- }
-
- list_del (&conn->ibc_list);
- /* connd (see below) takes over ibc_list's ref */
-
- if (list_empty (&peer->ibp_conns) && /* no more conns */
- kiblnd_peer_active(peer)) { /* still in peer table */
- kiblnd_unlink_peer_locked(peer);
-
- /* set/clear error on last conn */
- peer->ibp_error = conn->ibc_comms_error;
- }
-
- kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING);
-
- spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
-
- list_add_tail (&conn->ibc_list, &kiblnd_data.kib_connd_conns);
- wake_up (&kiblnd_data.kib_connd_waitq);
-
- spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
-}
-
-void
-kiblnd_close_conn (kib_conn_t *conn, int error)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_close_conn_locked(conn, error);
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
-void
-kiblnd_handle_early_rxs(kib_conn_t *conn)
-{
- unsigned long flags;
- kib_rx_t *rx;
-
- LASSERT (!in_interrupt());
- LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- while (!list_empty(&conn->ibc_early_rxs)) {
- rx = list_entry(conn->ibc_early_rxs.next,
- kib_rx_t, rx_list);
- list_del(&rx->rx_list);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_handle_rx(rx);
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- }
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
-void
-kiblnd_abort_txs(kib_conn_t *conn, struct list_head *txs)
-{
- LIST_HEAD (zombies);
- struct list_head *tmp;
- struct list_head *nxt;
- kib_tx_t *tx;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each_safe (tmp, nxt, txs) {
- tx = list_entry (tmp, kib_tx_t, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_waiting ||
- tx->tx_sending != 0);
- } else {
- LASSERT (tx->tx_queued);
- }
-
- tx->tx_status = -ECONNABORTED;
- tx->tx_queued = 0;
- tx->tx_waiting = 0;
-
- if (tx->tx_sending == 0) {
- list_del (&tx->tx_list);
- list_add (&tx->tx_list, &zombies);
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-
- kiblnd_txlist_done(conn->ibc_peer->ibp_ni,
- &zombies, -ECONNABORTED);
-}
-
-void
-kiblnd_finalise_conn (kib_conn_t *conn)
-{
- LASSERT (!in_interrupt());
- LASSERT (conn->ibc_state > IBLND_CONN_INIT);
-
- kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED);
-
- /* abort_receives moves QP state to IB_QPS_ERR. This is only required
- * for connections that didn't get as far as being connected, because
- * rdma_disconnect() does this for free. */
- kiblnd_abort_receives(conn);
-
- /* Complete all tx descs not waiting for sends to complete.
- * NB we should be safe from RDMA now that the QP has changed state */
-
- kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
- kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
- kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
- kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
- kiblnd_abort_txs(conn, &conn->ibc_active_txs);
-
- kiblnd_handle_early_rxs(conn);
-}
-
-void
-kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error)
-{
- LIST_HEAD (zombies);
- unsigned long flags;
-
- LASSERT (error != 0);
- LASSERT (!in_interrupt());
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (active) {
- LASSERT (peer->ibp_connecting > 0);
- peer->ibp_connecting--;
- } else {
- LASSERT (peer->ibp_accepting > 0);
- peer->ibp_accepting--;
- }
-
- if (peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0) {
- /* another connection attempt under way... */
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- return;
- }
-
- if (list_empty(&peer->ibp_conns)) {
- /* Take peer's blocked transmits to complete with error */
- list_add(&zombies, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (kiblnd_peer_active(peer))
- kiblnd_unlink_peer_locked(peer);
-
- peer->ibp_error = error;
- } else {
- /* Can't have blocked transmits if there are connections */
- LASSERT (list_empty(&peer->ibp_tx_queue));
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_peer_notify(peer);
-
- if (list_empty (&zombies))
- return;
-
- CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH);
-}
-
-void
-kiblnd_connreq_done(kib_conn_t *conn, int status)
-{
- struct list_head txs;
-
- kib_peer_t *peer = conn->ibc_peer;
- int active;
- unsigned long flags;
- kib_tx_t *tx;
-
- active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
- CDEBUG(D_NET,"%s: %d, %d\n", libcfs_nid2str(peer->ibp_nid),
- active, status);
-
- LASSERT (!in_interrupt());
- LASSERT ((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT &&
- peer->ibp_connecting > 0) ||
- (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT &&
- peer->ibp_accepting > 0));
-
- LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
- conn->ibc_connvars = NULL;
-
- if (status != 0) {
- /* failed to establish connection */
- kiblnd_peer_connect_failed(peer, active, status);
- kiblnd_finalise_conn(conn);
- return;
- }
-
- /* connection established */
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- conn->ibc_last_send = jiffies;
- kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
- kiblnd_peer_alive(peer);
-
- /* Add conn to peer's list and nuke any dangling conns from a different
- * peer instance... */
- kiblnd_conn_addref(conn); /* +1 ref for ibc_list */
- list_add(&conn->ibc_list, &peer->ibp_conns);
- if (active)
- peer->ibp_connecting--;
- else
- peer->ibp_accepting--;
-
- kiblnd_close_stale_conns_locked(peer, conn->ibc_incarnation);
-
- /* grab pending txs while I have the lock */
- list_add(&txs, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (!kiblnd_peer_active(peer) || /* peer has been deleted */
- conn->ibc_comms_error != 0) { /* error has happened already */
- lnet_ni_t *ni = peer->ibp_ni;
-
- /* start to shut down connection */
- kiblnd_close_conn_locked(conn, -ECONNABORTED);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_txlist_done(ni, &txs, -ECONNABORTED);
-
- return;
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- /* Schedule blocked txs */
- spin_lock (&conn->ibc_lock);
- while (!list_empty (&txs)) {
- tx = list_entry (txs.next, kib_tx_t, tx_list);
- list_del (&tx->tx_list);
-
- kiblnd_queue_tx_locked(tx, conn);
- }
- spin_unlock (&conn->ibc_lock);
-
- kiblnd_check_sends(conn);
-
- /* schedule blocked rxs */
- kiblnd_handle_early_rxs(conn);
-}
-
-void
-kiblnd_reject(struct rdma_cm_id *cmid, int why)
-{
- int rc;
- kib_rej_t rej = {.ibr_magic = IBLND_MSG_MAGIC,
- .ibr_version = IBLND_MSG_VERSION,
- .ibr_why = why};
-
- rc = rdma_reject(cmid, &rej, sizeof(rej));
-
- if (rc != 0)
- CWARN("Error %d sending reject\n", rc);
-}
-
-int
-kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob)
-{
- kib_msg_t *ackmsg;
- kib_msg_t *reqmsg = priv;
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- struct rdma_conn_param cp;
- unsigned long flags;
- lnet_ni_t *ni = NULL;
- kib_dev_t *ibdev;
- kib_peer_t *peer;
- kib_peer_t *peer2;
- kib_conn_t *conn;
- lnet_nid_t nid;
- int rc;
- int rej = IBLND_REJECT_FATAL;
-
- LASSERT (!in_interrupt());
-
- /* cmid inherits 'context' from the corresponding listener id */
- ibdev = (kib_dev_t *)cmid->context;
- LASSERT (ibdev != NULL);
-
- if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
- CERROR("Short connection request\n");
- goto failed;
- }
-
- if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
- reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC) ||
- (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
- reqmsg->ibm_version != IBLND_MSG_VERSION) ||
- (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
- reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION))) {
- /* Future protocol version compatibility support! If the
- * o2iblnd-specific protocol changes, or when LNET unifies
- * protocols over all LNDs, the initial connection will
- * negotiate a protocol version. I trap this here to avoid
- * console errors; the reject tells the peer which protocol I
- * speak. */
- goto failed;
- }
-
- rc = kiblnd_unpack_msg(reqmsg, priv_nob);
- if (rc != 0) {
- CERROR("Can't parse connection request: %d\n", rc);
- goto failed;
- }
-
- nid = reqmsg->ibm_srcnid;
-
- if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
- CERROR("Unexpected connreq msg type: %x from %s\n",
- reqmsg->ibm_type, libcfs_nid2str(nid));
- goto failed;
- }
-
- if (reqmsg->ibm_u.connparams.ibcp_queue_depth != IBLND_MSG_QUEUE_SIZE) {
- CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n",
- libcfs_nid2str(nid),
- reqmsg->ibm_u.connparams.ibcp_queue_depth,
- IBLND_MSG_QUEUE_SIZE);
- goto failed;
- }
-
- if (reqmsg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) {
- CERROR("Can't accept %s: incompatible max_frags %d (%d wanted)\n",
- libcfs_nid2str(nid),
- reqmsg->ibm_u.connparams.ibcp_max_frags,
- IBLND_MAX_RDMA_FRAGS);
- goto failed;
- }
-
- if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
- CERROR("Can't accept %s: message size %d too big (%d max)\n",
- libcfs_nid2str(nid),
- reqmsg->ibm_u.connparams.ibcp_max_msg_size,
- IBLND_MSG_SIZE);
- goto failed;
- }
-
- ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
- if (ni == NULL || /* no matching net */
- ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */
- ((kib_net_t*)ni->ni_data)->ibn_dev != ibdev) { /* wrong device */
- CERROR("Can't accept %s: bad dst nid %s\n",
- libcfs_nid2str(nid),
- libcfs_nid2str(reqmsg->ibm_dstnid));
-
- goto failed;
- }
-
- /* assume 'nid' is a new peer; create */
- rc = kiblnd_create_peer(ni, &peer, nid);
- if (rc != 0) {
- CERROR("Can't create peer for %s\n", libcfs_nid2str(nid));
- rej = IBLND_REJECT_NO_RESOURCES;
- goto failed;
- }
-
- write_lock_irqsave(g_lock, flags);
-
- peer2 = kiblnd_find_peer_locked(nid);
- if (peer2 != NULL) {
- /* tie-break connection race in favour of the higher NID */
- if (peer2->ibp_connecting != 0 &&
- nid < ni->ni_nid) {
- write_unlock_irqrestore(g_lock, flags);
-
- CWARN("Conn race %s\n",
- libcfs_nid2str(peer2->ibp_nid));
-
- kiblnd_peer_decref(peer);
- rej = IBLND_REJECT_CONN_RACE;
- goto failed;
- }
-
- peer2->ibp_accepting++;
- kiblnd_peer_addref(peer2);
-
- write_unlock_irqrestore(g_lock, flags);
- kiblnd_peer_decref(peer);
- peer = peer2;
- } else {
- /* Brand new peer */
- LASSERT (peer->ibp_accepting == 0);
- peer->ibp_accepting = 1;
-
- /* I have a ref on ni that prevents it being shutdown */
- LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0);
-
- kiblnd_peer_addref(peer);
- list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
- write_unlock_irqrestore(g_lock, flags);
- }
-
- conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT);
- if (conn == NULL) {
- kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
- kiblnd_peer_decref(peer);
- rej = IBLND_REJECT_NO_RESOURCES;
- goto failed;
- }
-
- /* conn now "owns" cmid, so I return success from here on to ensure the
- * CM callback doesn't destroy cmid. */
-
- conn->ibc_incarnation = reqmsg->ibm_srcstamp;
- conn->ibc_credits = IBLND_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE;
- LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
- <= IBLND_RX_MSGS);
-
- ackmsg = &conn->ibc_connvars->cv_msg;
- memset(ackmsg, 0, sizeof(*ackmsg));
-
- kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
- sizeof(ackmsg->ibm_u.connparams));
- ackmsg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE;
- ackmsg->ibm_u.connparams.ibcp_max_frags = IBLND_MAX_RDMA_FRAGS;
- ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
- kiblnd_pack_msg(ni, ackmsg, 0, nid, reqmsg->ibm_srcstamp);
-
- memset(&cp, 0, sizeof(cp));
- cp.private_data = ackmsg;
- cp.private_data_len = ackmsg->ibm_nob;
- cp.responder_resources = 0; /* No atomic ops or RDMA reads */
- cp.initiator_depth = 0;
- cp.flow_control = 1;
- cp.retry_count = *kiblnd_tunables.kib_retry_count;
- cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
-
- CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid));
-
- rc = rdma_accept(cmid, &cp);
- if (rc != 0) {
- CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc);
- kiblnd_reject(cmid, IBLND_REJECT_FATAL);
- kiblnd_connreq_done(conn, rc);
- kiblnd_conn_decref(conn);
- }
-
- lnet_ni_decref(ni);
- return 0;
-
- failed:
- if (ni != NULL)
- lnet_ni_decref(ni);
-
- kiblnd_reject(cmid, rej);
- return -ECONNREFUSED;
-}
-
-void
-kiblnd_reconnect (kib_conn_t *conn, char *why)
-{
- kib_peer_t *peer = conn->ibc_peer;
- int retry = 0;
- unsigned long flags;
-
- LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
- LASSERT (peer->ibp_connecting > 0); /* 'conn' at least */
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- /* retry connection if it's still needed and no other connection
- * attempts (active or passive) are in progress */
- if (!list_empty(&peer->ibp_tx_queue) &&
- peer->ibp_connecting == 1 &&
- peer->ibp_accepting == 0) {
- retry = 1;
- peer->ibp_connecting++;
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- if (retry) {
- CDEBUG(D_NETERROR, "%s: retrying (%s)\n",
- libcfs_nid2str(peer->ibp_nid), why);
- kiblnd_connect_peer(peer);
- }
-}
-
-void
-kiblnd_rejected (kib_conn_t *conn, int reason, void *priv, int priv_nob)
-{
- kib_peer_t *peer = conn->ibc_peer;
-
- LASSERT (!in_interrupt());
- LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
- switch (reason) {
- case IB_CM_REJ_STALE_CONN:
- kiblnd_reconnect(conn, "stale");
- break;
-
- case IB_CM_REJ_CONSUMER_DEFINED:
- if (priv_nob >= sizeof(kib_rej_t)) {
- kib_rej_t *rej = priv;
-
- if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) ||
- rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) {
- __swab32s(&rej->ibr_magic);
- __swab16s(&rej->ibr_version);
- }
-
- if (rej->ibr_magic != IBLND_MSG_MAGIC &&
- rej->ibr_magic != LNET_PROTO_MAGIC) {
- CERROR("%s rejected: consumer defined fatal error\n",
- libcfs_nid2str(peer->ibp_nid));
- break;
- }
-
- if (rej->ibr_version != IBLND_MSG_VERSION) {
- CERROR("%s rejected: o2iblnd version %d error\n",
- libcfs_nid2str(peer->ibp_nid),
- rej->ibr_version);
- break;
- }
-
- switch (rej->ibr_why) {
- case IBLND_REJECT_CONN_RACE:
- kiblnd_reconnect(conn, "conn race");
- break;
-
- case IBLND_REJECT_NO_RESOURCES:
- CERROR("%s rejected: o2iblnd no resources\n",
- libcfs_nid2str(peer->ibp_nid));
- break;
- case IBLND_REJECT_FATAL:
- CERROR("%s rejected: o2iblnd fatal error\n",
- libcfs_nid2str(peer->ibp_nid));
- break;
- default:
- CERROR("%s rejected: o2iblnd reason %d\n",
- libcfs_nid2str(peer->ibp_nid),
- rej->ibr_why);
- break;
- }
- break;
- }
- /* fall through */
- default:
- CDEBUG(D_NETERROR, "%s rejected: reason %d, size %d\n",
- libcfs_nid2str(peer->ibp_nid), reason, priv_nob);
- break;
- }
-
- kiblnd_connreq_done(conn, -ECONNREFUSED);
-}
-
-void
-kiblnd_check_connreply (kib_conn_t *conn, void *priv, int priv_nob)
-{
- kib_peer_t *peer = conn->ibc_peer;
- lnet_ni_t *ni = peer->ibp_ni;
- kib_net_t *net = ni->ni_data;
- kib_msg_t *msg = priv;
- int rc = kiblnd_unpack_msg(msg, priv_nob);
- unsigned long flags;
-
- LASSERT (net != NULL);
-
- if (rc != 0) {
- CERROR("Can't unpack connack from %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- goto failed;
- }
-
- if (msg->ibm_type != IBLND_MSG_CONNACK) {
- CERROR("Unexpected message %d from %s\n",
- msg->ibm_type, libcfs_nid2str(peer->ibp_nid));
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ibm_u.connparams.ibcp_queue_depth != IBLND_MSG_QUEUE_SIZE) {
- CERROR("%s has incompatible queue depth %d(%d wanted)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_queue_depth,
- IBLND_MSG_QUEUE_SIZE);
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) {
- CERROR("%s has incompatible max_frags %d (%d wanted)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_max_frags,
- IBLND_MAX_RDMA_FRAGS);
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
- CERROR("%s max message size %d too big (%d max)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_max_msg_size,
- IBLND_MSG_SIZE);
- rc = -EPROTO;
- goto failed;
- }
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- if (msg->ibm_dstnid == ni->ni_nid &&
- msg->ibm_dststamp == net->ibn_incarnation)
- rc = 0;
- else
- rc = -ESTALE;
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- if (rc != 0) {
- CERROR("Stale connection reply from %s\n",
- libcfs_nid2str(peer->ibp_nid));
- goto failed;
- }
-
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_credits = IBLND_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE;
- LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
- <= IBLND_RX_MSGS);
-
- kiblnd_connreq_done(conn, 0);
- return;
-
- failed:
- /* NB My QP has already established itself, so I handle anything going
- * wrong here by setting ibc_comms_error.
- * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then
- * immediately tears it down. */
-
- LASSERT (rc != 0);
- conn->ibc_comms_error = rc;
- kiblnd_connreq_done(conn, 0);
-}
-
-int
-kiblnd_active_connect (struct rdma_cm_id *cmid)
-{
- kib_peer_t *peer = (kib_peer_t *)cmid->context;
- kib_conn_t *conn;
- kib_msg_t *msg;
- struct rdma_conn_param cp;
- int rc;
-
- conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT);
- if (conn == NULL) {
- kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
- kiblnd_peer_decref(peer); /* lose cmid's ref */
- return -ENOMEM;
- }
-
- /* conn "owns" cmid now, so I return success from here on to ensure the
- * CM callback doesn't destroy cmid. conn also takes over cmid's ref
- * on peer */
-
- msg = &conn->ibc_connvars->cv_msg;
-
- memset(msg, 0, sizeof(*msg));
- kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
- msg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE;
- msg->ibm_u.connparams.ibcp_max_frags = IBLND_MAX_RDMA_FRAGS;
- msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
- kiblnd_pack_msg(peer->ibp_ni, msg, 0, peer->ibp_nid, 0);
-
- memset(&cp, 0, sizeof(cp));
- cp.private_data = msg;
- cp.private_data_len = msg->ibm_nob;
- cp.responder_resources = 0; /* No atomic ops or RDMA reads */
- cp.initiator_depth = 0;
- cp.flow_control = 1;
- cp.retry_count = *kiblnd_tunables.kib_retry_count;
- cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
-
- LASSERT(cmid->context == (void *)conn);
- LASSERT(conn->ibc_cmid == cmid);
-
- rc = rdma_connect(cmid, &cp);
- if (rc != 0) {
- CERROR("Can't connect to %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- kiblnd_connreq_done(conn, rc);
- kiblnd_conn_decref(conn);
- }
-
- return 0;
-}
-
-int
-kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
-{
- kib_peer_t *peer;
- kib_conn_t *conn;
- int rc;
-
- switch (event->event) {
- default:
- LBUG();
-
- case RDMA_CM_EVENT_CONNECT_REQUEST:
- /* destroy cmid on failure */
- rc = kiblnd_passive_connect(cmid,
- (void *)KIBLND_CONN_PARAM(event),
- KIBLND_CONN_PARAM_LEN(event));
- CDEBUG(D_NET, "connreq: %d\n", rc);
- return rc;
-
- case RDMA_CM_EVENT_ADDR_ERROR:
- peer = (kib_peer_t *)cmid->context;
- CDEBUG(D_NETERROR, "%s: ADDR ERROR %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
- kiblnd_peer_decref(peer);
- return -EHOSTUNREACH; /* rc != 0 destroys cmid */
-
- case RDMA_CM_EVENT_ADDR_RESOLVED:
- peer = (kib_peer_t *)cmid->context;
-
- CDEBUG(D_NET,"%s Addr resolved: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
-
- if (event->status != 0) {
- CDEBUG(D_NETERROR, "Can't resolve address for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- rc = event->status;
- } else {
- rc = rdma_resolve_route(
- cmid, *kiblnd_tunables.kib_timeout * 1000);
- if (rc == 0)
- return 0;
- /* Can't initiate route resolution */
- CERROR("Can't resolve route for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- }
- kiblnd_peer_connect_failed(peer, 1, rc);
- kiblnd_peer_decref(peer);
- return rc; /* rc != 0 destroys cmid */
-
- case RDMA_CM_EVENT_ROUTE_ERROR:
- peer = (kib_peer_t *)cmid->context;
- CDEBUG(D_NETERROR, "%s: ROUTE ERROR %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
- kiblnd_peer_decref(peer);
- return -EHOSTUNREACH; /* rc != 0 destroys cmid */
-
- case RDMA_CM_EVENT_ROUTE_RESOLVED:
- peer = (kib_peer_t *)cmid->context;
- CDEBUG(D_NET,"%s Route resolved: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
-
- if (event->status == 0)
- return kiblnd_active_connect(cmid);
-
- CDEBUG(D_NETERROR, "Can't resolve route for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, event->status);
- kiblnd_peer_decref(peer);
- return event->status; /* rc != 0 destroys cmid */
-
- case RDMA_CM_EVENT_UNREACHABLE:
- conn = (kib_conn_t *)cmid->context;
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
- conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
- CDEBUG(D_NETERROR, "%s: UNREACHABLE %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
- kiblnd_connreq_done(conn, -ENETDOWN);
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_CONNECT_ERROR:
- conn = (kib_conn_t *)cmid->context;
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
- conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
- CDEBUG(D_NETERROR, "%s: CONNECT ERROR %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
- kiblnd_connreq_done(conn, -ENOTCONN);
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_REJECTED:
- conn = (kib_conn_t *)cmid->context;
- switch (conn->ibc_state) {
- default:
- LBUG();
-
- case IBLND_CONN_PASSIVE_WAIT:
- CERROR ("%s: REJECTED %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- event->status);
- kiblnd_connreq_done(conn, -ECONNRESET);
- break;
-
- case IBLND_CONN_ACTIVE_CONNECT:
- kiblnd_rejected(conn, event->status,
- (void *)KIBLND_CONN_PARAM(event),
- KIBLND_CONN_PARAM_LEN(event));
- break;
- }
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_ESTABLISHED:
- conn = (kib_conn_t *)cmid->context;
- switch (conn->ibc_state) {
- default:
- LBUG();
-
- case IBLND_CONN_PASSIVE_WAIT:
- CDEBUG(D_NET, "ESTABLISHED (passive): %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_connreq_done(conn, 0);
- break;
-
- case IBLND_CONN_ACTIVE_CONNECT:
- CDEBUG(D_NET, "ESTABLISHED(active): %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_check_connreply(conn,
- (void *)KIBLND_CONN_PARAM(event),
- KIBLND_CONN_PARAM_LEN(event));
- break;
- }
- /* net keeps its ref on conn! */
- return 0;
-
- case RDMA_CM_EVENT_DISCONNECTED:
- conn = (kib_conn_t *)cmid->context;
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- CERROR("%s DISCONNECTED\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_connreq_done(conn, -ECONNRESET);
- } else {
- kiblnd_close_conn(conn, 0);
- }
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- LCONSOLE_ERROR_MSG(0x131,
- "Received notification of device removal\n"
- "Please shutdown LNET to allow this to proceed\n");
- /* Can't remove network from underneath LNET for now, so I have
- * to ignore this */
- return 0;
- }
-}
-
-int
-kiblnd_check_txs (kib_conn_t *conn, struct list_head *txs)
-{
- kib_tx_t *tx;
- struct list_head *ttmp;
- int timed_out = 0;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each (ttmp, txs) {
- tx = list_entry (ttmp, kib_tx_t, tx_list);
-
- if (txs != &conn->ibc_active_txs) {
- LASSERT (tx->tx_queued);
- } else {
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_waiting || tx->tx_sending != 0);
- }
-
- if (time_after_eq (jiffies, tx->tx_deadline)) {
- timed_out = 1;
- break;
- }
- }
-
- spin_unlock(&conn->ibc_lock);
- return timed_out;
-}
-
-int
-kiblnd_conn_timed_out (kib_conn_t *conn)
-{
- return kiblnd_check_txs(conn, &conn->ibc_tx_queue) ||
- kiblnd_check_txs(conn, &conn->ibc_tx_noops) ||
- kiblnd_check_txs(conn, &conn->ibc_tx_queue_rsrvd) ||
- kiblnd_check_txs(conn, &conn->ibc_tx_queue_nocred) ||
- kiblnd_check_txs(conn, &conn->ibc_active_txs);
-}
-
-void
-kiblnd_check_conns (int idx)
-{
- struct list_head *peers = &kiblnd_data.kib_peers[idx];
- struct list_head *ptmp;
- kib_peer_t *peer;
- kib_conn_t *conn;
- struct list_head *ctmp;
- unsigned long flags;
-
- again:
- /* NB. We expect to have a look at all the peers and not find any
- * rdmas to time out, so we just use a shared lock while we
- * take a look... */
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- list_for_each (ptmp, peers) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
-
- list_for_each (ctmp, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- LASSERT (conn->ibc_state == IBLND_CONN_ESTABLISHED);
-
- /* In case we have enough credits to return via a
- * NOOP, but there were no non-blocking tx descs
- * free to do it last time... */
- kiblnd_check_sends(conn);
-
- if (!kiblnd_conn_timed_out(conn))
- continue;
-
- /* Handle timeout by closing the whole connection. We
- * can only be sure RDMA activity has ceased once the
- * QP has been modified. */
-
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
- flags);
-
- CERROR("Timed out RDMA with %s\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kiblnd_close_conn(conn, -ETIMEDOUT);
- kiblnd_conn_decref(conn); /* ...until here */
-
- /* start again now I've dropped the lock */
- goto again;
- }
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
-void
-kiblnd_disconnect_conn (kib_conn_t *conn)
-{
- LASSERT (!in_interrupt());
- LASSERT (current == kiblnd_data.kib_connd);
- LASSERT (conn->ibc_state == IBLND_CONN_CLOSING);
-
- rdma_disconnect(conn->ibc_cmid);
- kiblnd_finalise_conn(conn);
-
- kiblnd_peer_notify(conn->ibc_peer);
-}
-
-int
-kiblnd_connd (void *arg)
-{
- wait_queue_t wait;
- unsigned long flags;
- kib_conn_t *conn;
- int timeout;
- int i;
- int dropped_lock;
- int peer_index = 0;
- unsigned long deadline = jiffies;
-
- cfs_daemonize ("kiblnd_connd");
- cfs_block_allsigs ();
-
- init_waitqueue_entry (&wait, current);
- kiblnd_data.kib_connd = current;
-
- spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
-
- while (!kiblnd_data.kib_shutdown) {
-
- dropped_lock = 0;
-
- if (!list_empty (&kiblnd_data.kib_connd_zombies)) {
- conn = list_entry (kiblnd_data.kib_connd_zombies.next,
- kib_conn_t, ibc_list);
- list_del (&conn->ibc_list);
-
- spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags);
- dropped_lock = 1;
-
- kiblnd_destroy_conn(conn);
-
- spin_lock_irqsave (&kiblnd_data.kib_connd_lock, flags);
- }
-
- if (!list_empty (&kiblnd_data.kib_connd_conns)) {
- conn = list_entry (kiblnd_data.kib_connd_conns.next,
- kib_conn_t, ibc_list);
- list_del (&conn->ibc_list);
-
- spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags);
- dropped_lock = 1;
-
- kiblnd_disconnect_conn(conn);
- kiblnd_conn_decref(conn);
-
- spin_lock_irqsave (&kiblnd_data.kib_connd_lock, flags);
- }
-
- /* careful with the jiffy wrap... */
- timeout = (int)(deadline - jiffies);
- if (timeout <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = kiblnd_data.kib_peer_hash_size;
-
- spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
- dropped_lock = 1;
-
- /* Time to check for RDMA timeouts on a few more
- * peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
- * every connection 'n' times within a timeout
- * interval, to ensure I detect a timeout on any
- * connection within (n+1)/n times the timeout
- * interval. */
-
- if (*kiblnd_tunables.kib_timeout > n * p)
- chunk = (chunk * n * p) /
- *kiblnd_tunables.kib_timeout;
- if (chunk == 0)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kiblnd_check_conns(peer_index);
- peer_index = (peer_index + 1) %
- kiblnd_data.kib_peer_hash_size;
- }
-
- deadline += p * HZ;
- spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
- }
-
- if (dropped_lock)
- continue;
-
- /* Nothing to do for 'timeout' */
- set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue (&kiblnd_data.kib_connd_waitq, &wait);
- spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags);
-
- schedule_timeout (timeout);
-
- set_current_state (TASK_RUNNING);
- remove_wait_queue (&kiblnd_data.kib_connd_waitq, &wait);
- spin_lock_irqsave (&kiblnd_data.kib_connd_lock, flags);
- }
-
- spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags);
-
- kiblnd_thread_fini();
- return (0);
-}
-
-void
-kiblnd_qp_event(struct ib_event *event, void *arg)
-{
- kib_conn_t *conn = arg;
-
- switch (event->event) {
- case IB_EVENT_COMM_EST:
- CDEBUG(D_NET, "%s established\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
-
- default:
- CERROR("%s: Async QP event type %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
- return;
- }
-}
-
-void
-kiblnd_complete (struct ib_wc *wc)
-{
- switch (kiblnd_wreqid2type(wc->wr_id)) {
- default:
- LBUG();
-
- case IBLND_WID_RDMA:
- /* We only get RDMA completion notification if it fails. All
- * subsequent work items, including the final SEND will fail
- * too. However we can't print out any more info about the
- * failing RDMA because 'tx' might be back on the idle list or
- * even reused already if we didn't manage to post all our work
- * items */
- CDEBUG(D_NETERROR, "RDMA (tx: %p) failed: %d\n",
- kiblnd_wreqid2ptr(wc->wr_id), wc->status);
- return;
-
- case IBLND_WID_TX:
- kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status);
- return;
-
- case IBLND_WID_RX:
- kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status,
- wc->byte_len);
- return;
- }
-}
-
-void
-kiblnd_cq_completion (struct ib_cq *cq, void *arg)
-{
- /* NB I'm not allowed to schedule this conn once its refcount has
- * reached 0. Since fundamentally I'm racing with scheduler threads
- * consuming my CQ I could be called after all completions have
- * occurred. But in this case, ibc_nrx == 0 && ibc_nsends_posted == 0
- * and this CQ is about to be destroyed so I NOOP. */
- kib_conn_t *conn = (kib_conn_t *)arg;
- unsigned long flags;
-
- LASSERT (cq == conn->ibc_cq);
-
- spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
-
- conn->ibc_ready = 1;
-
- if (!conn->ibc_scheduled &&
- (conn->ibc_nrx > 0 ||
- conn->ibc_nsends_posted > 0)) {
- kiblnd_conn_addref(conn); /* +1 ref for sched_conns */
- conn->ibc_scheduled = 1;
- list_add_tail(&conn->ibc_sched_list,
- &kiblnd_data.kib_sched_conns);
- wake_up(&kiblnd_data.kib_sched_waitq);
- }
-
- spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags);
-}
-
-void
-kiblnd_cq_event(struct ib_event *event, void *arg)
-{
- kib_conn_t *conn = arg;
-
- CERROR("%s: async CQ event type %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
-}
-
-int
-kiblnd_scheduler(void *arg)
-{
- long id = (long)arg;
- wait_queue_t wait;
- char name[16];
- unsigned long flags;
- kib_conn_t *conn;
- struct ib_wc wc;
- int rc;
- int did_something;
- int busy_loops = 0;
-
- snprintf(name, sizeof(name), "kiblnd_sd_%02ld", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- init_waitqueue_entry(&wait, current);
-
- spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
-
- while (!kiblnd_data.kib_shutdown) {
- if (busy_loops++ >= IBLND_RESCHED) {
- spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock,
- flags);
-
- our_cond_resched();
- busy_loops = 0;
-
- spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
- }
-
- did_something = 0;
-
- if (!list_empty(&kiblnd_data.kib_sched_conns)) {
- conn = list_entry(kiblnd_data.kib_sched_conns.next,
- kib_conn_t, ibc_sched_list);
- /* take over kib_sched_conns' ref on conn... */
- LASSERT(conn->ibc_scheduled);
- list_del(&conn->ibc_sched_list);
- conn->ibc_ready = 0;
-
- spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock,
- flags);
-
- rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
- if (rc == 0) {
- rc = ib_req_notify_cq(conn->ibc_cq,
- IB_CQ_NEXT_COMP);
- LASSERT (rc >= 0);
-
- rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
- }
-
- LASSERT (rc >= 0);
-
- spin_lock_irqsave(&kiblnd_data.kib_sched_lock,
- flags);
-
- if (rc != 0 || conn->ibc_ready) {
- /* There may be another completion waiting; get
- * another scheduler to check while I handle
- * this one... */
- kiblnd_conn_addref(conn); /* +1 ref for sched_conns */
- list_add_tail(&conn->ibc_sched_list,
- &kiblnd_data.kib_sched_conns);
- wake_up(&kiblnd_data.kib_sched_waitq);
- } else {
- conn->ibc_scheduled = 0;
- }
-
- if (rc != 0) {
- spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock,
- flags);
-
- kiblnd_complete(&wc);
-
- spin_lock_irqsave(&kiblnd_data.kib_sched_lock,
- flags);
- }
-
- kiblnd_conn_decref(conn); /* ...drop my ref from above */
- did_something = 1;
- }
-
- if (did_something)
- continue;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kiblnd_data.kib_sched_waitq, &wait);
- spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags);
-
- schedule();
- busy_loops = 0;
-
- remove_wait_queue(&kiblnd_data.kib_sched_waitq, &wait);
- set_current_state(TASK_RUNNING);
- spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
- }
-
- spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags);
-
- kiblnd_thread_fini();
- return (0);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2006 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "o2iblnd.h"
-
-static int service = 987;
-CFS_MODULE_PARM(service, "i", int, 0444,
- "service number (within RDMA_PS_TCP)");
-
-static int cksum = 0;
-CFS_MODULE_PARM(cksum, "i", int, 0644,
- "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
- "timeout (seconds)");
-
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# of message descriptors");
-
-static int credits = 64;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int peer_credits = 8;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
- "# concurrent sends to 1 peer");
-
-static char *ipif_name = "ib0";
-CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
- "IPoIB interface name");
-
-static int retry_count = 5;
-CFS_MODULE_PARM(retry_count, "i", int, 0644,
- "Retransmissions when no ACK received");
-
-static int rnr_retry_count = 6;
-CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
- "RNR retransmissions");
-
-static int keepalive = 100;
-CFS_MODULE_PARM(keepalive, "i", int, 0644,
- "Idle time in seconds before sending a keepalive");
-
-static int ib_mtu = 0;
-CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
- "IB MTU 256/512/1024/2048/4096");
-
-#if IBLND_MAP_ON_DEMAND
-static int concurrent_sends = IBLND_RX_MSGS;
-#else
-static int concurrent_sends = IBLND_MSG_QUEUE_SIZE;
-#endif
-CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
- "send work-queue sizing");
-
-#if IBLND_MAP_ON_DEMAND
-static int fmr_pool_size = 512;
-CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
- "size of the fmr pool (>= ntx)");
-
-static int fmr_flush_trigger = 384;
-CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
- "# dirty FMRs that triggers pool flush");
-
-static int fmr_cache = 1;
-CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
- "non-zero to enable FMR caching");
-#endif
-
-kib_tunables_t kiblnd_tunables = {
- .kib_service = &service,
- .kib_cksum = &cksum,
- .kib_timeout = &timeout,
- .kib_keepalive = &keepalive,
- .kib_ntx = &ntx,
- .kib_credits = &credits,
- .kib_peercredits = &peer_credits,
- .kib_default_ipif = &ipif_name,
- .kib_retry_count = &retry_count,
- .kib_rnr_retry_count = &rnr_retry_count,
- .kib_concurrent_sends = &concurrent_sends,
- .kib_ib_mtu = &ib_mtu,
-#if IBLND_MAP_ON_DEMAND
- .kib_fmr_pool_size = &fmr_pool_size,
- .kib_fmr_flush_trigger = &fmr_flush_trigger,
- .kib_fmr_cache = &fmr_cache,
-#endif
-};
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-static char ipif_basename_space[32];
-
-static cfs_sysctl_table_t kiblnd_ctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "service",
- .data = &service,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 2,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 3,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 6,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 7,
- .procname = "ipif_name",
- .data = ipif_basename_space,
- .maxlen = sizeof(ipif_basename_space),
- .mode = 0444,
- .proc_handler = &proc_dostring
- },
- {
- .ctl_name = 8,
- .procname = "retry_count",
- .data = &retry_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 9,
- .procname = "rnr_retry_count",
- .data = &rnr_retry_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 10,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 11,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 12,
- .procname = "ib_mtu",
- .data = &ib_mtu,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
-#if IBLND_MAP_ON_DEMAND
- {
- .ctl_name = 13,
- .procname = "fmr_pool_size",
- .data = &fmr_pool_size,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 14,
- .procname = "fmr_flush_trigger",
- .data = &fmr_flush_trigger,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 15,
- .procname = "fmr_cache",
- .data = &fmr_cache,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
-#endif
- {0}
-};
-
-static cfs_sysctl_table_t kiblnd_top_ctl_table[] = {
- {
- .ctl_name = 203,
- .procname = "o2iblnd",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = kiblnd_ctl_table
- },
- {0}
-};
-
-void
-kiblnd_initstrtunable(char *space, char *str, int size)
-{
- strncpy(space, str, size);
- space[size-1] = 0;
-}
-
-void
-kiblnd_sysctl_init (void)
-{
- kiblnd_initstrtunable(ipif_basename_space, ipif_name,
- sizeof(ipif_basename_space));
-
- kiblnd_tunables.kib_sysctl =
- cfs_register_sysctl_table(kiblnd_top_ctl_table, 0);
-
- if (kiblnd_tunables.kib_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-}
-
-void
-kiblnd_sysctl_fini (void)
-{
- if (kiblnd_tunables.kib_sysctl != NULL)
- cfs_unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
-}
-
-#else
-
-void
-kiblnd_sysctl_init (void)
-{
-}
-
-void
-kiblnd_sysctl_fini (void)
-{
-}
-
-#endif
-
-int
-kiblnd_tunables_init (void)
-{
- kiblnd_sysctl_init();
-
- if (*kiblnd_tunables.kib_concurrent_sends > IBLND_RX_MSGS)
- *kiblnd_tunables.kib_concurrent_sends = IBLND_RX_MSGS;
- if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE)
- *kiblnd_tunables.kib_concurrent_sends = IBLND_MSG_QUEUE_SIZE;
-
- return 0;
-}
-
-void
-kiblnd_tunables_fini (void)
-{
- kiblnd_sysctl_fini();
-}
-
-
-
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-MODULES := kopeniblnd
-kopeniblnd-objs := openiblnd.o openiblnd_cb.o openiblnd_modparams.o
-
-EXTRA_POST_CFLAGS := @OPENIBCPPFLAGS@
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_OPENIBLND
-modulenet_DATA = kopeniblnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kopeniblnd-objs:%.o=%.c) openiblnd.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "openiblnd.h"
-
-lnd_t the_kiblnd = {
-#ifdef USING_TSAPI
- .lnd_type = CIBLND,
-#else
- .lnd_type = OPENIBLND,
-#endif
- .lnd_startup = kibnal_startup,
- .lnd_shutdown = kibnal_shutdown,
- .lnd_ctl = kibnal_ctl,
- .lnd_send = kibnal_send,
- .lnd_recv = kibnal_recv,
- .lnd_eager_recv = kibnal_eager_recv,
- .lnd_accept = kibnal_accept,
-};
-
-kib_data_t kibnal_data;
-
-__u32
-kibnal_cksum (void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return (sum == 0) ? 1 : sum;
-}
-
-void
-kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
-{
- msg->ibm_type = type;
- msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
-}
-
-void
-kibnal_pack_msg(kib_msg_t *msg, int version, int credits,
- lnet_nid_t dstnid, __u64 dststamp)
-{
- /* CAVEAT EMPTOR! all message fields not set here should have been
- * initialised previously. */
- msg->ibm_magic = IBNAL_MSG_MAGIC;
- msg->ibm_version = version;
- /* ibm_type */
- msg->ibm_credits = credits;
- /* ibm_nob */
- msg->ibm_cksum = 0;
- msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid,
- dstnid);
- msg->ibm_srcstamp = kibnal_data.kib_incarnation;
- msg->ibm_dstnid = dstnid;
- msg->ibm_dststamp = dststamp;
-
- if (*kibnal_tunables.kib_cksum) {
- /* NB ibm_cksum zero while computing cksum */
- msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
- }
-}
-
-int
-kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob)
-{
- const int hdr_size = offsetof(kib_msg_t, ibm_u);
- __u32 msg_cksum;
- int msg_version;
- int flip;
- int msg_nob;
-
- if (nob < 6) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
- flip = 1;
- } else {
- CERROR("Bad magic: %08x\n", msg->ibm_magic);
- return -EPROTO;
- }
-
- msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
- if ((expected_version == 0) ?
- (msg_version != IBNAL_MSG_VERSION &&
- msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) :
- (msg_version != expected_version)) {
- CERROR("Bad version: %x\n", msg_version);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
- if (msg_nob > nob) {
- CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
- return -EPROTO;
- }
-
- /* checksum must be computed with ibm_cksum zero and BEFORE anything
- * gets flipped */
- msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
- msg->ibm_cksum = 0;
- if (msg_cksum != 0 &&
- msg_cksum != kibnal_cksum(msg, msg_nob)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
- msg->ibm_cksum = msg_cksum;
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- msg->ibm_version = msg_version;
- LASSERT (sizeof(msg->ibm_type) == 1);
- LASSERT (sizeof(msg->ibm_credits) == 1);
- msg->ibm_nob = msg_nob;
- __swab64s(&msg->ibm_srcnid);
- __swab64s(&msg->ibm_srcstamp);
- __swab64s(&msg->ibm_dstnid);
- __swab64s(&msg->ibm_dststamp);
- }
-
- if (msg->ibm_srcnid == LNET_NID_ANY) {
- CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
- return -EPROTO;
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Unknown message type %x\n", msg->ibm_type);
- return -EPROTO;
-
- case IBNAL_MSG_SVCQRY:
- case IBNAL_MSG_NOOP:
- break;
-
- case IBNAL_MSG_SVCRSP:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.svcrsp)) {
- CERROR("Short SVCRSP: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.svcrsp)));
- return -EPROTO;
- }
- if (flip) {
- __swab64s(&msg->ibm_u.svcrsp.ibsr_svc_id);
- __swab16s(&msg->ibm_u.svcrsp.ibsr_svc_pkey);
- }
- break;
-
- case IBNAL_MSG_CONNREQ:
- case IBNAL_MSG_CONNACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
- CERROR("Short CONNREQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
- return -EPROTO;
- }
- if (flip)
- __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
- break;
-
- case IBNAL_MSG_IMMEDIATE:
- if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
- CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
- return -EPROTO;
- }
- break;
-
- case IBNAL_MSG_PUT_RDMA:
- case IBNAL_MSG_GET_RDMA:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.rdma)) {
- CERROR("Short RDMA req: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.rdma)));
- return -EPROTO;
- }
- if (flip) {
- __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_key);
- __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_nob);
- __swab64s(&msg->ibm_u.rdma.ibrm_desc.rd_addr);
- }
- break;
-
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
- CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.completion)));
- return -EPROTO;
- }
- if (flip)
- __swab32s(&msg->ibm_u.completion.ibcm_status);
- break;
- }
- return 0;
-}
-
-int
-kibnal_make_svcqry (kib_conn_t *conn)
-{
- kib_peer_t *peer = conn->ibc_peer;
- int version = IBNAL_MSG_VERSION;
- int msg_version;
- kib_msg_t *msg;
- struct socket *sock;
- int rc;
- int nob;
-
- LASSERT (conn->ibc_connreq != NULL);
- msg = &conn->ibc_connreq->cr_msg;
-
- again:
- kibnal_init_msg(msg, IBNAL_MSG_SVCQRY, 0);
- kibnal_pack_msg(msg, version, 0, peer->ibp_nid, 0);
-
- rc = lnet_connect(&sock, peer->ibp_nid,
- 0, peer->ibp_ip, peer->ibp_port);
- if (rc != 0)
- return -ECONNABORTED;
-
- rc = libcfs_sock_write(sock, msg, msg->ibm_nob,
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Error %d sending svcqry to %s at %u.%u.%u.%u/%d\n",
- rc, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- goto out;
- }
-
- /* The first 6 bytes are invariably MAGIC + proto version */
- rc = libcfs_sock_read(sock, msg, 6, *kibnal_tunables.kib_timeout);
- if (rc != 0) {
- CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
- rc, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- goto out;
- }
-
- if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
- msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
- CERROR("Bad magic: %08x from %s at %u.%u.%u.%u/%d\n",
- msg->ibm_magic, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- msg_version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ?
- msg->ibm_version : __swab16(msg->ibm_version);
- if (msg_version != version) {
- if (version == IBNAL_MSG_VERSION) {
- /* retry with previous version */
- libcfs_sock_release(sock);
- version = IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD;
- goto again;
- }
-
- CERROR("Bad version %x from %s at %u.%u.%u.%u/%d\n",
- msg_version, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- /* Read in the rest of the message now we know the expected format */
- nob = offsetof(kib_msg_t, ibm_u) + sizeof(kib_svcrsp_t);
- rc = libcfs_sock_read(sock, ((char *)msg) + 6, nob - 6,
- *kibnal_tunables.kib_timeout);
- if (rc != 0) {
- CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
- rc, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- goto out;
- }
-
- rc = kibnal_unpack_msg(msg, version, nob);
- if (rc != 0) {
- CERROR("Error %d unpacking svcrsp from %s at %u.%u.%u.%u/%d\n",
- rc, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- goto out;
- }
-
- if (msg->ibm_type != IBNAL_MSG_SVCRSP) {
- CERROR("Unexpected response type %d from %s at %u.%u.%u.%u/%d\n",
- msg->ibm_type, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
- msg->ibm_dstnid) ||
- msg->ibm_dststamp != kibnal_data.kib_incarnation) {
- CERROR("Unexpected dst NID/stamp %s/"LPX64" from "
- "%s at %u.%u.%u.%u/%d\n",
- libcfs_nid2str(msg->ibm_dstnid), msg->ibm_dststamp,
- libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip),
- peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- if (!lnet_ptlcompat_matchnid(peer->ibp_nid, msg->ibm_srcnid)) {
- CERROR("Unexpected src NID %s from %s at %u.%u.%u.%u/%d\n",
- libcfs_nid2str(msg->ibm_srcnid),
- libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_connreq->cr_svcrsp = msg->ibm_u.svcrsp;
- conn->ibc_version = version;
-
- out:
- libcfs_sock_release(sock);
- return rc;
-}
-
-void
-kibnal_handle_svcqry (struct socket *sock)
-{
- __u32 peer_ip;
- unsigned int peer_port;
- kib_msg_t *msg;
- __u64 srcnid;
- __u64 srcstamp;
- int version;
- int reject = 0;
- int rc;
-
- rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- if (rc != 0) {
- CERROR("Can't get peer's IP: %d\n", rc);
- return;
- }
-
- LIBCFS_ALLOC(msg, sizeof(*msg));
- if (msg == NULL) {
- CERROR("Can't allocate msgs for %u.%u.%u.%u/%d\n",
- HIPQUAD(peer_ip), peer_port);
- return;
- }
-
- rc = libcfs_sock_read(sock, &msg->ibm_magic, sizeof(msg->ibm_magic),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Error %d receiving svcqry(1) from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
- msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
- /* Unexpected magic! */
- if (the_lnet.ln_ptlcompat == 0) {
- if (msg->ibm_magic == LNET_PROTO_MAGIC ||
- msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) {
- /* future protocol version compatibility!
- * When LNET unifies protocols over all LNDs,
- * the first thing sent will be a version
- * query. I send back a reply in my current
- * protocol to tell her I'm "old" */
- kibnal_init_msg(msg, 0, 0);
- kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0,
- LNET_NID_ANY, 0);
- reject = 1;
- goto reply;
- }
-
- CERROR ("Bad magic(1) %#08x (%#08x expected) from "
- "%u.%u.%u.%u/%d\n", msg->ibm_magic,
- IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- /* When portals compatibility is set, I may be passed a new
- * connection "blindly" by the acceptor, and I have to
- * determine if my peer has sent an acceptor connection request
- * or not. */
- rc = lnet_accept(kibnal_data.kib_ni, sock, msg->ibm_magic);
- if (rc != 0)
- goto out;
-
- /* It was an acceptor connection request!
- * Now I should see my magic... */
- rc = libcfs_sock_read(sock, &msg->ibm_magic,
- sizeof(msg->ibm_magic),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Error %d receiving svcqry(2) from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
- msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
- CERROR ("Bad magic(2) %#08x (%#08x expected) from "
- "%u.%u.%u.%u/%d\n", msg->ibm_magic,
- IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
- }
-
- /* Now check version */
-
- rc = libcfs_sock_read(sock, &msg->ibm_version, sizeof(msg->ibm_version),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Error %d receiving svcqry(3) from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ?
- msg->ibm_version : __swab16(msg->ibm_version);
- /* Peer is a different protocol version: reply in my current protocol
- * to tell her I'm "old" */
- if (version != IBNAL_MSG_VERSION &&
- version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- kibnal_init_msg(msg, 0, 0);
- kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0, LNET_NID_ANY, 0);
- reject = 1;
- goto reply;
- }
-
- /* Now read in all the rest */
- rc = libcfs_sock_read(sock, &msg->ibm_type,
- offsetof(kib_msg_t, ibm_u) -
- offsetof(kib_msg_t, ibm_type),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Error %d receiving svcqry(4) from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- rc = kibnal_unpack_msg(msg, version, offsetof(kib_msg_t, ibm_u));
- if (rc != 0) {
- CERROR("Error %d unpacking svcqry from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- if (msg->ibm_type != IBNAL_MSG_SVCQRY) {
- CERROR("Unexpected message %d from %u.%u.%u.%u/%d\n",
- msg->ibm_type, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
- msg->ibm_dstnid)) {
- CERROR("Unexpected dstnid %s: expected %s from %u.%u.%u.%u/%d\n",
- libcfs_nid2str(msg->ibm_dstnid),
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid),
- HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- srcnid = msg->ibm_srcnid;
- srcstamp = msg->ibm_srcstamp;
-
- kibnal_init_msg(msg, IBNAL_MSG_SVCRSP, sizeof(msg->ibm_u.svcrsp));
-
- msg->ibm_u.svcrsp.ibsr_svc_id = kibnal_data.kib_svc_id;
- memcpy(msg->ibm_u.svcrsp.ibsr_svc_gid, kibnal_data.kib_svc_gid,
- sizeof(kibnal_data.kib_svc_gid));
- msg->ibm_u.svcrsp.ibsr_svc_pkey = kibnal_data.kib_svc_pkey;
-
- kibnal_pack_msg(msg, version, 0, srcnid, srcstamp);
-
- reply:
- rc = libcfs_sock_write (sock, msg, msg->ibm_nob,
- lnet_acceptor_timeout());
- if (!reject && rc != 0) {
- /* Only complain if we're not rejecting */
- CERROR("Error %d replying to svcqry from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- out:
- LIBCFS_FREE(msg, sizeof(*msg));
-}
-
-void
-kibnal_free_acceptsock (kib_acceptsock_t *as)
-{
- libcfs_sock_release(as->ibas_sock);
- LIBCFS_FREE(as, sizeof(*as));
-}
-
-int
-kibnal_accept(lnet_ni_t *ni, struct socket *sock)
-{
- kib_acceptsock_t *as;
- unsigned long flags;
-
- LIBCFS_ALLOC(as, sizeof(*as));
- if (as == NULL) {
- CERROR("Out of Memory\n");
- return -ENOMEM;
- }
-
- as->ibas_sock = sock;
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- list_add_tail(&as->ibas_list, &kibnal_data.kib_connd_acceptq);
- wake_up(&kibnal_data.kib_connd_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
- return 0;
-}
-
-int
-kibnal_start_ib_listener (void)
-{
- int rc;
-
- LASSERT (kibnal_data.kib_listen_handle == NULL);
-
- kibnal_data.kib_svc_id = ib_cm_service_assign();
- CDEBUG(D_NET, "svc id "LPX64"\n", kibnal_data.kib_svc_id);
-
- rc = ib_cached_gid_get(kibnal_data.kib_device,
- kibnal_data.kib_port, 0,
- kibnal_data.kib_svc_gid);
- if (rc != 0) {
- CERROR("Can't get port %d GID: %d\n",
- kibnal_data.kib_port, rc);
- return rc;
- }
-
- rc = ib_cached_pkey_get(kibnal_data.kib_device,
- kibnal_data.kib_port, 0,
- &kibnal_data.kib_svc_pkey);
- if (rc != 0) {
- CERROR ("Can't get port %d PKEY: %d\n",
- kibnal_data.kib_port, rc);
- return rc;
- }
-
- rc = ib_cm_listen(kibnal_data.kib_svc_id,
- TS_IB_CM_SERVICE_EXACT_MASK,
- kibnal_passive_conn_callback, NULL,
- &kibnal_data.kib_listen_handle);
- if (rc != 0) {
- kibnal_data.kib_listen_handle = NULL;
- CERROR ("Can't create IB listener: %d\n", rc);
- return rc;
- }
-
- LASSERT (kibnal_data.kib_listen_handle != NULL);
- return 0;
-}
-
-void
-kibnal_stop_ib_listener (void)
-{
- int rc;
-
- LASSERT (kibnal_data.kib_listen_handle != NULL);
-
- rc = ib_cm_listen_stop (kibnal_data.kib_listen_handle);
- if (rc != 0)
- CERROR("Error stopping IB listener: %d\n", rc);
-
- kibnal_data.kib_listen_handle = NULL;
-}
-
-int
-kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- unsigned long flags;
- int rc;
-
- LASSERT (nid != LNET_NID_ANY);
-
- LIBCFS_ALLOC(peer, sizeof (*peer));
- if (peer == NULL) {
- CERROR("Cannot allocate peer\n");
- return -ENOMEM;
- }
-
- memset(peer, 0, sizeof(*peer)); /* zero flags etc */
-
- peer->ibp_nid = nid;
- atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */
-
- INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */
- INIT_LIST_HEAD (&peer->ibp_conns);
- INIT_LIST_HEAD (&peer->ibp_tx_queue);
- INIT_LIST_HEAD (&peer->ibp_connd_list); /* not queued for connecting */
-
- peer->ibp_error = 0;
- peer->ibp_last_alive = cfs_time_current();
- peer->ibp_reconnect_interval = 0; /* OK to connect at any time */
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (atomic_read(&kibnal_data.kib_npeers) >=
- *kibnal_tunables.kib_concurrent_peers) {
- rc = -EOVERFLOW; /* !! but at least it distinguishes */
- } else if (kibnal_data.kib_nonewpeers) {
- rc = -ESHUTDOWN; /* shutdown has started */
- } else {
- rc = 0;
- /* npeers only grows with kib_global_lock held */
- atomic_inc(&kibnal_data.kib_npeers);
- }
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (rc != 0) {
- CERROR("Can't create peer: %s\n",
- (rc == -ESHUTDOWN) ? "shutting down" :
- "too many peers");
- LIBCFS_FREE(peer, sizeof(*peer));
- } else {
- *peerp = peer;
- }
-
- return rc;
-}
-
-void
-kibnal_destroy_peer (kib_peer_t *peer)
-{
- CDEBUG (D_NET, "peer %s %p deleted\n",
- libcfs_nid2str(peer->ibp_nid), peer);
-
- LASSERT (atomic_read (&peer->ibp_refcount) == 0);
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (!kibnal_peer_active(peer));
- LASSERT (peer->ibp_connecting == 0);
- LASSERT (peer->ibp_accepting == 0);
- LASSERT (list_empty (&peer->ibp_connd_list));
- LASSERT (list_empty (&peer->ibp_conns));
- LASSERT (list_empty (&peer->ibp_tx_queue));
-
- LIBCFS_FREE (peer, sizeof (*peer));
-
- /* NB a peer's connections keep a reference on their peer until
- * they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
- * zero. */
- atomic_dec(&kibnal_data.kib_npeers);
-}
-
-kib_peer_t *
-kibnal_find_peer_locked (lnet_nid_t nid)
-{
- struct list_head *peer_list = kibnal_nid2peerlist (nid);
- struct list_head *tmp;
- kib_peer_t *peer;
-
- list_for_each (tmp, peer_list) {
-
- peer = list_entry (tmp, kib_peer_t, ibp_list);
-
- LASSERT (peer->ibp_persistence != 0 || /* persistent peer */
- peer->ibp_connecting != 0 || /* creating conns */
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns)); /* active conn */
-
- if (peer->ibp_nid != nid)
- continue;
-
- return (peer);
- }
- return (NULL);
-}
-
-kib_peer_t *
-kibnal_get_peer (lnet_nid_t nid)
-{
- kib_peer_t *peer;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL) /* +1 ref for caller? */
- kibnal_peer_addref(peer);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- return (peer);
-}
-
-void
-kibnal_unlink_peer_locked (kib_peer_t *peer)
-{
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (list_empty(&peer->ibp_conns));
-
- LASSERT (kibnal_peer_active(peer));
- list_del_init (&peer->ibp_list);
- /* lose peerlist's ref */
- kibnal_peer_decref(peer);
-}
-
-int
-kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp,
- int *persistencep)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- unsigned long flags;
- int i;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
-
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- if (index-- > 0)
- continue;
-
- *nidp = peer->ibp_nid;
- *ipp = peer->ibp_ip;
- *portp = peer->ibp_port;
- *persistencep = peer->ibp_persistence;
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (0);
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (-ENOENT);
-}
-
-int
-kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port)
-{
- unsigned long flags;
- kib_peer_t *peer;
- kib_peer_t *peer2;
- int rc;
-
- if (nid == LNET_NID_ANY)
- return (-EINVAL);
-
- rc = kibnal_create_peer (&peer, nid);
- if (rc != 0)
- return rc;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- /* I'm always called with a reference on kibnal_data.kib_ni
- * so shutdown can't have started */
- LASSERT (kibnal_data.kib_nonewpeers == 0);
-
- peer2 = kibnal_find_peer_locked (nid);
- if (peer2 != NULL) {
- kibnal_peer_decref(peer);
- peer = peer2;
- } else {
- /* peer table takes existing ref on peer */
- list_add_tail (&peer->ibp_list,
- kibnal_nid2peerlist (nid));
- }
-
- peer->ibp_ip = ip;
- peer->ibp_port = port;
- peer->ibp_persistence++;
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
- return (0);
-}
-
-void
-kibnal_del_peer_locked (kib_peer_t *peer)
-{
- struct list_head *ctmp;
- struct list_head *cnxt;
- kib_conn_t *conn;
-
- peer->ibp_persistence = 0;
-
- if (list_empty(&peer->ibp_conns)) {
- kibnal_unlink_peer_locked(peer);
- } else {
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, kib_conn_t, ibc_list);
-
- kibnal_close_conn_locked (conn, 0);
- }
- /* NB peer is no longer persistent; closing its last conn
- * unlinked it. */
- }
- /* NB peer now unlinked; might even be freed if the peer table had the
- * last ref on it. */
-}
-
-int
-kibnal_del_peer (lnet_nid_t nid)
-{
- unsigned long flags;
- CFS_LIST_HEAD (zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- kib_peer_t *peer;
- int lo;
- int hi;
- int i;
- int rc = -ENOENT;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
- continue;
-
- if (!list_empty(&peer->ibp_tx_queue)) {
- LASSERT (list_empty(&peer->ibp_conns));
-
- list_splice_init(&peer->ibp_tx_queue, &zombies);
- }
-
- kibnal_del_peer_locked (peer);
- rc = 0; /* matched something */
- }
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- kibnal_txlist_done(&zombies, -EIO);
-
- return (rc);
-}
-
-kib_conn_t *
-kibnal_get_conn_by_idx (int index)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- kib_conn_t *conn;
- struct list_head *ctmp;
- unsigned long flags;
- int i;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence > 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- list_for_each (ctmp, &peer->ibp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
- kibnal_conn_addref(conn);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (conn);
- }
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (NULL);
-}
-
-kib_conn_t *
-kibnal_create_conn (void)
-{
- kib_conn_t *conn;
- int i;
- __u64 vaddr = 0;
- __u64 vaddr_base;
- int page_offset;
- int ipage;
- int rc;
- union {
- struct ib_qp_create_param qp_create;
- struct ib_qp_attribute qp_attr;
- } params;
-
- LIBCFS_ALLOC (conn, sizeof (*conn));
- if (conn == NULL) {
- CERROR ("Can't allocate connection\n");
- return (NULL);
- }
-
- /* zero flags, NULL pointers etc... */
- memset (conn, 0, sizeof (*conn));
-
- INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
- INIT_LIST_HEAD (&conn->ibc_tx_queue);
- INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
- INIT_LIST_HEAD (&conn->ibc_active_txs);
- spin_lock_init (&conn->ibc_lock);
-
- atomic_inc (&kibnal_data.kib_nconns);
- /* well not really, but I call destroy() on failure, which decrements */
-
- LIBCFS_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
- if (conn->ibc_rxs == NULL)
- goto failed;
- memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- rc = kibnal_alloc_pages(&conn->ibc_rx_pages,
- IBNAL_RX_MSG_PAGES,
- IB_ACCESS_LOCAL_WRITE);
- if (rc != 0)
- goto failed;
-
- vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
-
- for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
- struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
- kib_rx_t *rx = &conn->ibc_rxs[i];
-
- rx->rx_conn = conn;
- rx->rx_vaddr = vaddr;
- rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset);
-
- vaddr += IBNAL_MSG_SIZE;
- LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
-
- page_offset += IBNAL_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBNAL_RX_MSG_PAGES);
- }
- }
-
- /* We can post up to IBNAL_RX_MSGS, which may also include an
- * additional RDMA work item */
-
- params.qp_create = (struct ib_qp_create_param) {
- .limit = {
- .max_outstanding_send_request = 2 * IBNAL_RX_MSGS,
- .max_outstanding_receive_request = IBNAL_RX_MSGS,
- .max_send_gather_element = 1,
- .max_receive_scatter_element = 1,
- },
- .pd = kibnal_data.kib_pd,
- .send_queue = kibnal_data.kib_cq,
- .receive_queue = kibnal_data.kib_cq,
- .send_policy = IB_WQ_SIGNAL_SELECTABLE,
- .receive_policy = IB_WQ_SIGNAL_SELECTABLE,
- .rd_domain = 0,
- .transport = IB_TRANSPORT_RC,
- .device_specific = NULL,
- };
-
- rc = ib_qp_create (¶ms.qp_create, &conn->ibc_qp, &conn->ibc_qpn);
- if (rc != 0) {
- CERROR ("Failed to create queue pair: %d\n", rc);
- goto failed;
- }
-
- /* Mark QP created */
- conn->ibc_state = IBNAL_CONN_INIT_QP;
-
- params.qp_attr = (struct ib_qp_attribute) {
- .state = IB_QP_STATE_INIT,
- .port = kibnal_data.kib_port,
- .enable_rdma_read = 1,
- .enable_rdma_write = 1,
- .valid_fields = (IB_QP_ATTRIBUTE_STATE |
- IB_QP_ATTRIBUTE_PORT |
- IB_QP_ATTRIBUTE_PKEY_INDEX |
- IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE),
- };
- rc = ib_qp_modify(conn->ibc_qp, ¶ms.qp_attr);
- if (rc != 0) {
- CERROR ("Failed to modify queue pair: %d\n", rc);
- goto failed;
- }
-
- /* 1 ref for caller */
- atomic_set (&conn->ibc_refcount, 1);
- return (conn);
-
- failed:
- kibnal_destroy_conn (conn);
- return (NULL);
-}
-
-void
-kibnal_destroy_conn (kib_conn_t *conn)
-{
- int rc;
-
- CDEBUG (D_NET, "connection %p\n", conn);
-
- LASSERT (atomic_read (&conn->ibc_refcount) == 0);
- LASSERT (list_empty(&conn->ibc_tx_queue));
- LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
- LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
- LASSERT (list_empty(&conn->ibc_active_txs));
- LASSERT (conn->ibc_nsends_posted == 0);
- LASSERT (conn->ibc_connreq == NULL);
-
- switch (conn->ibc_state) {
- case IBNAL_CONN_ZOMBIE:
- /* called after connection sequence initiated */
-
- case IBNAL_CONN_INIT_QP:
- rc = ib_qp_destroy(conn->ibc_qp);
- if (rc != 0)
- CERROR("Can't destroy QP: %d\n", rc);
- /* fall through */
-
- case IBNAL_CONN_INIT_NOTHING:
- break;
-
- default:
- LASSERT (0);
- }
-
- if (conn->ibc_rx_pages != NULL)
- kibnal_free_pages(conn->ibc_rx_pages);
-
- if (conn->ibc_rxs != NULL)
- LIBCFS_FREE(conn->ibc_rxs,
- IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- if (conn->ibc_peer != NULL)
- kibnal_peer_decref(conn->ibc_peer);
-
- LIBCFS_FREE(conn, sizeof (*conn));
-
- atomic_dec(&kibnal_data.kib_nconns);
-
- if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
- kibnal_data.kib_shutdown) {
- /* I just nuked the last connection on shutdown; wake up
- * everyone so they can exit. */
- wake_up_all(&kibnal_data.kib_sched_waitq);
- wake_up_all(&kibnal_data.kib_reaper_waitq);
- }
-}
-
-int
-kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- count++;
- kibnal_close_conn_locked (conn, why);
- }
-
- return (count);
-}
-
-int
-kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- if (conn->ibc_incarnation == incarnation)
- continue;
-
- CDEBUG(D_NET, "Closing stale conn %p nid: %s"
- " incarnation:"LPX64"("LPX64")\n", conn,
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_incarnation, incarnation);
-
- count++;
- kibnal_close_conn_locked (conn, -ESTALE);
- }
-
- return (count);
-}
-
-int
-kibnal_close_matching_conns (lnet_nid_t nid)
-{
- unsigned long flags;
- kib_peer_t *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- int count = 0;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
- continue;
-
- count += kibnal_close_peer_conns_locked (peer, 0);
- }
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- /* wildcards always succeed */
- if (nid == LNET_NID_ANY)
- return (0);
-
- return (count == 0 ? -ENOENT : 0);
-}
-
-int
-kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
-
- LASSERT (ni == kibnal_data.kib_ni);
-
- switch(cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- __u32 ip = 0;
- int port = 0;
- int share_count = 0;
-
- rc = kibnal_get_peer_info(data->ioc_count,
- &nid, &ip, &port, &share_count);
- data->ioc_nid = nid;
- data->ioc_count = share_count;
- data->ioc_u32[0] = ip;
- data->ioc_u32[1] = port;
- break;
- }
- case IOC_LIBCFS_ADD_PEER: {
- rc = kibnal_add_persistent_peer (data->ioc_nid,
- data->ioc_u32[0], /* IP */
- data->ioc_u32[1]); /* port */
- break;
- }
- case IOC_LIBCFS_DEL_PEER: {
- rc = kibnal_del_peer (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
-
- if (conn == NULL)
- rc = -ENOENT;
- else {
- rc = 0;
- data->ioc_nid = conn->ibc_peer->ibp_nid;
- kibnal_conn_decref(conn);
- }
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- rc = kibnal_close_matching_conns (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_REGISTER_MYNID: {
- /* Ignore if this is a noop */
- if (data->ioc_nid == ni->ni_nid) {
- rc = 0;
- } else {
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- rc = -EINVAL;
- }
- break;
- }
- }
-
- return rc;
-}
-
-void
-kibnal_free_pages (kib_pages_t *p)
-{
- int npages = p->ibp_npages;
- int rc;
- int i;
-
- if (p->ibp_mapped) {
- rc = ib_memory_deregister(p->ibp_handle);
- if (rc != 0)
- CERROR ("Deregister error: %d\n", rc);
- }
-
- for (i = 0; i < npages; i++)
- if (p->ibp_pages[i] != NULL)
- __free_page(p->ibp_pages[i]);
-
- LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
-}
-
-int
-kibnal_alloc_pages (kib_pages_t **pp, int npages, int access)
-{
- kib_pages_t *p;
- struct ib_physical_buffer *phys_pages;
- int i;
- int rc;
-
- LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
- if (p == NULL) {
- CERROR ("Can't allocate buffer %d\n", npages);
- return (-ENOMEM);
- }
-
- memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
- p->ibp_npages = npages;
-
- for (i = 0; i < npages; i++) {
- p->ibp_pages[i] = alloc_page (GFP_KERNEL);
- if (p->ibp_pages[i] == NULL) {
- CERROR ("Can't allocate page %d of %d\n", i, npages);
- kibnal_free_pages(p);
- return (-ENOMEM);
- }
- }
-
- LIBCFS_ALLOC(phys_pages, npages * sizeof(*phys_pages));
- if (phys_pages == NULL) {
- CERROR ("Can't allocate physarray for %d pages\n", npages);
- kibnal_free_pages(p);
- return (-ENOMEM);
- }
-
- for (i = 0; i < npages; i++) {
- phys_pages[i].size = PAGE_SIZE;
- phys_pages[i].address =
- lnet_page2phys(p->ibp_pages[i]);
- }
-
- p->ibp_vaddr = 0;
- rc = ib_memory_register_physical(kibnal_data.kib_pd,
- phys_pages, npages,
- &p->ibp_vaddr,
- npages * PAGE_SIZE, 0,
- access,
- &p->ibp_handle,
- &p->ibp_lkey,
- &p->ibp_rkey);
-
- LIBCFS_FREE(phys_pages, npages * sizeof(*phys_pages));
-
- if (rc != 0) {
- CERROR ("Error %d mapping %d pages\n", rc, npages);
- kibnal_free_pages(p);
- return (rc);
- }
-
- p->ibp_mapped = 1;
- *pp = p;
- return (0);
-}
-
-int
-kibnal_setup_tx_descs (void)
-{
- int ipage = 0;
- int page_offset = 0;
- __u64 vaddr;
- __u64 vaddr_base;
- struct page *page;
- kib_tx_t *tx;
- int i;
- int rc;
-
- /* pre-mapped messages are not bigger than 1 page */
- LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
-
- /* No fancy arithmetic when we do the buffer calculations */
- LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
-
- rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
- IBNAL_TX_MSG_PAGES(),
- 0); /* local read access only */
- if (rc != 0)
- return (rc);
-
- vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr;
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
- tx = &kibnal_data.kib_tx_descs[i];
-
- memset (tx, 0, sizeof(*tx)); /* zero flags etc */
-
- tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset);
- tx->tx_vaddr = vaddr;
- tx->tx_mapped = KIB_TX_UNMAPPED;
-
- CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n",
- i, tx, tx->tx_msg, tx->tx_vaddr);
-
- list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
-
- vaddr += IBNAL_MSG_SIZE;
- LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES());
-
- page_offset += IBNAL_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
- }
- }
-
- return (0);
-}
-
-void
-kibnal_shutdown (lnet_ni_t *ni)
-{
- int i;
- int rc;
- unsigned long flags;
-
- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- LASSERT(ni == kibnal_data.kib_ni);
- LASSERT(ni->ni_data == &kibnal_data);
-
- switch (kibnal_data.kib_init) {
- default:
- CERROR ("Unexpected state %d\n", kibnal_data.kib_init);
- LBUG();
-
- case IBNAL_INIT_ALL:
- /* Prevent new peers from being created */
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- kibnal_data.kib_nonewpeers = 1;
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_stop_ib_listener();
-
- /* Remove all existing peers from the peer table */
- kibnal_del_peer(LNET_NID_ANY);
-
- /* Wait for pending conn reqs to be handled */
- i = 2;
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
- while (!list_empty(&kibnal_data.kib_connd_acceptq)) {
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock,
- flags);
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
- "waiting for conn reqs to clean up\n");
- cfs_pause(cfs_time_seconds(1));
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
- }
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
-
- /* Wait for all peer state to clean up */
- i = 2;
- while (atomic_read(&kibnal_data.kib_npeers) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d peers to close down\n",
- atomic_read(&kibnal_data.kib_npeers));
- cfs_pause(cfs_time_seconds(1));
- }
- /* fall through */
-
- case IBNAL_INIT_CQ:
- rc = ib_cq_destroy (kibnal_data.kib_cq);
- if (rc != 0)
- CERROR ("Destroy CQ error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_TXD:
- kibnal_free_pages (kibnal_data.kib_tx_pages);
- /* fall through */
-#if IBNAL_FMR
- case IBNAL_INIT_FMR:
- rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
- if (rc != 0)
- CERROR ("Destroy FMR pool error: %d\n", rc);
- /* fall through */
-#endif
- case IBNAL_INIT_PD:
- rc = ib_pd_destroy(kibnal_data.kib_pd);
- if (rc != 0)
- CERROR ("Destroy PD error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_DATA:
- /* Module refcount only gets to zero when all peers
- * have been closed so all lists must be empty */
- LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0);
- LASSERT (kibnal_data.kib_peers != NULL);
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
- LASSERT (list_empty (&kibnal_data.kib_peers[i]));
- }
- LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
- LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
- LASSERT (list_empty (&kibnal_data.kib_sched_txq));
- LASSERT (list_empty (&kibnal_data.kib_reaper_conns));
- LASSERT (list_empty (&kibnal_data.kib_connd_peers));
- LASSERT (list_empty (&kibnal_data.kib_connd_acceptq));
-
- /* flag threads to terminate; wake and wait for them to die */
- kibnal_data.kib_shutdown = 1;
- wake_up_all (&kibnal_data.kib_sched_waitq);
- wake_up_all (&kibnal_data.kib_reaper_waitq);
- wake_up_all (&kibnal_data.kib_connd_waitq);
-
- i = 2;
- while (atomic_read (&kibnal_data.kib_nthreads) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d threads to terminate\n",
- atomic_read (&kibnal_data.kib_nthreads));
- cfs_pause(cfs_time_seconds(1));
- }
- /* fall through */
-
- case IBNAL_INIT_NOTHING:
- break;
- }
-
- if (kibnal_data.kib_tx_descs != NULL)
- LIBCFS_FREE (kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
-
- if (kibnal_data.kib_peers != NULL)
- LIBCFS_FREE (kibnal_data.kib_peers,
- sizeof (struct list_head) *
- kibnal_data.kib_peer_hash_size);
-
- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- kibnal_data.kib_init = IBNAL_INIT_NOTHING;
- PORTAL_MODULE_UNUSE;
-}
-
-int
-kibnal_get_ipoibidx(void)
-{
- /* NB single threaded! */
- static struct ib_port_properties port_props;
-
- int ipoibidx = 0;
- int devidx;
- int port;
- int rc;
- struct ib_device *device;
-
- for (devidx = 0; devidx <= kibnal_data.kib_hca_idx; devidx++) {
- device = ib_device_get_by_index(devidx);
-
- if (device == NULL) {
- CERROR("Can't get IB device %d\n", devidx);
- return -1;
- }
-
- for (port = 1; port <= 2; port++) {
- if (devidx == kibnal_data.kib_hca_idx &&
- port == kibnal_data.kib_port)
- return ipoibidx;
-
- rc = ib_port_properties_get(device, port,
- &port_props);
- if (rc == 0)
- ipoibidx++;
- }
- }
-
- LBUG();
- return -1;
-}
-
-int
-kibnal_startup (lnet_ni_t *ni)
-{
- char ipif_name[32];
- __u32 ip;
- __u32 netmask;
- int up;
- struct timeval tv;
- int rc;
- int hca;
- int port;
- int i;
- int nob;
-
- LASSERT (ni->ni_lnd == &the_kiblnd);
-
- /* Only 1 instance supported */
- if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
- CERROR ("Only 1 instance supported\n");
- return -EPERM;
- }
-
- if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
- CERROR ("Can't set credits(%d) > ntx(%d)\n",
- *kibnal_tunables.kib_credits,
- *kibnal_tunables.kib_ntx);
- return -EINVAL;
- }
-
- memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
-
- ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
- ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
-
- CLASSERT (LNET_MAX_INTERFACES > 1);
-
-
- kibnal_data.kib_hca_idx = 0; /* default: first HCA */
- kibnal_data.kib_port = 0; /* any port */
-
- if (ni->ni_interfaces[0] != NULL) {
- /* hca.port specified in 'networks=openib(h.p)' */
- if (ni->ni_interfaces[1] != NULL) {
- CERROR("Multiple interfaces not supported\n");
- return -EPERM;
- }
-
- nob = strlen(ni->ni_interfaces[0]);
- i = sscanf(ni->ni_interfaces[0], "%d.%d%n", &hca, &port, &nob);
- if (i >= 2 && nob == strlen(ni->ni_interfaces[0])) {
- kibnal_data.kib_hca_idx = hca;
- kibnal_data.kib_port = port;
- } else {
- nob = strlen(ni->ni_interfaces[0]);
- i = sscanf(ni->ni_interfaces[0], "%d%n", &hca, &nob);
-
- if (i >= 1 && nob == strlen(ni->ni_interfaces[0])) {
- kibnal_data.kib_hca_idx = hca;
- } else {
- CERROR("Can't parse interface '%s'\n",
- ni->ni_interfaces[0]);
- return -EINVAL;
- }
- }
- }
-
- kibnal_data.kib_ni = ni;
- ni->ni_data = &kibnal_data;
-
- do_gettimeofday(&tv);
- kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
- PORTAL_MODULE_USE;
-
- rwlock_init(&kibnal_data.kib_global_lock);
-
- kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
- LIBCFS_ALLOC (kibnal_data.kib_peers,
- sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
- if (kibnal_data.kib_peers == NULL) {
- goto failed;
- }
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++)
- INIT_LIST_HEAD(&kibnal_data.kib_peers[i]);
-
- spin_lock_init (&kibnal_data.kib_reaper_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_reaper_conns);
- init_waitqueue_head (&kibnal_data.kib_reaper_waitq);
-
- spin_lock_init (&kibnal_data.kib_connd_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_acceptq);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
- init_waitqueue_head (&kibnal_data.kib_connd_waitq);
-
- spin_lock_init (&kibnal_data.kib_sched_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_sched_txq);
- INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq);
- init_waitqueue_head (&kibnal_data.kib_sched_waitq);
-
- spin_lock_init (&kibnal_data.kib_tx_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
-
- LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
- if (kibnal_data.kib_tx_descs == NULL) {
- CERROR ("Can't allocate tx descs\n");
- goto failed;
- }
-
- /* lists/ptrs/locks initialised */
- kibnal_data.kib_init = IBNAL_INIT_DATA;
- /*****************************************************/
-
- for (i = 0; i < IBNAL_N_SCHED; i++) {
- rc = kibnal_thread_start (kibnal_scheduler,
- (void *)((unsigned long)i));
- if (rc != 0) {
- CERROR("Can't spawn openibnal scheduler[%d]: %d\n",
- i, rc);
- goto failed;
- }
- }
-
- /* must have at least 2 connds to remain responsive to svcqry while
- * connecting */
- if (*kibnal_tunables.kib_n_connd < 2)
- *kibnal_tunables.kib_n_connd = 2;
-
-
- for (i = 0; i < *kibnal_tunables.kib_n_connd; i++) {
- rc = kibnal_thread_start (kibnal_connd,
- (void *)((unsigned long)i));
- if (rc != 0) {
- CERROR("Can't spawn openibnal connd[%d]: %d\n",
- i, rc);
- goto failed;
- }
- }
-
- rc = kibnal_thread_start (kibnal_reaper, NULL);
- if (rc != 0) {
- CERROR ("Can't spawn openibnal reaper: %d\n", rc);
- goto failed;
- }
-
- kibnal_data.kib_device = ib_device_get_by_index(kibnal_data.kib_hca_idx);
- if (kibnal_data.kib_device == NULL) {
- CERROR ("Can't open ib device %d\n",
- kibnal_data.kib_hca_idx);
- goto failed;
- }
-
- rc = ib_device_properties_get(kibnal_data.kib_device,
- &kibnal_data.kib_device_props);
- if (rc != 0) {
- CERROR ("Can't get device props: %d\n", rc);
- goto failed;
- }
-
- CDEBUG(D_NET, "Max Initiator: %d Max Responder %d\n",
- kibnal_data.kib_device_props.max_initiator_per_qp,
- kibnal_data.kib_device_props.max_responder_per_qp);
-
- if (kibnal_data.kib_port != 0) {
- rc = ib_port_properties_get(kibnal_data.kib_device,
- kibnal_data.kib_port,
- &kibnal_data.kib_port_props);
- if (rc != 0) {
- CERROR("Error %d open port %d on HCA %d\n", rc,
- kibnal_data.kib_port,
- kibnal_data.kib_hca_idx);
- goto failed;
- }
- } else {
- for (i = 1; i <= 2; i++) {
- rc = ib_port_properties_get(kibnal_data.kib_device, i,
- &kibnal_data.kib_port_props);
- if (rc == 0) {
- kibnal_data.kib_port = i;
- break;
- }
- }
- if (kibnal_data.kib_port == 0) {
- CERROR ("Can't find a port\n");
- goto failed;
- }
- }
-
- i = kibnal_get_ipoibidx();
- if (i < 0)
- goto failed;
-
- snprintf(ipif_name, sizeof(ipif_name), "%s%d",
- *kibnal_tunables.kib_ipif_basename, i);
- if (strlen(ipif_name) == sizeof(ipif_name) - 1) {
- CERROR("IPoIB interface name %s truncated\n", ipif_name);
- return -EINVAL;
- }
-
- rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
- if (rc != 0) {
- CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
- goto failed;
- }
-
- if (!up) {
- CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
- goto failed;
- }
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
-
- rc = ib_pd_create(kibnal_data.kib_device,
- NULL, &kibnal_data.kib_pd);
- if (rc != 0) {
- CERROR ("Can't create PD: %d\n", rc);
- goto failed;
- }
-
- /* flag PD initialised */
- kibnal_data.kib_init = IBNAL_INIT_PD;
- /*****************************************************/
-#if IBNAL_FMR
- {
- const int pool_size = *kibnal_tunables.kib_ntx;
- struct ib_fmr_pool_param params = {
- .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
- .access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ),
- .pool_size = pool_size,
- .dirty_watermark = (pool_size * 3)/4,
- .flush_function = NULL,
- .flush_arg = NULL,
- .cache = 1,
- };
- rc = ib_fmr_pool_create(kibnal_data.kib_pd, ¶ms,
- &kibnal_data.kib_fmr_pool);
- if (rc != 0) {
- CERROR ("Can't create FMR pool size %d: %d\n",
- pool_size, rc);
- goto failed;
- }
- }
-
- /* flag FMR pool initialised */
- kibnal_data.kib_init = IBNAL_INIT_FMR;
-#endif
- /*****************************************************/
-
- rc = kibnal_setup_tx_descs();
- if (rc != 0) {
- CERROR ("Can't register tx descs: %d\n", rc);
- goto failed;
- }
-
- /* flag TX descs initialised */
- kibnal_data.kib_init = IBNAL_INIT_TXD;
- /*****************************************************/
-
- {
- struct ib_cq_callback callback = {
- .context = IBNAL_CALLBACK_CTXT,
- .policy = IB_CQ_PROVIDER_REARM,
- .function = {
- .entry = kibnal_callback,
- },
- .arg = NULL,
- };
- int nentries = IBNAL_CQ_ENTRIES();
-
- rc = ib_cq_create (kibnal_data.kib_device,
- &nentries, &callback, NULL,
- &kibnal_data.kib_cq);
- if (rc != 0) {
- CERROR ("Can't create CQ: %d\n", rc);
- goto failed;
- }
-
- /* I only want solicited events */
- rc = ib_cq_request_notification(kibnal_data.kib_cq, 1);
- LASSERT (rc == 0);
- }
-
- /* flag CQ initialised */
- kibnal_data.kib_init = IBNAL_INIT_CQ;
- /*****************************************************/
-
- rc = kibnal_start_ib_listener();
- if (rc != 0)
- goto failed;
-
- /* flag everything initialised */
- kibnal_data.kib_init = IBNAL_INIT_ALL;
- /*****************************************************/
-
- return 0;
-
- failed:
- kibnal_shutdown(ni);
- return -ENETDOWN;
-}
-
-void __exit
-kibnal_module_fini (void)
-{
- lnet_unregister_lnd(&the_kiblnd);
- kibnal_tunables_fini();
-}
-
-int __init
-kibnal_module_init (void)
-{
- int rc;
-
- rc = kibnal_tunables_init();
- if (rc != 0)
- return rc;
-
- lnet_register_lnd(&the_kiblnd);
-
- return (0);
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-#ifdef USING_TSAPI
-MODULE_DESCRIPTION("Kernel Cisco IB LND v1.00");
-#else
-MODULE_DESCRIPTION("Kernel OpenIB(gen1) LND v1.00");
-#endif
-MODULE_LICENSE("GPL");
-
-module_init(kibnal_module_init);
-module_exit(kibnal_module_fini);
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-
-#include <ts_ib_core.h>
-#include <ts_ib_cm.h>
-#include <ts_ib_sa_client.h>
-
-#ifndef USING_TSAPI
-
-/* OpenIB Gen1 */
-typedef struct ib_qp ib_qp_t;
-typedef struct ib_mr ib_mr_t;
-typedef struct ib_fmr ib_fmr_t;
-typedef struct ib_pd ib_pd_t;
-typedef struct ib_cq ib_cq_t;
-typedef struct ib_fmr_pool ib_fmr_pool_t;
-
-#else
-
-/* Cisco (topspin) */
-typedef void ib_qp_t;
-typedef void ib_mr_t;
-typedef void ib_fmr_t;
-typedef void ib_pd_t;
-typedef void ib_cq_t;
-typedef void ib_fmr_pool_t;
-
-#define IB_ACCESS_LOCAL_WRITE TS_IB_ACCESS_LOCAL_WRITE
-#define IB_WQ_SIGNAL_SELECTABLE TS_IB_ACCESS_LOCAL_WRITE
-#define IB_TRANSPORT_RC TS_IB_TRANSPORT_RC
-#define IB_QP_STATE_INIT TS_IB_QP_STATE_INIT
-#define IB_QP_ATTRIBUTE_STATE TS_IB_QP_ATTRIBUTE_STATE
-#define IB_QP_ATTRIBUTE_PORT TS_IB_QP_ATTRIBUTE_PORT
-#define IB_QP_ATTRIBUTE_PKEY_INDEX TS_IB_QP_ATTRIBUTE_PKEY_INDEX
-#define IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE
-#define IB_ACCESS_LOCAL_WRITE TS_IB_ACCESS_LOCAL_WRITE
-#define IB_ACCESS_REMOTE_WRITE TS_IB_ACCESS_REMOTE_WRITE
-#define IB_ACCESS_REMOTE_READ TS_IB_ACCESS_REMOTE_READ
-#define IB_CQ_CALLBACK_INTERRU TS_IB_CQ_CALLBACK_INTERRUPTPT
-#define IB_CQ_PROVIDER_REARM TS_IB_CQ_PROVIDER_REARM
-#define IB_CQ_CALLBACK_INTERRUPT TS_IB_CQ_CALLBACK_INTERRUPT
-#define IB_COMPLETION_STATUS_SUCCESS TS_IB_COMPLETION_STATUS_SUCCESS
-#define IB_OP_SEND TS_IB_OP_SEND
-#define IB_OP_RDMA_WRITE TS_IB_OP_RDMA_WRITE
-#define IB_OP_RDMA_READ TS_IB_OP_RDMA_READ
-
-#endif
-
-#ifdef CONFIG_SMP
-# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */
-#else
-# define IBNAL_N_SCHED 1 /* # schedulers */
-#endif
-
-#define IBNAL_FMR 1
-//#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_PROCESS
-#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_INTERRUPT
-
-
-/* tunables fixed at compile time */
-#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */
-#define IBNAL_CREDIT_HIGHWATER 6 /* when to eagerly return credits */
-#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */
-#define IBNAL_RDMA_BASE 0x0eeb0000
-
-/* QP tunables */
-#define IBNAL_RETRY 7 /* # times to retry */
-#define IBNAL_RNR_RETRY 7 /* */
-#define IBNAL_CM_RETRY 7 /* # times to retry connection */
-#define IBNAL_FLOW_CONTROL 1
-#define IBNAL_RESPONDER_RESOURCES 8
-
-/************************/
-/* derived constants... */
-
-/* TX messages (shared by all connections) */
-#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx)
-#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE)
-#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)
-
-/* RX messages (per connection) */
-#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE * 2)
-#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
-
-/* we may have up to 2 completions per transmit +
- 1 completion per receive, per connection */
-#define IBNAL_CQ_ENTRIES() ((2*IBNAL_TX_MSGS()) + \
- (IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers))
-
-typedef struct
-{
- char **kib_ipif_basename; /* IPoIB interface base name */
- int *kib_n_connd; /* # connection daemons */
- int *kib_min_reconnect_interval; /* min connect retry seconds... */
- int *kib_max_reconnect_interval; /* max connect retry seconds */
- int *kib_concurrent_peers; /* max # peers */
- int *kib_cksum; /* checksum kib_msg_t? */
- int *kib_timeout; /* comms timeout (seconds) */
- int *kib_keepalive; /* keepalive (seconds) */
- int *kib_ntx; /* # tx descs */
- int *kib_credits; /* # concurrent sends */
- int *kib_peercredits; /* # concurrent sends to 1 peer */
-
- cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */
-} kib_tunables_t;
-
-typedef struct
-{
- int ibp_npages; /* # pages */
- int ibp_mapped; /* mapped? */
- __u64 ibp_vaddr; /* mapped region vaddr */
- __u32 ibp_lkey; /* mapped region lkey */
- __u32 ibp_rkey; /* mapped region rkey */
- ib_mr_t *ibp_handle; /* mapped region handle */
- struct page *ibp_pages[0];
-} kib_pages_t;
-
-typedef struct
-{
- int kib_init; /* initialisation state */
- __u64 kib_incarnation; /* which one am I */
- int kib_shutdown; /* shut down? */
- atomic_t kib_nthreads; /* # live threads */
- lnet_ni_t *kib_ni; /* _the_ openib interface */
-
- __u64 kib_svc_id; /* service number I listen on */
- tTS_IB_GID kib_svc_gid; /* device/port GID */
- __u16 kib_svc_pkey; /* device/port pkey */
-
- void *kib_listen_handle; /* IB listen handle */
-
- rwlock_t kib_global_lock; /* stabilize peer/conn ops */
-
- struct list_head *kib_peers; /* hash table of all my known peers */
- int kib_peer_hash_size; /* size of kib_peers */
- int kib_nonewpeers; /* prevent new peers? */
- atomic_t kib_npeers; /* # peers extant */
- atomic_t kib_nconns; /* # connections extant */
-
- struct list_head kib_reaper_conns; /* connections to reap */
- wait_queue_head_t kib_reaper_waitq; /* reaper sleeps here */
- unsigned long kib_reaper_waketime; /* when reaper will wake */
- spinlock_t kib_reaper_lock; /* serialise */
-
- struct list_head kib_connd_peers; /* peers waiting for a connection */
- struct list_head kib_connd_acceptq; /* accepted sockets to handle */
- wait_queue_head_t kib_connd_waitq; /* connection daemons sleep here */
- int kib_connd_connecting; /* # connds connecting */
- spinlock_t kib_connd_lock; /* serialise */
-
- wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */
- struct list_head kib_sched_txq; /* tx requiring attention */
- struct list_head kib_sched_rxq; /* rx requiring attention */
- spinlock_t kib_sched_lock; /* serialise */
-
- struct kib_tx *kib_tx_descs; /* all the tx descriptors */
- kib_pages_t *kib_tx_pages; /* premapped tx msg pages */
-
- struct list_head kib_idle_txs; /* idle tx descriptors */
- __u64 kib_next_tx_cookie; /* RDMA completion cookie */
- spinlock_t kib_tx_lock; /* serialise */
-
- int kib_hca_idx; /* my HCA number */
- struct ib_device *kib_device; /* "the" device */
- struct ib_device_properties kib_device_props; /* its properties */
- int kib_port; /* port on the device */
- struct ib_port_properties kib_port_props; /* its properties */
- ib_pd_t *kib_pd; /* protection domain */
-#if IBNAL_FMR
- ib_fmr_pool_t *kib_fmr_pool; /* fast memory region pool */
-#endif
- ib_cq_t *kib_cq; /* completion queue */
-
-} kib_data_t;
-
-#define IBNAL_INIT_NOTHING 0
-#define IBNAL_INIT_DATA 1
-#define IBNAL_INIT_LIB 2
-#define IBNAL_INIT_PD 3
-#define IBNAL_INIT_FMR 4
-#define IBNAL_INIT_TXD 5
-#define IBNAL_INIT_CQ 6
-#define IBNAL_INIT_ALL 7
-
-typedef struct kib_acceptsock /* accepted socket queued for connd */
-{
- struct list_head ibas_list; /* queue for attention */
- struct socket *ibas_sock; /* the accepted socket */
-} kib_acceptsock_t;
-
-/************************************************************************
- * IB Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- * They may be sent via TCP/IP (service ID,GID,PKEY query/response),
- * as private data in the connection request/response, or "normally".
- */
-
-typedef struct kib_svcrsp /* service response */
-{
- __u64 ibsr_svc_id; /* service's id */
- __u8 ibsr_svc_gid[16]; /* service's gid */
- __u16 ibsr_svc_pkey; /* service's pkey */
-} WIRE_ATTR kib_svcrsp_t;
-
-typedef struct kib_connparams
-{
- __u32 ibcp_queue_depth;
-} WIRE_ATTR kib_connparams_t;
-
-typedef struct
-{
- union {
- ib_mr_t *mr;
- ib_fmr_t *fmr;
- } md_handle;
- __u32 md_lkey;
- __u32 md_rkey;
- __u64 md_addr;
-} kib_md_t;
-
-typedef struct
-{
- __u32 rd_key; /* remote key */
- __u32 rd_nob; /* # of bytes */
- __u64 rd_addr; /* remote io vaddr */
-} WIRE_ATTR kib_rdma_desc_t;
-
-typedef struct
-{
- lnet_hdr_t ibim_hdr; /* portals header */
- char ibim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR kib_immediate_msg_t;
-
-typedef struct
-{
- lnet_hdr_t ibrm_hdr; /* portals header */
- __u64 ibrm_cookie; /* opaque completion cookie */
- kib_rdma_desc_t ibrm_desc; /* where to suck/blow */
-} WIRE_ATTR kib_rdma_msg_t;
-
-typedef struct
-{
- __u64 ibcm_cookie; /* opaque completion cookie */
- __u32 ibcm_status; /* completion status */
-} WIRE_ATTR kib_completion_msg_t;
-
-typedef struct
-{
- /* First 2 fields fixed FOR ALL TIME */
- __u32 ibm_magic; /* I'm an openibnal message */
- __u16 ibm_version; /* this is my version number */
-
- __u8 ibm_type; /* msg type */
- __u8 ibm_credits; /* returned credits */
- __u32 ibm_nob; /* # bytes in whole message */
- __u32 ibm_cksum; /* checksum (0 == no checksum) */
- __u64 ibm_srcnid; /* sender's NID */
- __u64 ibm_srcstamp; /* sender's incarnation */
- __u64 ibm_dstnid; /* destination's NID */
- __u64 ibm_dststamp; /* destination's incarnation */
- union {
- kib_svcrsp_t svcrsp;
- kib_connparams_t connparams;
- kib_immediate_msg_t immediate;
- kib_rdma_msg_t rdma;
- kib_completion_msg_t completion;
- } WIRE_ATTR ibm_u;
-} WIRE_ATTR kib_msg_t;
-
-#define IBNAL_MSG_MAGIC LNET_PROTO_OPENIB_MAGIC /* unique magic */
-#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 2 /* previous protocol version */
-#define IBNAL_MSG_VERSION 3 /* current protocol version */
-
-#define IBNAL_MSG_SVCQRY 0xb0 /* service query */
-#define IBNAL_MSG_SVCRSP 0xb1 /* service response */
-#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */
-#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */
-#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */
-#define IBNAL_MSG_IMMEDIATE 0xd1 /* portals hdr + payload */
-#define IBNAL_MSG_PUT_RDMA 0xd2 /* portals PUT hdr + source rdma desc */
-#define IBNAL_MSG_PUT_DONE 0xd3 /* signal PUT rdma completion */
-#define IBNAL_MSG_GET_RDMA 0xd4 /* portals GET hdr + sink rdma desc */
-#define IBNAL_MSG_GET_DONE 0xd5 /* signal GET rdma completion */
-
-/***********************************************************************/
-
-typedef struct kib_rx /* receive message */
-{
- struct list_head rx_list; /* queue for attention */
- struct kib_conn *rx_conn; /* owning conn */
- int rx_nob; /* # bytes received (-1 while posted) */
- __u64 rx_vaddr; /* pre-mapped buffer (hca vaddr) */
- kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */
- struct ib_receive_param rx_sp; /* receive work item */
- struct ib_gather_scatter rx_gl; /* and it's memory */
-} kib_rx_t;
-
-typedef struct kib_tx /* transmit message */
-{
- struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
- struct kib_conn *tx_conn; /* owning conn */
- int tx_mapped; /* mapped for RDMA? */
- int tx_sending; /* # tx callbacks outstanding */
- int tx_status; /* completion status */
- unsigned long tx_deadline; /* completion deadline */
- int tx_passive_rdma; /* peer sucks/blows */
- int tx_passive_rdma_wait; /* waiting for peer to complete */
- __u64 tx_passive_rdma_cookie; /* completion cookie */
- lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */
- kib_md_t tx_md; /* RDMA mapping (active/passive) */
- __u64 tx_vaddr; /* pre-mapped buffer (hca vaddr) */
- kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */
- int tx_nsp; /* # send work items */
- struct ib_send_param tx_sp[2]; /* send work items... */
- struct ib_gather_scatter tx_gl[2]; /* ...and their memory */
-} kib_tx_t;
-
-#define KIB_TX_UNMAPPED 0
-#define KIB_TX_MAPPED 1
-#define KIB_TX_MAPPED_FMR 2
-
-typedef struct kib_connreq
-{
- /* active connection-in-progress state */
- struct kib_conn *cr_conn;
- kib_msg_t cr_msg;
- __u64 cr_tid;
- tTS_IB_GID cr_gid;
- kib_svcrsp_t cr_svcrsp;
- struct ib_path_record cr_path;
- struct ib_cm_active_param cr_connparam;
-} kib_connreq_t;
-
-typedef struct kib_conn
-{
- struct kib_peer *ibc_peer; /* owning peer */
- struct list_head ibc_list; /* stash on peer's conn list */
- __u64 ibc_incarnation; /* which instance of the peer */
- int ibc_version; /* peer protocol version */
- atomic_t ibc_refcount; /* # users */
- int ibc_state; /* what's happening */
- int ibc_nsends_posted; /* # uncompleted sends */
- int ibc_credits; /* # credits I have */
- int ibc_outstanding_credits; /* # credits to return */
- int ibc_reserved_credits; /* # credits for ACK/DONE msgs */
- unsigned long ibc_last_send; /* time of last send */
- struct list_head ibc_tx_queue_nocred; /* sends that don't need a credit */
- struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */
- struct list_head ibc_tx_queue; /* send queue */
- struct list_head ibc_active_txs; /* active tx awaiting completion */
- spinlock_t ibc_lock; /* serialise */
- kib_rx_t *ibc_rxs; /* the rx descs */
- kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */
- ib_qp_t *ibc_qp; /* queue pair */
- __u32 ibc_qpn; /* queue pair number */
- tTS_IB_CM_COMM_ID ibc_comm_id; /* connection ID? */
- kib_connreq_t *ibc_connreq; /* connection request state */
-} kib_conn_t;
-
-#define IBNAL_CONN_INIT_NOTHING 0 /* initial state */
-#define IBNAL_CONN_INIT_QP 1 /* ibc_qp set up */
-#define IBNAL_CONN_CONNECTING 2 /* started to connect */
-#define IBNAL_CONN_ESTABLISHED 3 /* connection established */
-#define IBNAL_CONN_DEATHROW 4 /* waiting to be closed */
-#define IBNAL_CONN_ZOMBIE 5 /* waiting to be freed */
-
-typedef struct kib_peer
-{
- struct list_head ibp_list; /* stash on global peer list */
- struct list_head ibp_connd_list; /* schedule on kib_connd_peers */
- lnet_nid_t ibp_nid; /* who's on the other end(s) */
- __u32 ibp_ip; /* IP to query for peer conn params */
- int ibp_port; /* port to qery for peer conn params */
- __u64 ibp_incarnation; /* peer's incarnation */
- atomic_t ibp_refcount; /* # users */
- int ibp_persistence; /* "known" peer refs */
- struct list_head ibp_conns; /* all active connections */
- struct list_head ibp_tx_queue; /* msgs waiting for a conn */
- int ibp_connecting; /* current active connection attempts */
- int ibp_accepting; /* current passive connection attempts */
- unsigned long ibp_reconnect_time; /* when reconnect may be attempted */
- unsigned long ibp_reconnect_interval; /* exponential backoff */
- int ibp_error; /* errno on closing this peer */
- cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */
-} kib_peer_t;
-
-extern kib_data_t kibnal_data;
-extern kib_tunables_t kibnal_tunables;
-
-/******************************************************************************/
-
-/* these are purposely avoiding using local vars so they don't increase
- * stack consumption. */
-
-#define kibnal_conn_addref(conn) \
-do { \
- CDEBUG(D_NET, "conn[%p] (%d)++\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- atomic_inc(&(conn)->ibc_refcount); \
-} while (0)
-
-#define kibnal_conn_decref(conn) \
-do { \
- unsigned long flags; \
- \
- CDEBUG(D_NET, "conn[%p] (%d)--\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \
- spin_lock_irqsave(&kibnal_data.kib_reaper_lock, flags); \
- list_add_tail(&(conn)->ibc_list, \
- &kibnal_data.kib_reaper_conns); \
- wake_up(&kibnal_data.kib_reaper_waitq); \
- spin_unlock_irqrestore(&kibnal_data.kib_reaper_lock, flags); \
- } \
-} while (0)
-
-#define kibnal_peer_addref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- atomic_inc(&(peer)->ibp_refcount); \
-} while (0)
-
-#define kibnal_peer_decref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
- kibnal_destroy_peer(peer); \
-} while (0)
-
-/******************************************************************************/
-
-static inline struct list_head *
-kibnal_nid2peerlist (lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size;
-
- return (&kibnal_data.kib_peers [hash]);
-}
-
-static inline int
-kibnal_peer_active(kib_peer_t *peer)
-{
- /* Am I in the peer hash table? */
- return (!list_empty(&peer->ibp_list));
-}
-
-static inline void
-kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
-{
- struct list_head *q;
-
- LASSERT (tx->tx_nsp > 0); /* work items set up */
- LASSERT (tx->tx_conn == NULL); /* only set here */
-
- kibnal_conn_addref(conn);
- tx->tx_conn = conn;
- tx->tx_deadline = jiffies + *kibnal_tunables.kib_timeout * HZ;
-
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- /* All messages have simple credit control */
- q = &conn->ibc_tx_queue;
- } else {
- LASSERT (conn->ibc_version == IBNAL_MSG_VERSION);
-
- switch (tx->tx_msg->ibm_type) {
- case IBNAL_MSG_PUT_RDMA:
- case IBNAL_MSG_GET_RDMA:
- /* RDMA request: reserve a buffer for the RDMA reply
- * before sending */
- q = &conn->ibc_tx_queue_rsrvd;
- break;
-
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- /* RDMA completion: no credits; peer has reserved a
- * reply buffer */
- q = &conn->ibc_tx_queue_nocred;
- break;
-
- case IBNAL_MSG_NOOP:
- case IBNAL_MSG_IMMEDIATE:
- /* Otherwise: consume a credit before sending */
- q = &conn->ibc_tx_queue;
- break;
-
- default:
- LBUG();
- q = NULL;
- }
- }
-
- list_add_tail(&tx->tx_list, q);
-}
-
-static inline int
-kibnal_send_keepalive(kib_conn_t *conn)
-{
- return (*kibnal_tunables.kib_keepalive > 0) &&
- time_after(jiffies, conn->ibc_last_send +
- *kibnal_tunables.kib_keepalive*HZ);
-}
-
-/* CAVEAT EMPTOR:
- * We rely on tx/rx descriptor alignment to allow us to use the lowest bit
- * of the work request id as a flag to determine if the completion is for a
- * transmit or a receive. It seems that that the CQ entry's 'op' field
- * isn't always set correctly on completions that occur after QP teardown. */
-
-static inline __u64
-kibnal_ptr2wreqid (void *ptr, int isrx)
-{
- unsigned long lptr = (unsigned long)ptr;
-
- LASSERT ((lptr & 1) == 0);
- return (__u64)(lptr | (isrx ? 1 : 0));
-}
-
-static inline void *
-kibnal_wreqid2ptr (__u64 wreqid)
-{
- return (void *)(((unsigned long)wreqid) & ~1UL);
-}
-
-static inline int
-kibnal_wreqid_is_rx (__u64 wreqid)
-{
- return (wreqid & 1) != 0;
-}
-
-#if (IB_NTXRXPARAMS == 3)
-static inline int
-kibnal_ib_send(ib_qp_t *qp, struct ib_send_param *p)
-{
- return ib_send(qp, p, 1);
-}
-
-static inline int
-kibnal_ib_receive(ib_qp_t *qp, struct ib_receive_param *p)
-{
- return ib_receive(qp, p, 1);
-}
-#elif (IB_NTXRXPARAMS == 4)
-static inline int
-kibnal_ib_send(ib_qp_t *qp, struct ib_send_param *p)
-{
- return ib_send(qp, p, 1, NULL);
-}
-
-static inline int
-kibnal_ib_receive(ib_qp_t *qp, struct ib_receive_param *p)
-{
- return ib_receive(qp, p, 1, NULL);
-}
-#else
- #error "IB_NTXRXPARAMS not set correctly"
-#endif
-
-int kibnal_startup (lnet_ni_t *ni);
-void kibnal_shutdown (lnet_ni_t *ni);
-int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kibnal_eager_recv (lnet_ni_t *ni, void *private,
- lnet_msg_t *lntmsg, void **new_private);
-int kibnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-int kibnal_accept(lnet_ni_t *ni, struct socket *sock);
-
-extern void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob);
-extern void kibnal_pack_msg(kib_msg_t *msg, int version, int credits,
- lnet_nid_t dstnid, __u64 dststamp);
-extern int kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob);
-extern void kibnal_handle_svcqry (struct socket *sock);
-extern int kibnal_make_svcqry (kib_conn_t *conn);
-extern void kibnal_free_acceptsock (kib_acceptsock_t *as);
-extern int kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid);
-extern void kibnal_destroy_peer (kib_peer_t *peer);
-extern int kibnal_add_persistent_peer(lnet_nid_t nid, __u32 ip, int port);
-extern int kibnal_del_peer (lnet_nid_t nid);
-extern kib_peer_t *kibnal_find_peer_locked (lnet_nid_t nid);
-extern void kibnal_unlink_peer_locked (kib_peer_t *peer);
-extern void kibnal_peer_alive(kib_peer_t *peer);
-extern int kibnal_close_stale_conns_locked (kib_peer_t *peer,
- __u64 incarnation);
-extern kib_conn_t *kibnal_create_conn (void);
-extern void kibnal_destroy_conn (kib_conn_t *conn);
-extern int kibnal_alloc_pages (kib_pages_t **pp, int npages, int access);
-extern void kibnal_free_pages (kib_pages_t *p);
-
-extern void kibnal_check_sends (kib_conn_t *conn);
-
-extern tTS_IB_CM_CALLBACK_RETURN
-kibnal_bad_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid,
- void *param, void *arg);
-extern tTS_IB_CM_CALLBACK_RETURN
-kibnal_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid,
- void *param, void *arg);
-extern tTS_IB_CM_CALLBACK_RETURN
-kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid,
- void *param, void *arg);
-
-extern void kibnal_close_conn_locked (kib_conn_t *conn, int error);
-extern void kibnal_destroy_conn (kib_conn_t *conn);
-extern int kibnal_thread_start (int (*fn)(void *arg), void *arg);
-extern int kibnal_scheduler(void *arg);
-extern int kibnal_connd (void *arg);
-extern int kibnal_reaper (void *arg);
-extern void kibnal_callback (ib_cq_t *cq, struct ib_cq_entry *e, void *arg);
-extern void kibnal_txlist_done (struct list_head *txlist, int status);
-extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob);
-extern int kibnal_close_conn (kib_conn_t *conn, int why);
-extern void kibnal_start_active_rdma (int type, int status,
- kib_rx_t *rx, lnet_msg_t *lntmsg,
- unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- int offset, int nob);
-
-extern int kibnal_tunables_init(void);
-extern void kibnal_tunables_fini(void);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "openiblnd.h"
-
-/*
- * LIB functions follow
- *
- */
-void
-kibnal_schedule_tx_done (kib_tx_t *tx)
-{
- unsigned long flags;
-
- spin_lock_irqsave (&kibnal_data.kib_sched_lock, flags);
-
- list_add_tail(&tx->tx_list, &kibnal_data.kib_sched_txq);
- wake_up (&kibnal_data.kib_sched_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-}
-
-void
-kibnal_tx_done (kib_tx_t *tx)
-{
- lnet_msg_t *lntmsg[2];
- unsigned long flags;
- int i;
- int rc;
-
- LASSERT (tx->tx_sending == 0); /* mustn't be awaiting callback */
- LASSERT (!tx->tx_passive_rdma_wait); /* mustn't be awaiting RDMA */
-
- if (in_interrupt()) {
- /* can't deregister memory/flush FMAs/finalize in IRQ context... */
- kibnal_schedule_tx_done(tx);
- return;
- }
-
- switch (tx->tx_mapped) {
- default:
- LBUG();
-
- case KIB_TX_UNMAPPED:
- break;
-
- case KIB_TX_MAPPED:
- rc = ib_memory_deregister(tx->tx_md.md_handle.mr);
- LASSERT (rc == 0);
- tx->tx_mapped = KIB_TX_UNMAPPED;
- break;
-
-#if IBNAL_FMR
- case KIB_TX_MAPPED_FMR:
- rc = ib_fmr_deregister(tx->tx_md.md_handle.fmr);
- LASSERT (rc == 0);
-
-#ifndef USING_TSAPI
- /* Somewhat belt-and-braces since the tx's conn has closed if
- * this was a passive RDMA waiting to complete... */
- if (tx->tx_status != 0)
- ib_fmr_pool_force_flush(kibnal_data.kib_fmr_pool);
-#endif
- tx->tx_mapped = KIB_TX_UNMAPPED;
- break;
-#endif
- }
-
- /* tx may have up to 2 ptlmsgs to finalise */
- lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
- lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
- rc = tx->tx_status;
-
- if (tx->tx_conn != NULL) {
- kibnal_conn_decref(tx->tx_conn);
- tx->tx_conn = NULL;
- }
-
- tx->tx_nsp = 0;
- tx->tx_passive_rdma = 0;
- tx->tx_status = 0;
-
- spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags);
-
- list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_txs);
-
- spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags);
-
- /* delay finalize until my descs have been freed */
- for (i = 0; i < 2; i++) {
- if (lntmsg[i] == NULL)
- continue;
-
- lnet_finalize (kibnal_data.kib_ni, lntmsg[i], rc);
- }
-}
-
-kib_tx_t *
-kibnal_get_idle_tx (void)
-{
- unsigned long flags;
- kib_tx_t *tx;
-
- spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags);
-
- if (list_empty (&kibnal_data.kib_idle_txs)) {
- spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags);
- return NULL;
- }
-
- tx = list_entry (kibnal_data.kib_idle_txs.next, kib_tx_t, tx_list);
- list_del (&tx->tx_list);
-
- /* Allocate a new passive RDMA completion cookie. It might not be
- * needed, but we've got a lock right now and we're unlikely to
- * wrap... */
- tx->tx_passive_rdma_cookie = kibnal_data.kib_next_tx_cookie++;
-
- spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags);
-
- LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
- LASSERT (tx->tx_nsp == 0);
- LASSERT (tx->tx_sending == 0);
- LASSERT (tx->tx_status == 0);
- LASSERT (tx->tx_conn == NULL);
- LASSERT (!tx->tx_passive_rdma);
- LASSERT (!tx->tx_passive_rdma_wait);
- LASSERT (tx->tx_lntmsg[0] == NULL);
- LASSERT (tx->tx_lntmsg[1] == NULL);
-
- return tx;
-}
-
-void
-kibnal_complete_passive_rdma(kib_conn_t *conn, __u64 cookie, int status)
-{
- struct list_head *ttmp;
- unsigned long flags;
- int idle;
-
- spin_lock_irqsave (&conn->ibc_lock, flags);
-
- list_for_each (ttmp, &conn->ibc_active_txs) {
- kib_tx_t *tx = list_entry(ttmp, kib_tx_t, tx_list);
-
- LASSERT (tx->tx_passive_rdma ||
- !tx->tx_passive_rdma_wait);
-
- LASSERT (tx->tx_passive_rdma_wait ||
- tx->tx_sending != 0);
-
- if (!tx->tx_passive_rdma_wait ||
- tx->tx_passive_rdma_cookie != cookie)
- continue;
-
- CDEBUG(D_NET, "Complete %p "LPD64": %d\n", tx, cookie, status);
-
- /* XXX Set mlength of reply here */
-
- tx->tx_status = status;
- tx->tx_passive_rdma_wait = 0;
- idle = (tx->tx_sending == 0);
-
- if (idle)
- list_del (&tx->tx_list);
-
- spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
- /* I could be racing with tx callbacks. It's whoever
- * _makes_ tx idle that frees it */
- if (idle)
- kibnal_tx_done (tx);
- return;
- }
-
- spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
- CERROR ("Unmatched (late?) RDMA completion "LPX64" from %s\n",
- cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-}
-
-void
-kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit)
-{
- kib_conn_t *conn = rx->rx_conn;
- int rc;
- unsigned long flags;
-
- LASSERT(!rsrvd_credit ||
- conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
-
- rx->rx_gl = (struct ib_gather_scatter) {
- .address = rx->rx_vaddr,
- .length = IBNAL_MSG_SIZE,
- .key = conn->ibc_rx_pages->ibp_lkey,
- };
-
- rx->rx_sp = (struct ib_receive_param) {
- .work_request_id = kibnal_ptr2wreqid(rx, 1),
- .scatter_list = &rx->rx_gl,
- .num_scatter_entries = 1,
- .device_specific = NULL,
- .signaled = 1,
- };
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
- LASSERT (rx->rx_nob >= 0); /* not posted */
- rx->rx_nob = -1; /* is now */
- mb();
-
- if (conn->ibc_state != IBNAL_CONN_ESTABLISHED)
- rc = -ECONNABORTED;
- else
- rc = kibnal_ib_receive(conn->ibc_qp, &rx->rx_sp);
-
- if (rc == 0) {
- if (credit || rsrvd_credit) {
- spin_lock_irqsave(&conn->ibc_lock, flags);
-
- if (credit)
- conn->ibc_outstanding_credits++;
- if (rsrvd_credit)
- conn->ibc_reserved_credits++;
-
- spin_unlock_irqrestore(&conn->ibc_lock, flags);
-
- kibnal_check_sends(conn);
- }
- return;
- }
-
- if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
- CERROR ("Error posting receive -> %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kibnal_close_conn (rx->rx_conn, rc);
- } else {
- CDEBUG (D_NET, "Error posting receive -> %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- }
-
- /* Drop rx's ref */
- kibnal_conn_decref(conn);
-}
-
-void
-kibnal_rx_callback (struct ib_cq_entry *e)
-{
- kib_rx_t *rx = (kib_rx_t *)kibnal_wreqid2ptr(e->work_request_id);
- kib_msg_t *msg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- int credits;
- unsigned long flags;
- int rc;
- int err = -ECONNABORTED;
-
- CDEBUG (D_NET, "rx %p conn %p\n", rx, conn);
- LASSERT (rx->rx_nob < 0); /* was posted */
- rx->rx_nob = 0; /* isn't now */
- mb();
-
- /* receives complete with error in any case after we've started
- * closing the QP */
- if (conn->ibc_state >= IBNAL_CONN_DEATHROW)
- goto failed;
-
- /* We don't post receives until the conn is established */
- LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED);
-
- if (e->status != IB_COMPLETION_STATUS_SUCCESS) {
- CERROR("Rx from %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), e->status);
- goto failed;
- }
-
- LASSERT (e->bytes_transferred >= 0);
- rx->rx_nob = e->bytes_transferred;
- mb();
-
- rc = kibnal_unpack_msg(msg, conn->ibc_version, rx->rx_nob);
- if (rc != 0) {
- CERROR ("Error %d unpacking rx from %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- goto failed;
- }
-
- if (!lnet_ptlcompat_matchnid(conn->ibc_peer->ibp_nid,
- msg->ibm_srcnid) ||
- !lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
- msg->ibm_dstnid) ||
- msg->ibm_srcstamp != conn->ibc_incarnation ||
- msg->ibm_dststamp != kibnal_data.kib_incarnation) {
- CERROR ("Stale rx from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- err = -ESTALE;
- goto failed;
- }
-
- /* Have I received credits that will let me send? */
- credits = msg->ibm_credits;
- if (credits != 0) {
- spin_lock_irqsave(&conn->ibc_lock, flags);
- conn->ibc_credits += credits;
- spin_unlock_irqrestore(&conn->ibc_lock, flags);
-
- kibnal_check_sends(conn);
- }
-
- switch (msg->ibm_type) {
- case IBNAL_MSG_NOOP:
- kibnal_post_rx (rx, 1, 0);
- return;
-
- case IBNAL_MSG_IMMEDIATE:
- break;
-
- case IBNAL_MSG_PUT_RDMA:
- case IBNAL_MSG_GET_RDMA:
- CDEBUG(D_NET, "%d RDMA: cookie "LPX64", key %x, addr "LPX64", nob %d\n",
- msg->ibm_type, msg->ibm_u.rdma.ibrm_cookie,
- msg->ibm_u.rdma.ibrm_desc.rd_key,
- msg->ibm_u.rdma.ibrm_desc.rd_addr,
- msg->ibm_u.rdma.ibrm_desc.rd_nob);
- break;
-
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- CDEBUG(D_NET, "%d DONE: cookie "LPX64", status %d\n",
- msg->ibm_type, msg->ibm_u.completion.ibcm_cookie,
- msg->ibm_u.completion.ibcm_status);
-
- kibnal_complete_passive_rdma (conn,
- msg->ibm_u.completion.ibcm_cookie,
- msg->ibm_u.completion.ibcm_status);
-
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- kibnal_post_rx (rx, 1, 0);
- } else {
- /* this reply buffer was pre-reserved */
- kibnal_post_rx (rx, 0, 1);
- }
- return;
-
- default:
- CERROR ("Bad msg type %x from %s\n",
- msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- goto failed;
- }
-
- kibnal_peer_alive(conn->ibc_peer);
-
- /* schedule for kibnal_rx() in thread context */
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
-
- list_add_tail (&rx->rx_list, &kibnal_data.kib_sched_rxq);
- wake_up (&kibnal_data.kib_sched_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
- return;
-
- failed:
- CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
- kibnal_close_conn(conn, err);
-
- /* Don't re-post rx & drop its ref on conn */
- kibnal_conn_decref(conn);
-}
-
-void
-kibnal_rx (kib_rx_t *rx)
-{
- int rc = 0;
- kib_msg_t *msg = rx->rx_msg;
-
- switch (msg->ibm_type) {
- case IBNAL_MSG_GET_RDMA:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr,
- msg->ibm_srcnid, rx, 1);
- break;
-
- case IBNAL_MSG_PUT_RDMA:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr,
- msg->ibm_srcnid, rx, 1);
- break;
-
- case IBNAL_MSG_IMMEDIATE:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr,
- msg->ibm_srcnid, rx, 0);
- break;
-
- default:
- LBUG();
- break;
- }
-
- if (rc < 0) {
- kibnal_close_conn(rx->rx_conn, rc);
- kibnal_post_rx (rx, 1, 0);
- }
-}
-
-#if 0
-int
-kibnal_kvaddr_to_phys (unsigned long vaddr, __u64 *physp)
-{
- struct page *page;
-
- if (vaddr >= VMALLOC_START &&
- vaddr < VMALLOC_END)
- page = vmalloc_to_page ((void *)vaddr);
-#ifdef CONFIG_HIGHMEM
- else if (vaddr >= PKMAP_BASE &&
- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE))
- page = vmalloc_to_page ((void *)vaddr);
- /* in 2.4 ^ just walks the page tables */
-#endif
- else
- page = virt_to_page (vaddr);
-
- if (page == NULL ||
- !VALID_PAGE (page))
- return (-EFAULT);
-
- *physp = lnet_page2phys(page) + (vaddr & (PAGE_SIZE - 1));
- return (0);
-}
-#endif
-
-int
-kibnal_map_iov (kib_tx_t *tx, int access,
- unsigned int niov, struct iovec *iov, int offset, int nob)
-
-{
- void *vaddr;
- int rc;
-
- LASSERT (nob > 0);
- LASSERT (niov > 0);
- LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT (niov > 0);
- }
-
- if (nob > iov->iov_len - offset) {
- CERROR ("Can't map multiple vaddr fragments\n");
- return (-EMSGSIZE);
- }
-
- vaddr = (void *)(((unsigned long)iov->iov_base) + offset);
- tx->tx_md.md_addr = (__u64)((unsigned long)vaddr);
-
- rc = ib_memory_register (kibnal_data.kib_pd,
- vaddr, nob,
- access,
- &tx->tx_md.md_handle.mr,
- &tx->tx_md.md_lkey,
- &tx->tx_md.md_rkey);
-
- if (rc != 0) {
- CERROR ("Can't map vaddr: %d\n", rc);
- return (rc);
- }
-
- tx->tx_mapped = KIB_TX_MAPPED;
- return (0);
-}
-
-int
-kibnal_map_kiov (kib_tx_t *tx, int access,
- int nkiov, lnet_kiov_t *kiov,
- int offset, int nob)
-{
-#if IBNAL_FMR
- __u64 *phys;
- const int mapped = KIB_TX_MAPPED_FMR;
-#else
- struct ib_physical_buffer *phys;
- const int mapped = KIB_TX_MAPPED;
-#endif
- int page_offset;
- int nphys;
- int resid;
- int phys_size;
- int rc;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT (nob > 0);
- LASSERT (nkiov > 0);
- LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
-
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- nkiov--;
- kiov++;
- LASSERT (nkiov > 0);
- }
-
- phys_size = nkiov * sizeof (*phys);
- LIBCFS_ALLOC(phys, phys_size);
- if (phys == NULL) {
- CERROR ("Can't allocate tmp phys\n");
- return (-ENOMEM);
- }
-
- page_offset = kiov->kiov_offset + offset;
-#if IBNAL_FMR
- phys[0] = lnet_page2phys(kiov->kiov_page);
-#else
- phys[0].address = lnet_page2phys(kiov->kiov_page);
- phys[0].size = PAGE_SIZE;
-#endif
- nphys = 1;
- resid = nob - (kiov->kiov_len - offset);
-
- while (resid > 0) {
- kiov++;
- nkiov--;
- LASSERT (nkiov > 0);
-
- if (kiov->kiov_offset != 0 ||
- ((resid > PAGE_SIZE) &&
- kiov->kiov_len < PAGE_SIZE)) {
- int i;
- /* Can't have gaps */
- CERROR ("Can't make payload contiguous in I/O VM:"
- "page %d, offset %d, len %d \n", nphys,
- kiov->kiov_offset, kiov->kiov_len);
-
- for (i = -nphys; i < nkiov; i++)
- {
- CERROR("kiov[%d] %p +%d for %d\n",
- i, kiov[i].kiov_page, kiov[i].kiov_offset, kiov[i].kiov_len);
- }
-
- rc = -EINVAL;
- goto out;
- }
-
- if (nphys == LNET_MAX_IOV) {
- CERROR ("payload too big (%d)\n", nphys);
- rc = -EMSGSIZE;
- goto out;
- }
-
- LASSERT (nphys * sizeof (*phys) < phys_size);
-#if IBNAL_FMR
- phys[nphys] = lnet_page2phys(kiov->kiov_page);
-#else
- phys[nphys].address = lnet_page2phys(kiov->kiov_page);
- phys[nphys].size = PAGE_SIZE;
-#endif
- nphys++;
-
- resid -= PAGE_SIZE;
- }
-
- tx->tx_md.md_addr = IBNAL_RDMA_BASE;
-
-#if IBNAL_FMR
- rc = ib_fmr_register_physical (kibnal_data.kib_fmr_pool,
- phys, nphys,
- &tx->tx_md.md_addr,
- page_offset,
- &tx->tx_md.md_handle.fmr,
- &tx->tx_md.md_lkey,
- &tx->tx_md.md_rkey);
-#else
- rc = ib_memory_register_physical (kibnal_data.kib_pd,
- phys, nphys,
- &tx->tx_md.md_addr,
- nob, page_offset,
- access,
- &tx->tx_md.md_handle.mr,
- &tx->tx_md.md_lkey,
- &tx->tx_md.md_rkey);
-#endif
- if (rc == 0) {
- CDEBUG(D_NET, "Mapped %d pages %d bytes @ offset %d: lkey %x, rkey %x\n",
- nphys, nob, page_offset, tx->tx_md.md_lkey, tx->tx_md.md_rkey);
- tx->tx_mapped = mapped;
- } else {
- CERROR ("Can't map phys: %d\n", rc);
- rc = -EFAULT;
- }
-
- out:
- LIBCFS_FREE(phys, phys_size);
- return (rc);
-}
-
-kib_conn_t *
-kibnal_find_conn_locked (kib_peer_t *peer)
-{
- struct list_head *tmp;
-
- /* just return the first connection */
- list_for_each (tmp, &peer->ibp_conns) {
- return (list_entry(tmp, kib_conn_t, ibc_list));
- }
-
- return (NULL);
-}
-
-void
-kibnal_check_sends (kib_conn_t *conn)
-{
- unsigned long flags;
- kib_tx_t *tx;
- int rc;
- int i;
- int consume_credit;
- int done;
- int nwork;
-
- spin_lock_irqsave (&conn->ibc_lock, flags);
-
- LASSERT (conn->ibc_nsends_posted <= IBNAL_RX_MSGS);
- LASSERT (conn->ibc_reserved_credits >= 0);
-
- while (conn->ibc_reserved_credits > 0 &&
- !list_empty(&conn->ibc_tx_queue_rsrvd)) {
- LASSERT (conn->ibc_version !=
- IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
- tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
- kib_tx_t, tx_list);
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
- conn->ibc_reserved_credits--;
- }
-
- if (list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER ||
- kibnal_send_keepalive(conn))) {
- spin_unlock_irqrestore(&conn->ibc_lock, flags);
-
- tx = kibnal_get_idle_tx();
- if (tx != NULL)
- kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0);
-
- spin_lock_irqsave(&conn->ibc_lock, flags);
-
- if (tx != NULL)
- kibnal_queue_tx_locked(tx, conn);
- }
-
- for (;;) {
- if (!list_empty(&conn->ibc_tx_queue_nocred)) {
- LASSERT (conn->ibc_version !=
- IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
- tx = list_entry(conn->ibc_tx_queue_nocred.next,
- kib_tx_t, tx_list);
- consume_credit = 0;
- } else if (!list_empty (&conn->ibc_tx_queue)) {
- tx = list_entry (conn->ibc_tx_queue.next,
- kib_tx_t, tx_list);
- consume_credit = 1;
- } else {
- /* nothing waiting */
- break;
- }
-
- /* We rely on this for QP sizing */
- LASSERT (tx->tx_nsp > 0 && tx->tx_nsp <= 2);
-
- LASSERT (conn->ibc_outstanding_credits >= 0);
- LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE);
- LASSERT (conn->ibc_credits >= 0);
- LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE);
-
- /* Not on ibc_rdma_queue */
- LASSERT (!tx->tx_passive_rdma_wait);
-
- if (conn->ibc_nsends_posted == IBNAL_RX_MSGS)
- break;
-
- if (consume_credit) {
- if (conn->ibc_credits == 0) /* no credits */
- break;
-
- if (conn->ibc_credits == 1 && /* last credit reserved for */
- conn->ibc_outstanding_credits == 0) /* giving back credits */
- break;
- }
-
- list_del (&tx->tx_list);
-
- if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP &&
- (!list_empty(&conn->ibc_tx_queue) ||
- !list_empty(&conn->ibc_tx_queue_nocred) ||
- (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER &&
- !kibnal_send_keepalive(conn)))) {
- /* redundant NOOP */
- spin_unlock_irqrestore(&conn->ibc_lock, flags);
- kibnal_tx_done(tx);
- spin_lock_irqsave(&conn->ibc_lock, flags);
- continue;
- }
-
- kibnal_pack_msg(tx->tx_msg, conn->ibc_version,
- conn->ibc_outstanding_credits,
- conn->ibc_peer->ibp_nid, conn->ibc_incarnation);
-
- conn->ibc_outstanding_credits = 0;
- conn->ibc_nsends_posted++;
- if (consume_credit)
- conn->ibc_credits--;
-
- tx->tx_sending = tx->tx_nsp;
- tx->tx_passive_rdma_wait = tx->tx_passive_rdma;
- list_add (&tx->tx_list, &conn->ibc_active_txs);
-
- spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
- /* NB the gap between removing tx from the queue and sending it
- * allows message re-ordering to occur */
-
- LASSERT (tx->tx_nsp > 0);
-
- rc = -ECONNABORTED;
- nwork = 0;
- if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
- tx->tx_status = 0;
- /* Driver only accepts 1 item at a time */
- for (i = 0; i < tx->tx_nsp; i++) {
- rc = kibnal_ib_send(conn->ibc_qp, &tx->tx_sp[i]);
- if (rc != 0)
- break;
- nwork++;
- }
- }
-
- conn->ibc_last_send = jiffies;
-
- spin_lock_irqsave (&conn->ibc_lock, flags);
- if (rc != 0) {
- /* NB credits are transferred in the actual
- * message, which can only be the last work item */
- conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits;
- if (consume_credit)
- conn->ibc_credits++;
- conn->ibc_nsends_posted--;
-
- tx->tx_status = rc;
- tx->tx_passive_rdma_wait = 0;
- tx->tx_sending -= tx->tx_nsp - nwork;
-
- done = (tx->tx_sending == 0);
- if (done)
- list_del (&tx->tx_list);
-
- spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
- if (conn->ibc_state == IBNAL_CONN_ESTABLISHED)
- CERROR ("Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- else
- CDEBUG (D_NET, "Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- kibnal_close_conn (conn, rc);
-
- if (done)
- kibnal_tx_done (tx);
- return;
- }
-
- }
-
- spin_unlock_irqrestore (&conn->ibc_lock, flags);
-}
-
-void
-kibnal_tx_callback (struct ib_cq_entry *e)
-{
- kib_tx_t *tx = (kib_tx_t *)kibnal_wreqid2ptr(e->work_request_id);
- kib_conn_t *conn;
- unsigned long flags;
- int idle;
-
- conn = tx->tx_conn;
- LASSERT (conn != NULL);
- LASSERT (tx->tx_sending != 0);
-
- spin_lock_irqsave(&conn->ibc_lock, flags);
-
- CDEBUG(D_NET, "conn %p tx %p [%d/%d]: %d\n", conn, tx,
- tx->tx_nsp - tx->tx_sending, tx->tx_nsp,
- e->status);
-
- /* I could be racing with rdma completion. Whoever makes 'tx' idle
- * gets to free it, which also drops its ref on 'conn'. If it's
- * not me, then I take an extra ref on conn so it can't disappear
- * under me. */
-
- tx->tx_sending--;
- idle = (tx->tx_sending == 0) && /* This is the final callback */
- (!tx->tx_passive_rdma_wait); /* Not waiting for RDMA completion */
- if (idle)
- list_del(&tx->tx_list);
-
- kibnal_conn_addref(conn);
-
- if (tx->tx_sending == 0)
- conn->ibc_nsends_posted--;
-
- if (e->status != IB_COMPLETION_STATUS_SUCCESS &&
- tx->tx_status == 0)
- tx->tx_status = -ECONNABORTED;
-
- spin_unlock_irqrestore(&conn->ibc_lock, flags);
-
- if (idle)
- kibnal_tx_done (tx);
-
- if (e->status != IB_COMPLETION_STATUS_SUCCESS) {
- CDEBUG (D_NETERROR, "Tx completion to %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), e->status);
- kibnal_close_conn (conn, -ENETDOWN);
- } else {
- kibnal_peer_alive(conn->ibc_peer);
- /* can I shovel some more sends out the door? */
- kibnal_check_sends(conn);
- }
-
- kibnal_conn_decref(conn);
-}
-
-void
-kibnal_callback (ib_cq_t *cq, struct ib_cq_entry *e, void *arg)
-{
- if (kibnal_wreqid_is_rx(e->work_request_id))
- kibnal_rx_callback (e);
- else
- kibnal_tx_callback (e);
-}
-
-void
-kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob)
-{
- struct ib_gather_scatter *gl = &tx->tx_gl[tx->tx_nsp];
- struct ib_send_param *sp = &tx->tx_sp[tx->tx_nsp];
- int fence;
- int nob = offsetof (kib_msg_t, ibm_u) + body_nob;
-
- LASSERT (tx->tx_nsp >= 0 &&
- tx->tx_nsp < sizeof(tx->tx_sp)/sizeof(tx->tx_sp[0]));
- LASSERT (nob <= IBNAL_MSG_SIZE);
-
- kibnal_init_msg(tx->tx_msg, type, body_nob);
-
- /* Fence the message if it's bundled with an RDMA read */
- fence = (tx->tx_nsp > 0) &&
- (type == IBNAL_MSG_PUT_DONE);
-
- *gl = (struct ib_gather_scatter) {
- .address = tx->tx_vaddr,
- .length = nob,
- .key = kibnal_data.kib_tx_pages->ibp_lkey,
- };
-
- /* NB If this is an RDMA read, the completion message must wait for
- * the RDMA to complete. Sends wait for previous RDMA writes
- * anyway... */
- *sp = (struct ib_send_param) {
- .work_request_id = kibnal_ptr2wreqid(tx, 0),
- .op = IB_OP_SEND,
- .gather_list = gl,
- .num_gather_entries = 1,
- .device_specific = NULL,
- .solicited_event = 1,
- .signaled = 1,
- .immediate_data_valid = 0,
- .fence = fence,
- .inline_data = 0,
- };
-
- tx->tx_nsp++;
-}
-
-void
-kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&conn->ibc_lock, flags);
-
- kibnal_queue_tx_locked (tx, conn);
-
- spin_unlock_irqrestore(&conn->ibc_lock, flags);
-
- kibnal_check_sends(conn);
-}
-
-void
-kibnal_schedule_active_connect_locked (kib_peer_t *peer)
-{
- /* Called with exclusive kib_global_lock */
-
- peer->ibp_connecting++;
- kibnal_peer_addref(peer); /* extra ref for connd */
-
- spin_lock (&kibnal_data.kib_connd_lock);
-
- LASSERT (list_empty(&peer->ibp_connd_list));
- list_add_tail (&peer->ibp_connd_list,
- &kibnal_data.kib_connd_peers);
- wake_up (&kibnal_data.kib_connd_waitq);
-
- spin_unlock (&kibnal_data.kib_connd_lock);
-}
-
-void
-kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid)
-{
- unsigned long flags;
- kib_peer_t *peer;
- kib_conn_t *conn;
- int retry;
- int rc;
- rwlock_t *g_lock = &kibnal_data.kib_global_lock;
-
- /* If I get here, I've committed to send, so I complete the tx with
- * failure on any problems */
-
- LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */
- LASSERT (tx->tx_nsp > 0); /* work items have been set up */
-
- for (retry = 0; ; retry = 1) {
- read_lock_irqsave(g_lock, flags);
-
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL) {
- conn = kibnal_find_conn_locked (peer);
- if (conn != NULL) {
- kibnal_conn_addref(conn); /* 1 ref for me...*/
- read_unlock_irqrestore(g_lock, flags);
-
- kibnal_queue_tx (tx, conn);
- kibnal_conn_decref(conn); /* ...until here */
- return;
- }
- }
-
- /* Making one or more connections; I'll need a write lock... */
- read_unlock(g_lock);
- write_lock(g_lock);
-
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL)
- break;
-
- write_unlock_irqrestore (g_lock, flags);
-
- if (retry) {
- CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
- tx->tx_status = -EHOSTUNREACH;
- kibnal_tx_done (tx);
- return;
- }
-
- rc = kibnal_add_persistent_peer(nid, LNET_NIDADDR(nid),
- lnet_acceptor_port());
- if (rc != 0) {
- CERROR("Can't add peer %s: %d\n",
- libcfs_nid2str(nid), rc);
- tx->tx_status = rc;
- kibnal_tx_done(tx);
- return;
- }
- }
-
- conn = kibnal_find_conn_locked (peer);
- if (conn != NULL) {
- /* Connection exists; queue message on it */
- kibnal_conn_addref(conn); /* +1 ref from me... */
- write_unlock_irqrestore (g_lock, flags);
-
- kibnal_queue_tx (tx, conn);
- kibnal_conn_decref(conn); /* ...until here */
- return;
- }
-
- if (peer->ibp_connecting == 0 &&
- peer->ibp_accepting == 0) {
- if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */
- time_after_eq(jiffies, peer->ibp_reconnect_time))) {
- write_unlock_irqrestore (g_lock, flags);
- tx->tx_status = -EHOSTUNREACH;
- kibnal_tx_done (tx);
- return;
- }
-
- kibnal_schedule_active_connect_locked(peer);
- }
-
- /* A connection is being established; queue the message... */
- list_add_tail (&tx->tx_list, &peer->ibp_tx_queue);
-
- write_unlock_irqrestore (g_lock, flags);
-}
-
-void
-kibnal_txlist_done (struct list_head *txlist, int status)
-{
- kib_tx_t *tx;
-
- while (!list_empty(txlist)) {
- tx = list_entry (txlist->next, kib_tx_t, tx_list);
-
- list_del (&tx->tx_list);
- /* complete now */
- tx->tx_status = status;
- kibnal_tx_done (tx);
- }
-}
-
-int
-kibnal_start_passive_rdma (int type, lnet_msg_t *lntmsg,
- int niov, struct iovec *iov, lnet_kiov_t *kiov,
- int nob)
-{
- lnet_nid_t nid = lntmsg->msg_target.nid;
- kib_tx_t *tx;
- kib_msg_t *ibmsg;
- int rc;
- int access;
-
- LASSERT (type == IBNAL_MSG_PUT_RDMA ||
- type == IBNAL_MSG_GET_RDMA);
- LASSERT (nob > 0);
- LASSERT (!in_interrupt()); /* Mapping could block */
-
- if (type == IBNAL_MSG_PUT_RDMA) {
- access = IB_ACCESS_REMOTE_READ;
- } else {
- access = IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_LOCAL_WRITE;
- }
-
- tx = kibnal_get_idle_tx ();
- if (tx == NULL) {
- CERROR("Can't allocate %s txd for %s\n",
- (type == IBNAL_MSG_PUT_RDMA) ? "PUT/REPLY" : "GET",
- libcfs_nid2str(nid));
- return -ENOMEM;
- }
-
-
- if (iov != NULL)
- rc = kibnal_map_iov (tx, access, niov, iov, 0, nob);
- else
- rc = kibnal_map_kiov (tx, access, niov, kiov, 0, nob);
-
- if (rc != 0) {
- CERROR ("Can't map RDMA for %s: %d\n",
- libcfs_nid2str(nid), rc);
- goto failed;
- }
-
- if (type == IBNAL_MSG_GET_RDMA) {
- /* reply gets finalized when tx completes */
- tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni,
- lntmsg);
- if (tx->tx_lntmsg[1] == NULL) {
- CERROR ("Can't create reply for GET -> %s\n",
- libcfs_nid2str(nid));
- rc = -ENOMEM;
- goto failed;
- }
- }
-
- tx->tx_passive_rdma = 1;
-
- ibmsg = tx->tx_msg;
-
- ibmsg->ibm_u.rdma.ibrm_hdr = lntmsg->msg_hdr;
- ibmsg->ibm_u.rdma.ibrm_cookie = tx->tx_passive_rdma_cookie;
- ibmsg->ibm_u.rdma.ibrm_desc.rd_key = tx->tx_md.md_rkey;
- ibmsg->ibm_u.rdma.ibrm_desc.rd_addr = tx->tx_md.md_addr;
- ibmsg->ibm_u.rdma.ibrm_desc.rd_nob = nob;
-
- kibnal_init_tx_msg (tx, type, sizeof (kib_rdma_msg_t));
-
- CDEBUG(D_NET, "Passive: %p cookie "LPX64", key %x, addr "
- LPX64", nob %d\n",
- tx, tx->tx_passive_rdma_cookie, tx->tx_md.md_rkey,
- tx->tx_md.md_addr, nob);
-
- /* lntmsg gets finalized when tx completes. */
- tx->tx_lntmsg[0] = lntmsg;
-
- kibnal_launch_tx(tx, nid);
- return (0);
-
- failed:
- tx->tx_status = rc;
- kibnal_tx_done (tx);
- return (-EIO);
-}
-
-void
-kibnal_start_active_rdma (int type, int status,
- kib_rx_t *rx, lnet_msg_t *lntmsg,
- unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- int offset, int nob)
-{
- kib_msg_t *rxmsg = rx->rx_msg;
- kib_msg_t *txmsg;
- kib_tx_t *tx;
- int access;
- int rdma_op;
- int rc;
-
- CDEBUG(D_NET, "type %d, status %d, niov %d, offset %d, nob %d\n",
- type, status, niov, offset, nob);
-
- /* Called by scheduler */
- LASSERT (!in_interrupt ());
-
- /* Either all pages or all vaddrs */
- LASSERT (!(kiov != NULL && iov != NULL));
-
- /* No data if we're completing with failure */
- LASSERT (status == 0 || nob == 0);
-
- LASSERT (type == IBNAL_MSG_GET_DONE ||
- type == IBNAL_MSG_PUT_DONE);
-
- if (type == IBNAL_MSG_GET_DONE) {
- access = 0;
- rdma_op = IB_OP_RDMA_WRITE;
- LASSERT (rxmsg->ibm_type == IBNAL_MSG_GET_RDMA);
- } else {
- access = IB_ACCESS_LOCAL_WRITE;
- rdma_op = IB_OP_RDMA_READ;
- LASSERT (rxmsg->ibm_type == IBNAL_MSG_PUT_RDMA);
- }
-
- tx = kibnal_get_idle_tx ();
- if (tx == NULL) {
- CERROR ("tx descs exhausted on RDMA from %s"
- " completing locally with failure\n",
- libcfs_nid2str(rx->rx_conn->ibc_peer->ibp_nid));
- lnet_finalize (kibnal_data.kib_ni, lntmsg, -ENOMEM);
- return;
- }
- LASSERT (tx->tx_nsp == 0);
-
- if (nob != 0) {
- /* We actually need to transfer some data (the transfer
- * size could get truncated to zero when the incoming
- * message is matched) */
-
- if (kiov != NULL)
- rc = kibnal_map_kiov (tx, access,
- niov, kiov, offset, nob);
- else
- rc = kibnal_map_iov (tx, access,
- niov, iov, offset, nob);
-
- if (rc != 0) {
- CERROR ("Can't map RDMA -> %s: %d\n",
- libcfs_nid2str(rx->rx_conn->ibc_peer->ibp_nid),
- rc);
- /* We'll skip the RDMA and complete with failure. */
- status = rc;
- nob = 0;
- } else {
- tx->tx_gl[0] = (struct ib_gather_scatter) {
- .address = tx->tx_md.md_addr,
- .length = nob,
- .key = tx->tx_md.md_lkey,
- };
-
- tx->tx_sp[0] = (struct ib_send_param) {
- .work_request_id = kibnal_ptr2wreqid(tx, 0),
- .op = rdma_op,
- .gather_list = &tx->tx_gl[0],
- .num_gather_entries = 1,
- .remote_address = rxmsg->ibm_u.rdma.ibrm_desc.rd_addr,
- .rkey = rxmsg->ibm_u.rdma.ibrm_desc.rd_key,
- .device_specific = NULL,
- .solicited_event = 0,
- .signaled = 1,
- .immediate_data_valid = 0,
- .fence = 0,
- .inline_data = 0,
- };
-
- tx->tx_nsp = 1;
- }
- }
-
- txmsg = tx->tx_msg;
-
- txmsg->ibm_u.completion.ibcm_cookie = rxmsg->ibm_u.rdma.ibrm_cookie;
- txmsg->ibm_u.completion.ibcm_status = status;
-
- kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t));
-
- if (status == 0 && nob != 0) {
- LASSERT (tx->tx_nsp > 1);
- /* RDMA: lntmsg gets finalized when the tx completes. This
- * is after the completion message has been sent, which in
- * turn is after the RDMA has finished. */
- tx->tx_lntmsg[0] = lntmsg;
- } else {
- LASSERT (tx->tx_nsp == 1);
- /* No RDMA: local completion happens now! */
- CDEBUG(D_NET, "No data: immediate completion\n");
- lnet_finalize (kibnal_data.kib_ni, lntmsg,
- status == 0 ? 0 : -EIO);
- }
-
- kibnal_queue_tx(tx, rx->rx_conn);
-}
-
-int
-kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- lnet_hdr_t *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- kib_msg_t *ibmsg;
- kib_tx_t *tx;
- int nob;
-
- /* NB 'private' is different depending on what we're sending.... */
-
- CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT (payload_nob == 0 || payload_niov > 0);
- LASSERT (payload_niov <= LNET_MAX_IOV);
-
- /* Thread context if we're sending payload */
- LASSERT (!in_interrupt() || payload_niov == 0);
- /* payload is either all vaddrs or all pages */
- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
- switch (type) {
- default:
- LBUG();
- return (-EIO);
-
- case LNET_MSG_ACK:
- LASSERT (payload_nob == 0);
- break;
-
- case LNET_MSG_GET:
- if (routing || target_is_router)
- break; /* send IMMEDIATE */
-
- /* is the REPLY message too small for RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
- if (nob <= IBNAL_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
- return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, lntmsg,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.iov, NULL,
- lntmsg->msg_md->md_length);
-
- return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, lntmsg,
- lntmsg->msg_md->md_niov,
- NULL, lntmsg->msg_md->md_iov.kiov,
- lntmsg->msg_md->md_length);
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- /* Is the payload small enough not to need RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob <= IBNAL_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- return kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA, lntmsg,
- payload_niov,
- payload_iov, payload_kiov,
- payload_nob);
- }
-
- /* Send IMMEDIATE */
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR ("Can't send %d to %s: tx descs exhausted%s\n",
- type, libcfs_nid2str(target.nid),
- in_interrupt() ? " (intr)" : "");
- return (-ENOMEM);
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
- if (payload_kiov != NULL)
- lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-
- kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE,
- offsetof(kib_immediate_msg_t,
- ibim_payload[payload_nob]));
-
- /* lntmsg gets finalized when tx completes */
- tx->tx_lntmsg[0] = lntmsg;
-
- kibnal_launch_tx(tx, target.nid);
- return (0);
-}
-
-int
-kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- void **new_private)
-{
- kib_rx_t *rx = private;
- kib_conn_t *conn = rx->rx_conn;
-
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- /* Can't block if RDMA completions need normal credits */
- LCONSOLE_ERROR_MSG(0x12a,
- "Dropping message from %s: no buffers free. "
- "%s is running an old version of LNET that may "
- "deadlock if messages wait for buffers)\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return -EDEADLK;
- }
-
- *new_private = private;
- return 0;
-}
-
-int
-kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- kib_rx_t *rx = private;
- kib_msg_t *rxmsg = rx->rx_msg;
- int msg_nob;
- int rc = 0;
-
- LASSERT (mlen <= rlen);
- LASSERT (!in_interrupt ());
- /* Either all pages or all vaddrs */
- LASSERT (!(kiov != NULL && iov != NULL));
-
- switch (rxmsg->ibm_type) {
- default:
- LBUG();
-
- case IBNAL_MSG_IMMEDIATE:
- msg_nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]);
- if (msg_nob > rx->rx_nob) {
- CERROR ("Immediate message from %s too big: %d(%d)\n",
- libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
- msg_nob, rx->rx_nob);
- rc = -EPROTO;
- break;
- }
-
- if (kiov != NULL)
- lnet_copy_flat2kiov(
- niov, kiov, offset,
- IBNAL_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
- else
- lnet_copy_flat2iov(
- niov, iov, offset,
- IBNAL_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
-
- lnet_finalize (ni, lntmsg, 0);
- break;
-
- case IBNAL_MSG_GET_RDMA:
- if (lntmsg != NULL) {
- /* GET matched: RDMA lntmsg's payload */
- kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0,
- rx, lntmsg,
- lntmsg->msg_niov,
- lntmsg->msg_iov,
- lntmsg->msg_kiov,
- lntmsg->msg_offset,
- lntmsg->msg_len);
- } else {
- /* GET didn't match anything */
- kibnal_start_active_rdma (IBNAL_MSG_GET_DONE, -ENODATA,
- rx, NULL, 0, NULL, NULL, 0, 0);
- }
- break;
-
- case IBNAL_MSG_PUT_RDMA:
- kibnal_start_active_rdma (IBNAL_MSG_PUT_DONE, 0, rx, lntmsg,
- niov, iov, kiov, offset, mlen);
- break;
- }
-
- kibnal_post_rx(rx, 1, 0);
- return rc;
-}
-
-int
-kibnal_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid = kernel_thread (fn, arg, 0);
-
- if (pid < 0)
- return ((int)pid);
-
- atomic_inc (&kibnal_data.kib_nthreads);
- return (0);
-}
-
-void
-kibnal_thread_fini (void)
-{
- atomic_dec (&kibnal_data.kib_nthreads);
-}
-
-void
-kibnal_peer_alive (kib_peer_t *peer)
-{
- /* This is racy, but everyone's only writing cfs_time_current() */
- peer->ibp_last_alive = cfs_time_current();
- mb();
-}
-
-void
-kibnal_peer_notify (kib_peer_t *peer)
-{
- time_t last_alive = 0;
- int error = 0;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (list_empty(&peer->ibp_conns) &&
- peer->ibp_accepting == 0 &&
- peer->ibp_connecting == 0 &&
- peer->ibp_error != 0) {
- error = peer->ibp_error;
- peer->ibp_error = 0;
- last_alive = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() -
- peer->ibp_last_alive);
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (error != 0)
- lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive);
-}
-
-void
-kibnal_close_conn_locked (kib_conn_t *conn, int error)
-{
- /* This just does the immmediate housekeeping, and schedules the
- * connection for the reaper to finish off.
- * Caller holds kib_global_lock exclusively in irq context */
- kib_peer_t *peer = conn->ibc_peer;
-
- CDEBUG (error == 0 ? D_NET : D_NETERROR,
- "closing conn to %s: error %d\n",
- libcfs_nid2str(peer->ibp_nid), error);
-
- LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED ||
- conn->ibc_state == IBNAL_CONN_CONNECTING);
-
- if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
- /* kib_reaper_conns takes ibc_list's ref */
- list_del (&conn->ibc_list);
- } else {
- /* new ref for kib_reaper_conns */
- kibnal_conn_addref(conn);
- }
-
- if (list_empty (&peer->ibp_conns)) { /* no more conns */
- if (peer->ibp_persistence == 0 && /* non-persistent peer */
- kibnal_peer_active(peer)) /* still in peer table */
- kibnal_unlink_peer_locked (peer);
-
- peer->ibp_error = error; /* set/clear error on last conn */
- }
-
- conn->ibc_state = IBNAL_CONN_DEATHROW;
-
- /* Schedule conn for closing/destruction */
- spin_lock (&kibnal_data.kib_reaper_lock);
-
- list_add_tail (&conn->ibc_list, &kibnal_data.kib_reaper_conns);
- wake_up (&kibnal_data.kib_reaper_waitq);
-
- spin_unlock (&kibnal_data.kib_reaper_lock);
-}
-
-int
-kibnal_close_conn (kib_conn_t *conn, int why)
-{
- unsigned long flags;
- int count = 0;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_CONNECTING);
-
- if (conn->ibc_state <= IBNAL_CONN_ESTABLISHED) {
- count = 1;
- kibnal_close_conn_locked (conn, why);
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
- return (count);
-}
-
-void
-kibnal_peer_connect_failed (kib_peer_t *peer, int active, int error)
-{
- LIST_HEAD (zombies);
- unsigned long flags;
-
- LASSERT(error != 0);
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (active) {
- LASSERT (peer->ibp_connecting != 0);
- peer->ibp_connecting--;
- } else {
- LASSERT (peer->ibp_accepting != 0);
- peer->ibp_accepting--;
- }
-
- if (peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0) {
- /* another connection attempt under way... */
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
- return;
- }
-
- if (list_empty(&peer->ibp_conns)) {
- /* Say when active connection can be re-attempted */
- peer->ibp_reconnect_interval *= 2;
- peer->ibp_reconnect_interval =
- MAX(peer->ibp_reconnect_interval,
- *kibnal_tunables.kib_min_reconnect_interval);
- peer->ibp_reconnect_interval =
- MIN(peer->ibp_reconnect_interval,
- *kibnal_tunables.kib_max_reconnect_interval);
-
- peer->ibp_reconnect_time = jiffies +
- peer->ibp_reconnect_interval * HZ;
-
- /* Take peer's blocked transmits; I'll complete
- * them with error */
- list_add(&zombies, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (kibnal_peer_active(peer) &&
- (peer->ibp_persistence == 0)) {
- /* failed connection attempt on non-persistent peer */
- kibnal_unlink_peer_locked (peer);
- }
-
- peer->ibp_error = error;
- } else {
- /* Can't have blocked transmits if there are connections */
- LASSERT (list_empty(&peer->ibp_tx_queue));
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- kibnal_peer_notify(peer);
-
- if (!list_empty (&zombies))
- CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_txlist_done(&zombies, -EHOSTUNREACH);
-}
-
-void
-kibnal_connreq_done (kib_conn_t *conn, int active, int status)
-{
- int state = conn->ibc_state;
- kib_peer_t *peer = conn->ibc_peer;
- kib_tx_t *tx;
- unsigned long flags;
- int rc;
- int i;
-
- if (conn->ibc_connreq != NULL) {
- LIBCFS_FREE (conn->ibc_connreq, sizeof (*conn->ibc_connreq));
- conn->ibc_connreq = NULL;
- }
-
- switch (state) {
- case IBNAL_CONN_CONNECTING:
- /* conn has a CM comm_id */
- if (status == 0) {
- /* Install common (active/passive) callback for
- * disconnect/idle notification */
- rc = tsIbCmCallbackModify(conn->ibc_comm_id,
- kibnal_conn_callback,
- conn);
- LASSERT (rc == 0);
- } else {
- /* LASSERT (no more CM callbacks) */
- rc = tsIbCmCallbackModify(conn->ibc_comm_id,
- kibnal_bad_conn_callback,
- conn);
- LASSERT (rc == 0);
- }
- break;
-
- case IBNAL_CONN_INIT_QP:
- LASSERT (status != 0);
- break;
-
- default:
- LBUG();
- }
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (active)
- LASSERT (peer->ibp_connecting != 0);
- else
- LASSERT (peer->ibp_accepting != 0);
-
- if (status == 0 && /* connection established */
- kibnal_peer_active(peer)) { /* peer not deleted */
-
- if (active)
- peer->ibp_connecting--;
- else
- peer->ibp_accepting--;
-
- conn->ibc_last_send = jiffies;
- conn->ibc_state = IBNAL_CONN_ESTABLISHED;
- kibnal_peer_alive(peer);
-
- /* +1 ref for ibc_list; caller(== CM)'s ref remains until
- * the IB_CM_IDLE callback */
- kibnal_conn_addref(conn);
- list_add (&conn->ibc_list, &peer->ibp_conns);
-
- peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */
-
- /* post blocked sends to the new connection */
- spin_lock (&conn->ibc_lock);
-
- while (!list_empty (&peer->ibp_tx_queue)) {
- tx = list_entry (peer->ibp_tx_queue.next,
- kib_tx_t, tx_list);
-
- list_del (&tx->tx_list);
-
- kibnal_queue_tx_locked (tx, conn);
- }
-
- spin_unlock (&conn->ibc_lock);
-
- /* Nuke any dangling conns from a different peer instance... */
- kibnal_close_stale_conns_locked (conn->ibc_peer,
- conn->ibc_incarnation);
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- /* queue up all the receives */
- for (i = 0; i < IBNAL_RX_MSGS; i++) {
- /* +1 ref for rx desc */
- kibnal_conn_addref(conn);
-
- CDEBUG(D_NET, "RX[%d] %p->%p - "LPX64"\n",
- i, &conn->ibc_rxs[i], conn->ibc_rxs[i].rx_msg,
- conn->ibc_rxs[i].rx_vaddr);
-
- kibnal_post_rx (&conn->ibc_rxs[i], 0, 0);
- }
-
- kibnal_check_sends (conn);
- return;
- }
-
- if (status == 0) {
- /* connection established, but peer was deleted. Schedule for
- * reaper to cm_disconnect... */
- status = -ECONNABORTED;
- kibnal_close_conn_locked (conn, status);
- } else {
- /* just waiting for refs to drain */
- conn->ibc_state = IBNAL_CONN_ZOMBIE;
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- kibnal_peer_connect_failed (conn->ibc_peer, active, status);
-}
-
-int
-kibnal_accept_connreq (kib_conn_t **connp, tTS_IB_CM_COMM_ID cid,
- kib_msg_t *msg, int nob)
-{
- kib_conn_t *conn;
- kib_peer_t *peer;
- kib_peer_t *peer2;
- unsigned long flags;
- int rc;
-
- rc = kibnal_unpack_msg(msg, 0, nob);
- if (rc != 0) {
- CERROR("Can't unpack connreq msg: %d\n", rc);
- return -EPROTO;
- }
-
- CDEBUG(D_NET, "connreq from %s\n", libcfs_nid2str(msg->ibm_srcnid));
-
- if (msg->ibm_type != IBNAL_MSG_CONNREQ) {
- CERROR("Unexpected connreq msg type: %x from %s\n",
- msg->ibm_type, libcfs_nid2str(msg->ibm_srcnid));
- return -EPROTO;
- }
-
- if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) {
- CERROR("Can't accept %s: bad queue depth %d (%d expected)\n",
- libcfs_nid2str(msg->ibm_srcnid),
- msg->ibm_u.connparams.ibcp_queue_depth,
- IBNAL_MSG_QUEUE_SIZE);
- return (-EPROTO);
- }
-
- conn = kibnal_create_conn();
- if (conn == NULL)
- return (-ENOMEM);
-
- /* assume 'nid' is a new peer */
- rc = kibnal_create_peer(&peer, msg->ibm_srcnid);
- if (rc != 0) {
- kibnal_conn_decref(conn);
- return (-ENOMEM);
- }
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (kibnal_data.kib_nonewpeers) {
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- CERROR ("Shutdown has started, drop connreq from %s\n",
- libcfs_nid2str(msg->ibm_srcnid));
- kibnal_conn_decref(conn);
- kibnal_peer_decref(peer);
- return -ESHUTDOWN;
- }
-
- /* Check I'm the same instance that gave the connection parameters.
- * NB If my incarnation changes after this, the peer will get nuked and
- * we'll spot that when the connection is finally added into the peer's
- * connlist */
- if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
- msg->ibm_dstnid) ||
- msg->ibm_dststamp != kibnal_data.kib_incarnation) {
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- CERROR("Stale connection params from %s\n",
- libcfs_nid2str(msg->ibm_srcnid));
- kibnal_conn_decref(conn);
- kibnal_peer_decref(peer);
- return -ESTALE;
- }
-
- peer2 = kibnal_find_peer_locked(msg->ibm_srcnid);
- if (peer2 == NULL) {
- /* Brand new peer */
- LASSERT (peer->ibp_accepting == 0);
-
- /* peer table takes my ref on peer */
- list_add_tail (&peer->ibp_list,
- kibnal_nid2peerlist(msg->ibm_srcnid));
- } else {
- /* tie-break connection race in favour of the higher NID */
- if (peer2->ibp_connecting != 0 &&
- msg->ibm_srcnid < kibnal_data.kib_ni->ni_nid) {
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- CWARN("Conn race %s\n",
- libcfs_nid2str(peer2->ibp_nid));
-
- kibnal_conn_decref(conn);
- kibnal_peer_decref(peer);
- return -EALREADY;
- }
-
- kibnal_peer_decref(peer);
- peer = peer2;
- }
-
- /* +1 ref for conn */
- kibnal_peer_addref(peer);
- peer->ibp_accepting++;
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- conn->ibc_peer = peer;
- conn->ibc_state = IBNAL_CONN_CONNECTING;
- conn->ibc_comm_id = cid;
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE;
- conn->ibc_version = msg->ibm_version;
-
- *connp = conn;
- return (0);
-}
-
-tTS_IB_CM_CALLBACK_RETURN
-kibnal_bad_conn_callback (tTS_IB_CM_EVENT event,
- tTS_IB_CM_COMM_ID cid,
- void *param,
- void *arg)
-{
- CERROR ("Unexpected event %d: conn %p\n", event, arg);
- LBUG ();
- return TS_IB_CM_CALLBACK_PROCEED;
-}
-
-void
-kibnal_abort_txs (kib_conn_t *conn, struct list_head *txs)
-{
- LIST_HEAD (zombies);
- struct list_head *tmp;
- struct list_head *nxt;
- kib_tx_t *tx;
- unsigned long flags;
-
- spin_lock_irqsave (&conn->ibc_lock, flags);
-
- list_for_each_safe (tmp, nxt, txs) {
- tx = list_entry (tmp, kib_tx_t, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT (tx->tx_passive_rdma ||
- !tx->tx_passive_rdma_wait);
-
- LASSERT (tx->tx_passive_rdma_wait ||
- tx->tx_sending != 0);
- } else {
- LASSERT (!tx->tx_passive_rdma_wait);
- LASSERT (tx->tx_sending == 0);
- }
-
- tx->tx_status = -ECONNABORTED;
- tx->tx_passive_rdma_wait = 0;
-
- if (tx->tx_sending == 0) {
- list_del (&tx->tx_list);
- list_add (&tx->tx_list, &zombies);
- }
- }
-
- spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
- kibnal_txlist_done (&zombies, -ECONNABORTED);
-}
-
-tTS_IB_CM_CALLBACK_RETURN
-kibnal_conn_callback (tTS_IB_CM_EVENT event,
- tTS_IB_CM_COMM_ID cid,
- void *param,
- void *arg)
-{
- kib_conn_t *conn = arg;
- int rc;
-
- /* Established Connection Notifier */
-
- switch (event) {
- default:
- CDEBUG(D_NETERROR, "Connection %p -> %s ERROR %d\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), event);
- kibnal_close_conn (conn, -ECONNABORTED);
- break;
-
- case TS_IB_CM_DISCONNECTED:
- CDEBUG(D_NETERROR, "Connection %p -> %s DISCONNECTED.\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_close_conn (conn, 0);
- break;
-
- case TS_IB_CM_IDLE:
- CDEBUG(D_NET, "Connection %p -> %s IDLE.\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- /* LASSERT (no further callbacks) */
- rc = tsIbCmCallbackModify(cid, kibnal_bad_conn_callback, conn);
- LASSERT (rc == 0);
-
- /* NB we wait until the connection has closed before
- * completing outstanding passive RDMAs so we can be sure
- * the network can't touch the mapped memory any more. */
-
- kibnal_abort_txs(conn, &conn->ibc_tx_queue);
- kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
- kibnal_abort_txs(conn, &conn->ibc_tx_queue_nocred);
- kibnal_abort_txs(conn, &conn->ibc_active_txs);
-
- kibnal_conn_decref(conn); /* Lose CM's ref */
- break;
- }
-
- return TS_IB_CM_CALLBACK_PROCEED;
-}
-
-tTS_IB_CM_CALLBACK_RETURN
-kibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
- tTS_IB_CM_COMM_ID cid,
- void *param,
- void *arg)
-{
- kib_conn_t *conn = arg;
- int rc;
-
- switch (event) {
- default:
- if (conn == NULL) {
- /* no connection yet */
- CERROR ("Unexpected event: %d\n", event);
- return TS_IB_CM_CALLBACK_ABORT;
- }
-
- CERROR ("%s event %p -> %s: %d\n",
- (event == TS_IB_CM_IDLE) ? "IDLE" : "Unexpected",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), event);
- kibnal_connreq_done(conn, 0, -ECONNABORTED);
- kibnal_conn_decref(conn); /* drop CM's ref */
- return TS_IB_CM_CALLBACK_ABORT;
-
- case TS_IB_CM_REQ_RECEIVED: {
- struct ib_cm_req_received_param *req = param;
- kib_msg_t *msg = req->remote_private_data;
-
- LASSERT (conn == NULL);
-
- /* Don't really know srcnid until successful unpack */
- CDEBUG(D_NET, "REQ from ?%s?\n", libcfs_nid2str(msg->ibm_srcnid));
-
- rc = kibnal_accept_connreq(&conn, cid, msg,
- req->remote_private_data_len);
- if (rc != 0) {
- CERROR ("Can't accept ?%s?: %d\n",
- libcfs_nid2str(msg->ibm_srcnid), rc);
- return TS_IB_CM_CALLBACK_ABORT;
- }
-
- /* update 'arg' for next callback */
- rc = tsIbCmCallbackModify(cid, kibnal_passive_conn_callback, conn);
- LASSERT (rc == 0);
-
- msg = req->accept_param.reply_private_data;
- kibnal_init_msg(msg, IBNAL_MSG_CONNACK,
- sizeof(msg->ibm_u.connparams));
-
- msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE;
-
- kibnal_pack_msg(msg, conn->ibc_version, 0,
- conn->ibc_peer->ibp_nid,
- conn->ibc_incarnation);
-
- req->accept_param.qp = conn->ibc_qp;
- req->accept_param.reply_private_data_len = msg->ibm_nob;
- req->accept_param.responder_resources = IBNAL_RESPONDER_RESOURCES;
- req->accept_param.initiator_depth = IBNAL_RESPONDER_RESOURCES;
- req->accept_param.rnr_retry_count = IBNAL_RNR_RETRY;
- req->accept_param.flow_control = IBNAL_FLOW_CONTROL;
-
- CDEBUG(D_NET, "Proceeding\n");
- return TS_IB_CM_CALLBACK_PROCEED; /* CM takes my ref on conn */
- }
-
- case TS_IB_CM_ESTABLISHED:
- LASSERT (conn != NULL);
- CWARN("Connection %p -> %s ESTABLISHED.\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- kibnal_connreq_done(conn, 0, 0);
- return TS_IB_CM_CALLBACK_PROCEED;
- }
-}
-
-tTS_IB_CM_CALLBACK_RETURN
-kibnal_active_conn_callback (tTS_IB_CM_EVENT event,
- tTS_IB_CM_COMM_ID cid,
- void *param,
- void *arg)
-{
- kib_conn_t *conn = arg;
- unsigned long flags;
-
- switch (event) {
- case TS_IB_CM_REP_RECEIVED: {
- struct ib_cm_rep_received_param *rep = param;
- kib_msg_t *msg = rep->remote_private_data;
- int nob = rep->remote_private_data_len;
- int rc;
-
- rc = kibnal_unpack_msg(msg, conn->ibc_version, nob);
- if (rc != 0) {
- CERROR ("Error %d unpacking conn ack from %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_connreq_done(conn, 1, rc);
- kibnal_conn_decref(conn); /* drop CM's ref */
- return TS_IB_CM_CALLBACK_ABORT;
- }
-
- if (msg->ibm_type != IBNAL_MSG_CONNACK) {
- CERROR ("Unexpected conn ack type %d from %s\n",
- msg->ibm_type,
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_connreq_done(conn, 1, -EPROTO);
- kibnal_conn_decref(conn); /* drop CM's ref */
- return TS_IB_CM_CALLBACK_ABORT;
- }
-
- if (!lnet_ptlcompat_matchnid(conn->ibc_peer->ibp_nid,
- msg->ibm_srcnid) ||
- !lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
- msg->ibm_dstnid) ||
- msg->ibm_srcstamp != conn->ibc_incarnation ||
- msg->ibm_dststamp != kibnal_data.kib_incarnation) {
- CERROR("Stale conn ack from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_connreq_done(conn, 1, -ESTALE);
- kibnal_conn_decref(conn); /* drop CM's ref */
- return TS_IB_CM_CALLBACK_ABORT;
- }
-
- if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) {
- CERROR ("Bad queue depth %d from %s\n",
- msg->ibm_u.connparams.ibcp_queue_depth,
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_connreq_done(conn, 1, -EPROTO);
- kibnal_conn_decref(conn); /* drop CM's ref */
- return TS_IB_CM_CALLBACK_ABORT;
- }
-
- CDEBUG(D_NET, "Connection %p -> %s REP_RECEIVED.\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE;
- return TS_IB_CM_CALLBACK_PROCEED;
- }
-
- case TS_IB_CM_ESTABLISHED:
- CWARN("Connection %p -> %s ESTABLISHED\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- kibnal_connreq_done(conn, 1, 0);
- return TS_IB_CM_CALLBACK_PROCEED;
-
- case TS_IB_CM_IDLE:
- CDEBUG(D_NETERROR, "Connection %p -> %s IDLE\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /* I assume this connection attempt was rejected because the
- * peer found a stale QP; I'll just try again */
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- kibnal_schedule_active_connect_locked(conn->ibc_peer);
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_connreq_done(conn, 1, -ECONNABORTED);
- kibnal_conn_decref(conn); /* drop CM's ref */
- return TS_IB_CM_CALLBACK_ABORT;
-
- default:
- CDEBUG(D_NETERROR, "Connection %p -> %s ERROR %d\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), event);
- kibnal_connreq_done(conn, 1, -ECONNABORTED);
- kibnal_conn_decref(conn); /* drop CM's ref */
- return TS_IB_CM_CALLBACK_ABORT;
- }
-}
-
-int
-kibnal_pathreq_callback (tTS_IB_CLIENT_QUERY_TID tid, int status,
- struct ib_path_record *resp, int remaining,
- void *arg)
-{
- kib_conn_t *conn = arg;
- kib_peer_t *peer = conn->ibc_peer;
- kib_msg_t *msg = &conn->ibc_connreq->cr_msg;
-
- if (status != 0) {
- CDEBUG (D_NETERROR, "Pathreq %p -> %s failed: %d\n",
- conn, libcfs_nid2str(peer->ibp_nid), status);
- kibnal_connreq_done(conn, 1, status);
- kibnal_conn_decref(conn); /* drop callback's ref */
- return 1; /* non-zero prevents further callbacks */
- }
-
- conn->ibc_connreq->cr_path = *resp;
-
- kibnal_init_msg(msg, IBNAL_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
- msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE;
- kibnal_pack_msg(msg, conn->ibc_version, 0,
- peer->ibp_nid, conn->ibc_incarnation);
-
- conn->ibc_connreq->cr_connparam = (struct ib_cm_active_param) {
- .qp = conn->ibc_qp,
- .req_private_data = msg,
- .req_private_data_len = msg->ibm_nob,
- .responder_resources = IBNAL_RESPONDER_RESOURCES,
- .initiator_depth = IBNAL_RESPONDER_RESOURCES,
- .retry_count = IBNAL_RETRY,
- .rnr_retry_count = IBNAL_RNR_RETRY,
- .cm_response_timeout = *kibnal_tunables.kib_timeout,
- .max_cm_retries = IBNAL_CM_RETRY,
- .flow_control = IBNAL_FLOW_CONTROL,
- };
-
- /* XXX set timeout just like SDP!!!*/
- conn->ibc_connreq->cr_path.packet_life = 13;
-
- /* Flag I'm getting involved with the CM... */
- conn->ibc_state = IBNAL_CONN_CONNECTING;
-
- CDEBUG(D_NET, "Connecting to, service id "LPX64", on %s\n",
- conn->ibc_connreq->cr_svcrsp.ibsr_svc_id,
- libcfs_nid2str(peer->ibp_nid));
-
- /* kibnal_connect_callback gets my conn ref */
- status = ib_cm_connect (&conn->ibc_connreq->cr_connparam,
- &conn->ibc_connreq->cr_path, NULL,
- conn->ibc_connreq->cr_svcrsp.ibsr_svc_id, 0,
- kibnal_active_conn_callback, conn,
- &conn->ibc_comm_id);
- if (status != 0) {
- CERROR ("Connect %p -> %s failed: %d\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
- /* Back out state change: I've not got a CM comm_id yet... */
- conn->ibc_state = IBNAL_CONN_INIT_QP;
- kibnal_connreq_done(conn, 1, status);
- kibnal_conn_decref(conn); /* Drop callback's ref */
- }
-
- return 1; /* non-zero to prevent further callbacks */
-}
-
-void
-kibnal_connect_peer (kib_peer_t *peer)
-{
- kib_conn_t *conn;
- int rc;
-
- conn = kibnal_create_conn();
- if (conn == NULL) {
- CERROR ("Can't allocate conn\n");
- kibnal_peer_connect_failed (peer, 1, -ENOMEM);
- return;
- }
-
- conn->ibc_peer = peer;
- kibnal_peer_addref(peer);
-
- LIBCFS_ALLOC (conn->ibc_connreq, sizeof (*conn->ibc_connreq));
- if (conn->ibc_connreq == NULL) {
- CERROR ("Can't allocate connreq\n");
- kibnal_connreq_done(conn, 1, -ENOMEM);
- kibnal_conn_decref(conn); /* drop my ref */
- return;
- }
-
- memset(conn->ibc_connreq, 0, sizeof (*conn->ibc_connreq));
-
- rc = kibnal_make_svcqry(conn);
- if (rc != 0) {
- kibnal_connreq_done (conn, 1, rc);
- kibnal_conn_decref(conn); /* drop my ref */
- return;
- }
-
- rc = ib_cached_gid_get(kibnal_data.kib_device,
- kibnal_data.kib_port, 0,
- conn->ibc_connreq->cr_gid);
- LASSERT (rc == 0);
-
- /* kibnal_pathreq_callback gets my conn ref */
- rc = tsIbPathRecordRequest (kibnal_data.kib_device,
- kibnal_data.kib_port,
- conn->ibc_connreq->cr_gid,
- conn->ibc_connreq->cr_svcrsp.ibsr_svc_gid,
- conn->ibc_connreq->cr_svcrsp.ibsr_svc_pkey,
- 0,
- *kibnal_tunables.kib_timeout * HZ,
- 0,
- kibnal_pathreq_callback, conn,
- &conn->ibc_connreq->cr_tid);
- if (rc == 0)
- return; /* callback now has my ref on conn */
-
- CERROR ("Path record request %p -> %s failed: %d\n",
- conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kibnal_connreq_done(conn, 1, rc);
- kibnal_conn_decref(conn); /* drop my ref */
-}
-
-int
-kibnal_check_txs (kib_conn_t *conn, struct list_head *txs)
-{
- kib_tx_t *tx;
- struct list_head *ttmp;
- unsigned long flags;
- int timed_out = 0;
-
- spin_lock_irqsave (&conn->ibc_lock, flags);
-
- list_for_each (ttmp, txs) {
- tx = list_entry (ttmp, kib_tx_t, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT (tx->tx_passive_rdma ||
- !tx->tx_passive_rdma_wait);
-
- LASSERT (tx->tx_passive_rdma_wait ||
- tx->tx_sending != 0);
- } else {
- LASSERT (!tx->tx_passive_rdma_wait);
- LASSERT (tx->tx_sending == 0);
- }
-
- if (time_after_eq (jiffies, tx->tx_deadline)) {
- timed_out = 1;
- break;
- }
- }
-
- spin_unlock_irqrestore (&conn->ibc_lock, flags);
- return timed_out;
-}
-
-int
-kibnal_conn_timed_out (kib_conn_t *conn)
-{
- return kibnal_check_txs(conn, &conn->ibc_tx_queue) ||
- kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) ||
- kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) ||
- kibnal_check_txs(conn, &conn->ibc_active_txs);
-}
-
-void
-kibnal_check_conns (int idx)
-{
- struct list_head *peers = &kibnal_data.kib_peers[idx];
- struct list_head *ptmp;
- kib_peer_t *peer;
- kib_conn_t *conn;
- struct list_head *ctmp;
- unsigned long flags;
-
- again:
- /* NB. We expect to have a look at all the peers and not find any
- * rdmas to time out, so we just use a shared lock while we
- * take a look... */
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- list_for_each (ptmp, peers) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
-
- list_for_each (ctmp, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED);
-
-
- /* In case we have enough credits to return via a
- * NOOP, but there were no non-blocking tx descs
- * free to do it last time... */
- kibnal_check_sends(conn);
-
- if (!kibnal_conn_timed_out(conn))
- continue;
-
- kibnal_conn_addref(conn);
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
-
- CERROR("Timed out RDMA with %s\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_close_conn (conn, -ETIMEDOUT);
- kibnal_conn_decref(conn);
-
- /* start again now I've dropped the lock */
- goto again;
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_terminate_conn (kib_conn_t *conn)
-{
- int rc;
-
- CDEBUG(D_NET, "conn %p\n", conn);
- LASSERT (conn->ibc_state == IBNAL_CONN_DEATHROW);
- conn->ibc_state = IBNAL_CONN_ZOMBIE;
-
- rc = ib_cm_disconnect (conn->ibc_comm_id);
- if (rc != 0)
- CERROR ("Error %d disconnecting conn %p -> %s\n",
- rc, conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- kibnal_peer_notify(conn->ibc_peer);
-}
-
-int
-kibnal_reaper (void *arg)
-{
- wait_queue_t wait;
- unsigned long flags;
- kib_conn_t *conn;
- int timeout;
- int i;
- int peer_index = 0;
- unsigned long deadline = jiffies;
-
- cfs_daemonize ("kibnal_reaper");
- cfs_block_allsigs ();
-
- init_waitqueue_entry (&wait, current);
-
- spin_lock_irqsave (&kibnal_data.kib_reaper_lock, flags);
-
- while (!kibnal_data.kib_shutdown) {
- if (!list_empty (&kibnal_data.kib_reaper_conns)) {
- conn = list_entry (kibnal_data.kib_reaper_conns.next,
- kib_conn_t, ibc_list);
- list_del (&conn->ibc_list);
-
- spin_unlock_irqrestore (&kibnal_data.kib_reaper_lock, flags);
-
- switch (conn->ibc_state) {
- case IBNAL_CONN_DEATHROW:
- LASSERT (conn->ibc_comm_id != TS_IB_CM_COMM_ID_INVALID);
- /* Disconnect: conn becomes a zombie in the
- * callback and last ref reschedules it
- * here... */
- kibnal_terminate_conn(conn);
- kibnal_conn_decref(conn);
- break;
-
- case IBNAL_CONN_INIT_QP:
- case IBNAL_CONN_ZOMBIE:
- kibnal_destroy_conn (conn);
- break;
-
- default:
- CERROR ("Bad conn %p state: %d\n",
- conn, conn->ibc_state);
- LBUG();
- }
-
- spin_lock_irqsave (&kibnal_data.kib_reaper_lock, flags);
- continue;
- }
-
- spin_unlock_irqrestore (&kibnal_data.kib_reaper_lock, flags);
-
- /* careful with the jiffy wrap... */
- while ((timeout = (int)(deadline - jiffies)) <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = kibnal_data.kib_peer_hash_size;
-
- /* Time to check for RDMA timeouts on a few more
- * peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
- * every connection 'n' times within a timeout
- * interval, to ensure I detect a timeout on any
- * connection within (n+1)/n times the timeout
- * interval. */
-
- if (*kibnal_tunables.kib_timeout > n * p)
- chunk = (chunk * n * p) /
- *kibnal_tunables.kib_timeout;
- if (chunk == 0)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kibnal_check_conns (peer_index);
- peer_index = (peer_index + 1) %
- kibnal_data.kib_peer_hash_size;
- }
-
- deadline += p * HZ;
- }
-
- kibnal_data.kib_reaper_waketime = jiffies + timeout;
-
- set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue (&kibnal_data.kib_reaper_waitq, &wait);
-
- schedule_timeout (timeout);
-
- set_current_state (TASK_RUNNING);
- remove_wait_queue (&kibnal_data.kib_reaper_waitq, &wait);
-
- spin_lock_irqsave (&kibnal_data.kib_reaper_lock, flags);
- }
-
- spin_unlock_irqrestore (&kibnal_data.kib_reaper_lock, flags);
-
- kibnal_thread_fini ();
- return (0);
-}
-
-int
-kibnal_connd (void *arg)
-{
- long id = (long)arg;
- char name[16];
- wait_queue_t wait;
- unsigned long flags;
- kib_peer_t *peer;
- kib_acceptsock_t *as;
- int did_something;
-
- snprintf(name, sizeof(name), "kibnal_connd_%02ld", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- init_waitqueue_entry (&wait, current);
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- while (!kibnal_data.kib_shutdown) {
- did_something = 0;
-
- if (!list_empty (&kibnal_data.kib_connd_acceptq)) {
- as = list_entry (kibnal_data.kib_connd_acceptq.next,
- kib_acceptsock_t, ibas_list);
- list_del (&as->ibas_list);
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- kibnal_handle_svcqry(as->ibas_sock);
- kibnal_free_acceptsock(as);
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
- }
-
- /* Only handle an outgoing connection request if there is someone left
- * to handle an incoming svcqry */
- if (!list_empty (&kibnal_data.kib_connd_peers) &&
- ((kibnal_data.kib_connd_connecting + 1) <
- *kibnal_tunables.kib_n_connd)) {
- peer = list_entry (kibnal_data.kib_connd_peers.next,
- kib_peer_t, ibp_connd_list);
-
- list_del_init (&peer->ibp_connd_list);
- kibnal_data.kib_connd_connecting++;
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- kibnal_connect_peer (peer);
- kibnal_peer_decref(peer);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
- kibnal_data.kib_connd_connecting--;
- }
-
- if (did_something)
- continue;
-
- set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kibnal_data.kib_connd_waitq, &wait);
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- schedule();
-
- set_current_state (TASK_RUNNING);
- remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- kibnal_thread_fini ();
- return (0);
-}
-
-int
-kibnal_scheduler(void *arg)
-{
- long id = (long)arg;
- char name[16];
- kib_rx_t *rx;
- kib_tx_t *tx;
- unsigned long flags;
- int rc;
- int counter = 0;
- int did_something;
-
- snprintf(name, sizeof(name), "kibnal_sd_%02ld", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
-
- while (!kibnal_data.kib_shutdown) {
- did_something = 0;
-
- while (!list_empty(&kibnal_data.kib_sched_txq)) {
- tx = list_entry(kibnal_data.kib_sched_txq.next,
- kib_tx_t, tx_list);
- list_del(&tx->tx_list);
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
- kibnal_tx_done(tx);
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock,
- flags);
- }
-
- if (!list_empty(&kibnal_data.kib_sched_rxq)) {
- rx = list_entry(kibnal_data.kib_sched_rxq.next,
- kib_rx_t, rx_list);
- list_del(&rx->rx_list);
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- kibnal_rx(rx);
-
- did_something = 1;
- spin_lock_irqsave(&kibnal_data.kib_sched_lock,
- flags);
- }
-
- /* nothing to do or hogging CPU */
- if (!did_something || counter++ == IBNAL_RESCHED) {
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
- counter = 0;
-
- if (!did_something) {
- rc = wait_event_interruptible_exclusive(
- kibnal_data.kib_sched_waitq,
- !list_empty(&kibnal_data.kib_sched_txq) ||
- !list_empty(&kibnal_data.kib_sched_rxq) ||
- kibnal_data.kib_shutdown);
- } else {
- our_cond_resched();
- }
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock,
- flags);
- }
- }
-
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-
- kibnal_thread_fini();
- return (0);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "openiblnd.h"
-
-static char *ipif_basename = "ib";
-CFS_MODULE_PARM(ipif_basename, "s", charp, 0444,
- "IPoIB interface base name");
-
-static int n_connd = 4;
-CFS_MODULE_PARM(n_connd, "i", int, 0444,
- "# of connection daemons");
-
-static int min_reconnect_interval = 1;
-CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644,
- "minimum connection retry interval (seconds)");
-
-static int max_reconnect_interval = 60;
-CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644,
- "maximum connection retry interval (seconds)");
-
-static int concurrent_peers = 1152;
-CFS_MODULE_PARM(concurrent_peers, "i", int, 0444,
- "maximum number of peers that may connect");
-
-static int cksum = 0;
-CFS_MODULE_PARM(cksum, "i", int, 0644,
- "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
- "timeout (seconds)");
-
-static int ntx = 384;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# of message descriptors");
-
-static int credits = 256;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int peer_credits = 16;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
- "# concurrent sends to 1 peer");
-
-static int keepalive = 100;
-CFS_MODULE_PARM(keepalive, "i", int, 0644,
- "Idle time in seconds before sending a keepalive");
-
-kib_tunables_t kibnal_tunables = {
- .kib_ipif_basename = &ipif_basename,
- .kib_n_connd = &n_connd,
- .kib_min_reconnect_interval = &min_reconnect_interval,
- .kib_max_reconnect_interval = &max_reconnect_interval,
- .kib_concurrent_peers = &concurrent_peers,
- .kib_cksum = &cksum,
- .kib_timeout = &timeout,
- .kib_ntx = &ntx,
- .kib_credits = &credits,
- .kib_peercredits = &peer_credits,
- .kib_keepalive = &keepalive,
-};
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "ipif_basename",
- .data = &ipif_basename,
- .maxlen = 1024,
- .mode = 0444,
- .proc_handler = &proc_dostring
- },
- {
- .ctl_name = 2,
- .procname = "n_connd",
- .data = &n_connd,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 3,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 6,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 7,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 8,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 9,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 10,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 11,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {0}
-};
-
-static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
- .ctl_name = 203,
- .procname = "openibnal",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = kibnal_ctl_table
- },
- {0}
-};
-
-int
-kibnal_tunables_init ()
-{
- kibnal_tunables.kib_sysctl =
- cfs_register_sysctl_table(kibnal_top_ctl_table, 0);
-
- if (kibnal_tunables.kib_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-
- return 0;
-}
-
-void
-kibnal_tunables_fini ()
-{
- if (kibnal_tunables.kib_sysctl != NULL)
- cfs_unregister_sysctl_table(kibnal_tunables.kib_sysctl);
-}
-
-#else
-
-int
-kibnal_tunables_init ()
-{
- return 0;
-}
-
-void
-kibnal_tunables_fini ()
-{
-}
-
-#endif
+++ /dev/null
-.deps
-Makefile
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.*.cmd
-.tmp_versions
-.depend
-wirecheck
+++ /dev/null
-MODULES := kptllnd
-
-EXTRA_POST_CFLAGS := @PTLLNDCPPFLAGS@
-
-kptllnd-objs := ptllnd.o \
- ptllnd_cb.o \
- ptllnd_modparams.o \
- ptllnd_peer.o \
- ptllnd_rx_buf.o \
- ptllnd_tx.o \
- ptllnd_ptltrace.o
-
-@INCLUDE_RULES@
+++ /dev/null
-1. This version of the Portals LND is intended to work on the Cray XT3 using
- Cray Portals as a network transport.
-
-2. To enable the building of the Portals LND (ptllnd.ko) configure with the
- following option:
- ./configure --with-portals=<path-to-portals-headers>
-
-3. The following configuration options are supported
-
- ntx:
- The total number of message descritprs
-
- concurrent_peers:
- The maximum number of conncurent peers. Peers attemting
- to connect beyond the maximum will not be allowd.
-
- peer_hash_table_size:
- The number of hash table slots for the peers. This number
- should scale with concurrent_peers.
-
- cksum:
- Set to non-zero to enable message (not RDMA) checksums for
- outgoing packets. Incoming packets will always be checksumed
- if necssary, independnt of this value.
-
- timeout:
- The amount of time a request can linger in a peers active
- queue, before the peer is considered dead. Units: seconds.
-
- portal:
- The portal ID to use for the ptllnd traffic.
-
- rxb_npages:
- The number of pages in a RX Buffer.
-
- credits:
- The maximum total number of concurrent sends that are
- outstanding at any given instant.
-
- peercredits:
- The maximum number of concurrent sends that are
- outstanding to a single piere at any given instant.
-
- max_msg_size:
- The maximum immedate message size. This MUST be
- the same on all nodes in a cluster. A peer connecting
- with a diffrent max_msg_size will be rejected.
+++ /dev/null
-if MODULES
-if BUILD_PTLLND
-modulenet_DATA = kptllnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kptllnd-objs:%.o=%.c) ptllnd.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-#include "ptllnd.h"
-
-lnd_t kptllnd_lnd = {
- .lnd_type = PTLLND,
- .lnd_startup = kptllnd_startup,
- .lnd_shutdown = kptllnd_shutdown,
- .lnd_ctl = kptllnd_ctl,
- .lnd_send = kptllnd_send,
- .lnd_recv = kptllnd_recv,
- .lnd_eager_recv = kptllnd_eager_recv,
-};
-
-kptl_data_t kptllnd_data;
-
-char *
-kptllnd_ptlid2str(ptl_process_id_t id)
-{
- static char strs[64][32];
- static int idx = 0;
-
- unsigned long flags;
- char *str;
-
- spin_lock_irqsave(&kptllnd_data.kptl_ptlid2str_lock, flags);
- str = strs[idx++];
- if (idx >= sizeof(strs)/sizeof(strs[0]))
- idx = 0;
- spin_unlock_irqrestore(&kptllnd_data.kptl_ptlid2str_lock, flags);
-
- snprintf(str, sizeof(strs[0]), FMT_PTLID, id.pid, id.nid);
- return str;
-}
-
-void
-kptllnd_assert_wire_constants (void)
-{
- /* Wire protocol assertions generated by 'wirecheck'
- * running on Linux fedora 2.6.11-co-0.6.4 #1 Mon Jun 19 05:36:13 UTC 2006 i686 i686 i386 GNU
- * with gcc version 4.1.1 20060525 (Red Hat 4.1.1-1) */
-
-
- /* Constants... */
- CLASSERT (PTL_RESERVED_MATCHBITS == 0x100);
- CLASSERT (LNET_MSG_MATCHBITS == 0);
- CLASSERT (PTLLND_MSG_MAGIC == 0x50746C4E);
- CLASSERT (PTLLND_MSG_VERSION == 0x04);
- CLASSERT (PTLLND_RDMA_OK == 0x00);
- CLASSERT (PTLLND_RDMA_FAIL == 0x01);
- CLASSERT (PTLLND_MSG_TYPE_INVALID == 0x00);
- CLASSERT (PTLLND_MSG_TYPE_PUT == 0x01);
- CLASSERT (PTLLND_MSG_TYPE_GET == 0x02);
- CLASSERT (PTLLND_MSG_TYPE_IMMEDIATE == 0x03);
- CLASSERT (PTLLND_MSG_TYPE_NOOP == 0x04);
- CLASSERT (PTLLND_MSG_TYPE_HELLO == 0x05);
- CLASSERT (PTLLND_MSG_TYPE_NAK == 0x06);
-
- /* Checks for struct kptl_msg_t */
- CLASSERT ((int)sizeof(kptl_msg_t) == 136);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_magic) == 0);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_magic) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_version) == 4);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_version) == 2);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_type) == 6);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_type) == 1);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_credits) == 7);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_credits) == 1);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_nob) == 8);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_nob) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_cksum) == 12);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_cksum) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcnid) == 16);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcnid) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcstamp) == 24);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcstamp) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstnid) == 32);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstnid) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dststamp) == 40);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dststamp) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcpid) == 48);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcpid) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstpid) == 52);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstpid) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.immediate) == 56);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.immediate) == 72);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.rdma) == 56);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.rdma) == 80);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.hello) == 56);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.hello) == 12);
-
- /* Checks for struct kptl_immediate_msg_t */
- CLASSERT ((int)sizeof(kptl_immediate_msg_t) == 72);
- CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_hdr) == 0);
- CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_hdr) == 72);
- CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_payload[13]) == 85);
- CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_payload[13]) == 1);
-
- /* Checks for struct kptl_rdma_msg_t */
- CLASSERT ((int)sizeof(kptl_rdma_msg_t) == 80);
- CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_hdr) == 0);
- CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_hdr) == 72);
- CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_matchbits) == 72);
- CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_matchbits) == 8);
-
- /* Checks for struct kptl_hello_msg_t */
- CLASSERT ((int)sizeof(kptl_hello_msg_t) == 12);
- CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_matchbits) == 0);
- CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_matchbits) == 8);
- CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_max_msg_size) == 8);
- CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_max_msg_size) == 4);
-}
-
-const char *kptllnd_evtype2str(int type)
-{
-#define DO_TYPE(x) case x: return #x;
- switch(type)
- {
- DO_TYPE(PTL_EVENT_GET_START);
- DO_TYPE(PTL_EVENT_GET_END);
- DO_TYPE(PTL_EVENT_PUT_START);
- DO_TYPE(PTL_EVENT_PUT_END);
- DO_TYPE(PTL_EVENT_REPLY_START);
- DO_TYPE(PTL_EVENT_REPLY_END);
- DO_TYPE(PTL_EVENT_ACK);
- DO_TYPE(PTL_EVENT_SEND_START);
- DO_TYPE(PTL_EVENT_SEND_END);
- DO_TYPE(PTL_EVENT_UNLINK);
- default:
- return "<unknown event type>";
- }
-#undef DO_TYPE
-}
-
-const char *kptllnd_msgtype2str(int type)
-{
-#define DO_TYPE(x) case x: return #x;
- switch(type)
- {
- DO_TYPE(PTLLND_MSG_TYPE_INVALID);
- DO_TYPE(PTLLND_MSG_TYPE_PUT);
- DO_TYPE(PTLLND_MSG_TYPE_GET);
- DO_TYPE(PTLLND_MSG_TYPE_IMMEDIATE);
- DO_TYPE(PTLLND_MSG_TYPE_HELLO);
- DO_TYPE(PTLLND_MSG_TYPE_NOOP);
- DO_TYPE(PTLLND_MSG_TYPE_NAK);
- default:
- return "<unknown msg type>";
- }
-#undef DO_TYPE
-}
-
-const char *kptllnd_errtype2str(int type)
-{
-#define DO_TYPE(x) case x: return #x;
- switch(type)
- {
- DO_TYPE(PTL_OK);
- DO_TYPE(PTL_SEGV);
- DO_TYPE(PTL_NO_SPACE);
- DO_TYPE(PTL_ME_IN_USE);
- DO_TYPE(PTL_NAL_FAILED);
- DO_TYPE(PTL_NO_INIT);
- DO_TYPE(PTL_IFACE_DUP);
- DO_TYPE(PTL_IFACE_INVALID);
- DO_TYPE(PTL_HANDLE_INVALID);
- DO_TYPE(PTL_MD_INVALID);
- DO_TYPE(PTL_ME_INVALID);
- DO_TYPE(PTL_PROCESS_INVALID);
- DO_TYPE(PTL_PT_INDEX_INVALID);
- DO_TYPE(PTL_SR_INDEX_INVALID);
- DO_TYPE(PTL_EQ_INVALID);
- DO_TYPE(PTL_EQ_DROPPED);
- DO_TYPE(PTL_EQ_EMPTY);
- DO_TYPE(PTL_MD_NO_UPDATE);
- DO_TYPE(PTL_FAIL);
- DO_TYPE(PTL_AC_INDEX_INVALID);
- DO_TYPE(PTL_MD_ILLEGAL);
- DO_TYPE(PTL_ME_LIST_TOO_LONG);
- DO_TYPE(PTL_MD_IN_USE);
- DO_TYPE(PTL_NI_INVALID);
- DO_TYPE(PTL_PID_INVALID);
- DO_TYPE(PTL_PT_FULL);
- DO_TYPE(PTL_VAL_FAILED);
- DO_TYPE(PTL_NOT_IMPLEMENTED);
- DO_TYPE(PTL_NO_ACK);
- DO_TYPE(PTL_EQ_IN_USE);
- DO_TYPE(PTL_PID_IN_USE);
- DO_TYPE(PTL_INV_EQ_SIZE);
- DO_TYPE(PTL_AGAIN);
- default:
- return "<unknown event type>";
- }
-#undef DO_TYPE
-}
-
-__u32
-kptllnd_cksum (void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return (sum == 0) ? 1 : sum;
-}
-
-void
-kptllnd_init_msg(kptl_msg_t *msg, int type, int body_nob)
-{
- msg->ptlm_type = type;
- msg->ptlm_nob = (offsetof(kptl_msg_t, ptlm_u) + body_nob + 7) & ~7;
-
- LASSERT(msg->ptlm_nob <= *kptllnd_tunables.kptl_max_msg_size);
-}
-
-void
-kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer)
-{
- msg->ptlm_magic = PTLLND_MSG_MAGIC;
- msg->ptlm_version = PTLLND_MSG_VERSION;
- /* msg->ptlm_type Filled in kptllnd_init_msg() */
- msg->ptlm_credits = peer->peer_outstanding_credits;
- /* msg->ptlm_nob Filled in kptllnd_init_msg() */
- msg->ptlm_cksum = 0;
- msg->ptlm_srcnid = kptllnd_data.kptl_ni->ni_nid;
- msg->ptlm_srcstamp = peer->peer_myincarnation;
- msg->ptlm_dstnid = peer->peer_id.nid;
- msg->ptlm_dststamp = peer->peer_incarnation;
- msg->ptlm_srcpid = the_lnet.ln_pid;
- msg->ptlm_dstpid = peer->peer_id.pid;
-
- if (*kptllnd_tunables.kptl_checksum) {
- /* NB ptlm_cksum zero while computing cksum */
- msg->ptlm_cksum = kptllnd_cksum(msg,
- offsetof(kptl_msg_t, ptlm_u));
- }
-}
-
-int
-kptllnd_msg_unpack(kptl_msg_t *msg, int nob)
-{
- const int hdr_size = offsetof(kptl_msg_t, ptlm_u);
- __u32 msg_cksum;
- __u16 msg_version;
- int flip;
-
- /* 6 bytes are enough to have received magic + version */
- if (nob < 6) {
- CERROR("Very Short message: %d\n", nob);
- return -EPROTO;
- }
-
- /*
- * Determine if we need to flip
- */
- if (msg->ptlm_magic == PTLLND_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC)) {
- flip = 1;
- } else {
- CERROR("Bad magic: %08x\n", msg->ptlm_magic);
- return -EPROTO;
- }
-
- msg_version = flip ? __swab16(msg->ptlm_version) : msg->ptlm_version;
-
- if (msg_version != PTLLND_MSG_VERSION) {
- CERROR("Bad version: got %04x expected %04x\n",
- (__u32)msg_version, PTLLND_MSG_VERSION);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: got %d, wanted at least %d\n",
- nob, hdr_size);
- return -EPROTO;
- }
-
- /* checksum must be computed with
- * 1) ptlm_cksum zero and
- * 2) BEFORE anything gets modified/flipped
- */
- msg_cksum = flip ? __swab32(msg->ptlm_cksum) : msg->ptlm_cksum;
- msg->ptlm_cksum = 0;
- if (msg_cksum != 0 &&
- msg_cksum != kptllnd_cksum(msg, hdr_size)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
-
- msg->ptlm_version = msg_version;
- msg->ptlm_cksum = msg_cksum;
-
- if (flip) {
- /* These two are 1 byte long so we don't swap them
- But check this assumtion*/
- CLASSERT (sizeof(msg->ptlm_type) == 1);
- CLASSERT (sizeof(msg->ptlm_credits) == 1);
- /* src & dst stamps are opaque cookies */
- __swab32s(&msg->ptlm_nob);
- __swab64s(&msg->ptlm_srcnid);
- __swab64s(&msg->ptlm_dstnid);
- __swab32s(&msg->ptlm_srcpid);
- __swab32s(&msg->ptlm_dstpid);
- }
-
- if (msg->ptlm_nob != nob) {
- CERROR("msg_nob corrupt: got 0x%08x, wanted %08x\n",
- msg->ptlm_nob, nob);
- return -EPROTO;
- }
-
- switch(msg->ptlm_type)
- {
- case PTLLND_MSG_TYPE_PUT:
- case PTLLND_MSG_TYPE_GET:
- if (nob < hdr_size + sizeof(kptl_rdma_msg_t)) {
- CERROR("Short rdma request: got %d, want %d\n",
- nob, hdr_size + (int)sizeof(kptl_rdma_msg_t));
- return -EPROTO;
- }
-
- if (flip)
- __swab64s(&msg->ptlm_u.rdma.kptlrm_matchbits);
-
- if (msg->ptlm_u.rdma.kptlrm_matchbits < PTL_RESERVED_MATCHBITS) {
- CERROR("Bad matchbits "LPX64"\n",
- msg->ptlm_u.rdma.kptlrm_matchbits);
- return -EPROTO;
- }
- break;
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- if (nob < offsetof(kptl_msg_t,
- ptlm_u.immediate.kptlim_payload)) {
- CERROR("Short immediate: got %d, want %d\n", nob,
- (int)offsetof(kptl_msg_t,
- ptlm_u.immediate.kptlim_payload));
- return -EPROTO;
- }
- /* Do nothing */
- break;
-
- case PTLLND_MSG_TYPE_NOOP:
- case PTLLND_MSG_TYPE_NAK:
- /* Do nothing */
- break;
-
- case PTLLND_MSG_TYPE_HELLO:
- if (nob < hdr_size + sizeof(kptl_hello_msg_t)) {
- CERROR("Short hello: got %d want %d\n",
- nob, hdr_size + (int)sizeof(kptl_hello_msg_t));
- return -EPROTO;
- }
- if (flip) {
- __swab64s(&msg->ptlm_u.hello.kptlhm_matchbits);
- __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size);
- }
- break;
-
- default:
- CERROR("Bad message type: 0x%02x\n", (__u32)msg->ptlm_type);
- return -EPROTO;
- }
-
- return 0;
-}
-
-int
-kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
-
- CDEBUG(D_NET, ">>> kptllnd_ctl cmd=%u arg=%p\n", cmd, arg);
-
- /*
- * Validate that the context block is actually
- * pointing to this interface
- */
- LASSERT (ni == kptllnd_data.kptl_ni);
-
- switch(cmd) {
- case IOC_LIBCFS_DEL_PEER: {
- lnet_process_id_t id;
-
- id.nid = data->ioc_nid;
- id.pid = data->ioc_u32[1];
-
- rc = kptllnd_peer_del(id);
- break;
- }
-
- case IOC_LIBCFS_GET_PEER: {
- lnet_process_id_t id = {.nid = LNET_NID_ANY,
- .pid = LNET_PID_ANY};
- __u64 incarnation = 0;
- __u64 next_matchbits = 0;
- __u64 last_matchbits_seen = 0;
- int state = 0;
- int sent_hello = 0;
- int refcount = 0;
- int nsendq = 0;
- int nactiveq = 0;
- int credits = 0;
- int outstanding_credits = 0;
-
- rc = kptllnd_get_peer_info(data->ioc_count, &id,
- &state, &sent_hello,
- &refcount, &incarnation,
- &next_matchbits, &last_matchbits_seen,
- &nsendq, &nactiveq,
- &credits, &outstanding_credits);
- /* wince... */
- data->ioc_nid = id.nid;
- data->ioc_net = state;
- data->ioc_flags = sent_hello;
- data->ioc_count = refcount;
- data->ioc_u64[0] = incarnation;
- data->ioc_u32[0] = (__u32)next_matchbits;
- data->ioc_u32[1] = (__u32)(next_matchbits >> 32);
- data->ioc_u32[2] = (__u32)last_matchbits_seen;
- data->ioc_u32[3] = (__u32)(last_matchbits_seen >> 32);
- data->ioc_u32[4] = id.pid;
- data->ioc_u32[5] = (nsendq << 16) | nactiveq;
- data->ioc_u32[6] = (credits << 16) | outstanding_credits;
- break;
- }
-
- default:
- rc=-EINVAL;
- break;
- }
- CDEBUG(D_NET, "<<< kptllnd_ctl rc=%d\n", rc);
- return rc;
-}
-
-int
-kptllnd_startup (lnet_ni_t *ni)
-{
- int rc;
- int i;
- int spares;
- struct timeval tv;
- ptl_err_t ptl_rc;
-
- LASSERT (ni->ni_lnd == &kptllnd_lnd);
-
- if (kptllnd_data.kptl_init != PTLLND_INIT_NOTHING) {
- CERROR("Only 1 instance supported\n");
- return -EPERM;
- }
-
- if (*kptllnd_tunables.kptl_max_procs_per_node < 1) {
- CERROR("max_procs_per_node must be >= 1\n");
- return -EINVAL;
- }
-
- /* kptl_msg_t::ptlm_credits is only a __u8 */
- if (*kptllnd_tunables.kptl_peercredits > 255) {
- CERROR("kptl_peercredits must be <= 255\n");
- return -EINVAL;
- }
-
- *kptllnd_tunables.kptl_max_msg_size &= ~7;
- if (*kptllnd_tunables.kptl_max_msg_size < PTLLND_MIN_BUFFER_SIZE)
- *kptllnd_tunables.kptl_max_msg_size = PTLLND_MIN_BUFFER_SIZE;
-
- CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
- CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
-
- /*
- * zero pointers, flags etc
- * put everything into a known state.
- */
- memset (&kptllnd_data, 0, sizeof (kptllnd_data));
- kptllnd_data.kptl_eqh = PTL_INVALID_HANDLE;
- kptllnd_data.kptl_nih = PTL_INVALID_HANDLE;
-
- /*
- * Setup the sched locks/lists/waitq
- */
- spin_lock_init(&kptllnd_data.kptl_sched_lock);
- init_waitqueue_head(&kptllnd_data.kptl_sched_waitq);
- INIT_LIST_HEAD(&kptllnd_data.kptl_sched_txq);
- INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxq);
- INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxbq);
-
- /* init kptl_ptlid2str_lock before any call to kptllnd_ptlid2str */
- spin_lock_init(&kptllnd_data.kptl_ptlid2str_lock);
-
- /*
- * Setup the tx locks/lists
- */
- spin_lock_init(&kptllnd_data.kptl_tx_lock);
- INIT_LIST_HEAD(&kptllnd_data.kptl_idle_txs);
- atomic_set(&kptllnd_data.kptl_ntx, 0);
-
- /*
- * Uptick the module reference count
- */
- PORTAL_MODULE_USE;
-
- /*
- * Setup pointers between the ni and context data block
- */
- kptllnd_data.kptl_ni = ni;
- ni->ni_data = &kptllnd_data;
-
- /*
- * Setup Credits
- */
- ni->ni_maxtxcredits = *kptllnd_tunables.kptl_credits;
- ni->ni_peertxcredits = *kptllnd_tunables.kptl_peercredits;
-
- kptllnd_data.kptl_expected_peers =
- *kptllnd_tunables.kptl_max_nodes *
- *kptllnd_tunables.kptl_max_procs_per_node;
-
- /*
- * Initialize the Network interface instance
- * We use the default because we don't have any
- * way to choose a better interface.
- * Requested and actual limits are ignored.
- */
- ptl_rc = PtlNIInit(
-#ifdef _USING_LUSTRE_PORTALS_
- PTL_IFACE_DEFAULT,
-#else
- CRAY_KERN_NAL,
-#endif
- *kptllnd_tunables.kptl_pid, NULL, NULL,
- &kptllnd_data.kptl_nih);
-
- /*
- * Note: PTL_IFACE_DUP simply means that the requested
- * interface was already inited and that we're sharing it.
- * Which is ok.
- */
- if (ptl_rc != PTL_OK && ptl_rc != PTL_IFACE_DUP) {
- CERROR ("PtlNIInit: error %s(%d)\n",
- kptllnd_errtype2str(ptl_rc), ptl_rc);
- rc = -EINVAL;
- goto failed;
- }
-
- /* NB eq size irrelevant if using a callback */
- ptl_rc = PtlEQAlloc(kptllnd_data.kptl_nih,
- 8, /* size */
- kptllnd_eq_callback, /* handler callback */
- &kptllnd_data.kptl_eqh); /* output handle */
- if (ptl_rc != PTL_OK) {
- CERROR("PtlEQAlloc failed %s(%d)\n",
- kptllnd_errtype2str(ptl_rc), ptl_rc);
- rc = -ENOMEM;
- goto failed;
- }
-
- /*
- * Fetch the lower NID
- */
- ptl_rc = PtlGetId(kptllnd_data.kptl_nih,
- &kptllnd_data.kptl_portals_id);
- if (ptl_rc != PTL_OK) {
- CERROR ("PtlGetID: error %s(%d)\n",
- kptllnd_errtype2str(ptl_rc), ptl_rc);
- rc = -EINVAL;
- goto failed;
- }
-
- if (kptllnd_data.kptl_portals_id.pid != *kptllnd_tunables.kptl_pid) {
- /* The kernel ptllnd must have the expected PID */
- CERROR("Unexpected PID: %u (%u expected)\n",
- kptllnd_data.kptl_portals_id.pid,
- *kptllnd_tunables.kptl_pid);
- rc = -EINVAL;
- goto failed;
- }
-
- ni->ni_nid = kptllnd_ptl2lnetnid(kptllnd_data.kptl_portals_id.nid);
-
- CDEBUG(D_NET, "ptl id=%s, lnet id=%s\n",
- kptllnd_ptlid2str(kptllnd_data.kptl_portals_id),
- libcfs_nid2str(ni->ni_nid));
-
- /* Initialized the incarnation - it must be for-all-time unique, even
- * accounting for the fact that we increment it when we disconnect a
- * peer that's using it */
- do_gettimeofday(&tv);
- kptllnd_data.kptl_incarnation = (((__u64)tv.tv_sec) * 1000000) +
- tv.tv_usec;
- CDEBUG(D_NET, "Incarnation="LPX64"\n", kptllnd_data.kptl_incarnation);
-
- /*
- * Allocate and setup the peer hash table
- */
- rwlock_init(&kptllnd_data.kptl_peer_rw_lock);
- init_waitqueue_head(&kptllnd_data.kptl_watchdog_waitq);
- INIT_LIST_HEAD(&kptllnd_data.kptl_closing_peers);
- INIT_LIST_HEAD(&kptllnd_data.kptl_zombie_peers);
-
- kptllnd_data.kptl_peer_hash_size =
- *kptllnd_tunables.kptl_peer_hash_table_size;
- LIBCFS_ALLOC(kptllnd_data.kptl_peers,
- (kptllnd_data.kptl_peer_hash_size *
- sizeof(struct list_head)));
- if (kptllnd_data.kptl_peers == NULL) {
- CERROR("Failed to allocate space for peer hash table size=%d\n",
- kptllnd_data.kptl_peer_hash_size);
- rc = -ENOMEM;
- goto failed;
- }
- for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++)
- INIT_LIST_HEAD(&kptllnd_data.kptl_peers[i]);
-
- LIBCFS_ALLOC(kptllnd_data.kptl_nak_msg, offsetof(kptl_msg_t, ptlm_u));
- if (kptllnd_data.kptl_nak_msg == NULL) {
- CERROR("Can't allocate NAK msg\n");
- rc = -ENOMEM;
- goto failed;
- }
- memset(kptllnd_data.kptl_nak_msg, 0, offsetof(kptl_msg_t, ptlm_u));
- kptllnd_init_msg(kptllnd_data.kptl_nak_msg, PTLLND_MSG_TYPE_NAK, 0);
- kptllnd_data.kptl_nak_msg->ptlm_magic = PTLLND_MSG_MAGIC;
- kptllnd_data.kptl_nak_msg->ptlm_version = PTLLND_MSG_VERSION;
- kptllnd_data.kptl_nak_msg->ptlm_srcpid = the_lnet.ln_pid;
- kptllnd_data.kptl_nak_msg->ptlm_srcnid = ni->ni_nid;
- kptllnd_data.kptl_nak_msg->ptlm_srcstamp = kptllnd_data.kptl_incarnation;
- kptllnd_data.kptl_nak_msg->ptlm_dstpid = LNET_PID_ANY;
- kptllnd_data.kptl_nak_msg->ptlm_dstnid = LNET_NID_ANY;
-
- kptllnd_rx_buffer_pool_init(&kptllnd_data.kptl_rx_buffer_pool);
-
- kptllnd_data.kptl_rx_cache =
- cfs_mem_cache_create("ptllnd_rx",
- sizeof(kptl_rx_t) +
- *kptllnd_tunables.kptl_max_msg_size,
- 0, /* offset */
- 0); /* flags */
- if (kptllnd_data.kptl_rx_cache == NULL) {
- CERROR("Can't create slab for RX descriptors\n");
- rc = -ENOMEM;
- goto failed;
- }
-
- /* lists/ptrs/locks initialised */
- kptllnd_data.kptl_init = PTLLND_INIT_DATA;
-
- /*****************************************************/
-
- rc = kptllnd_setup_tx_descs();
- if (rc != 0) {
- CERROR("Can't pre-allocate %d TX descriptors: %d\n",
- *kptllnd_tunables.kptl_ntx, rc);
- goto failed;
- }
-
- /* Start the scheduler threads for handling incoming requests. No need
- * to advance the state because this will be automatically cleaned up
- * now that PTLNAT_INIT_DATA state has been entered */
- CDEBUG(D_NET, "starting %d scheduler threads\n", PTLLND_N_SCHED);
- for (i = 0; i < PTLLND_N_SCHED; i++) {
- rc = kptllnd_thread_start(kptllnd_scheduler, (void *)((long)i));
- if (rc != 0) {
- CERROR("Can't spawn scheduler[%d]: %d\n", i, rc);
- goto failed;
- }
- }
-
- rc = kptllnd_thread_start(kptllnd_watchdog, NULL);
- if (rc != 0) {
- CERROR("Can't spawn watchdog: %d\n", rc);
- goto failed;
- }
-
- /* Ensure that 'rxb_nspare' buffers can be off the net (being emptied)
- * and we will still have enough buffers posted for all our peers */
- spares = *kptllnd_tunables.kptl_rxb_nspare *
- ((*kptllnd_tunables.kptl_rxb_npages * PAGE_SIZE)/
- *kptllnd_tunables.kptl_max_msg_size);
-
- /* reserve and post the buffers */
- rc = kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool,
- kptllnd_data.kptl_expected_peers +
- spares);
- if (rc != 0) {
- CERROR("Can't reserve RX Buffer pool: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- kptllnd_data.kptl_init = PTLLND_INIT_ALL;
-
- /*****************************************************/
-
- if (*kptllnd_tunables.kptl_checksum)
- CWARN("Checksumming enabled\n");
-
- CDEBUG(D_NET, "<<< kptllnd_startup SUCCESS\n");
- return 0;
-
- failed:
- CDEBUG(D_NET, "kptllnd_startup failed rc=%d\n", rc);
- kptllnd_shutdown(ni);
- return rc;
-}
-
-void
-kptllnd_shutdown (lnet_ni_t *ni)
-{
- int i;
- ptl_err_t prc;
- lnet_process_id_t process_id;
- unsigned long flags;
-
- CDEBUG(D_MALLOC, "before LND cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- LASSERT (ni == kptllnd_data.kptl_ni);
-
- switch (kptllnd_data.kptl_init) {
- default:
- LBUG();
-
- case PTLLND_INIT_ALL:
- case PTLLND_INIT_DATA:
- /* Stop receiving */
- kptllnd_rx_buffer_pool_fini(&kptllnd_data.kptl_rx_buffer_pool);
- LASSERT (list_empty(&kptllnd_data.kptl_sched_rxq));
- LASSERT (list_empty(&kptllnd_data.kptl_sched_rxbq));
-
- /* Hold peertable lock to interleave cleanly with peer birth/death */
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- LASSERT (kptllnd_data.kptl_shutdown == 0);
- kptllnd_data.kptl_shutdown = 1; /* phase 1 == destroy peers */
-
- /* no new peers possible now */
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock,
- flags);
-
- /* nuke all existing peers */
- process_id.nid = LNET_NID_ANY;
- process_id.pid = LNET_PID_ANY;
- kptllnd_peer_del(process_id);
-
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- LASSERT (kptllnd_data.kptl_n_active_peers == 0);
-
- i = 2;
- while (kptllnd_data.kptl_npeers != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
- "Waiting for %d peers to terminate\n",
- kptllnd_data.kptl_npeers);
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock,
- flags);
-
- cfs_pause(cfs_time_seconds(1));
-
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock,
- flags);
- }
-
- LASSERT(list_empty(&kptllnd_data.kptl_closing_peers));
- LASSERT(list_empty(&kptllnd_data.kptl_zombie_peers));
- LASSERT (kptllnd_data.kptl_peers != NULL);
- for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++)
- LASSERT (list_empty (&kptllnd_data.kptl_peers[i]));
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
- CDEBUG(D_NET, "All peers deleted\n");
-
- /* Shutdown phase 2: kill the daemons... */
- kptllnd_data.kptl_shutdown = 2;
- mb();
-
- i = 2;
- while (atomic_read (&kptllnd_data.kptl_nthreads) != 0) {
- /* Wake up all threads*/
- wake_up_all(&kptllnd_data.kptl_sched_waitq);
- wake_up_all(&kptllnd_data.kptl_watchdog_waitq);
-
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d threads to terminate\n",
- atomic_read(&kptllnd_data.kptl_nthreads));
- cfs_pause(cfs_time_seconds(1));
- }
-
- CDEBUG(D_NET, "All Threads stopped\n");
- LASSERT(list_empty(&kptllnd_data.kptl_sched_txq));
-
- kptllnd_cleanup_tx_descs();
-
- /* Nothing here now, but libcfs might soon require
- * us to explicitly destroy wait queues and semaphores
- * that would be done here */
-
- /* fall through */
-
- case PTLLND_INIT_NOTHING:
- CDEBUG(D_NET, "PTLLND_INIT_NOTHING\n");
- break;
- }
-
- if (!PtlHandleIsEqual(kptllnd_data.kptl_eqh, PTL_INVALID_HANDLE)) {
- prc = PtlEQFree(kptllnd_data.kptl_eqh);
- if (prc != PTL_OK)
- CERROR("Error %s(%d) freeing portals EQ\n",
- kptllnd_errtype2str(prc), prc);
- }
-
- if (!PtlHandleIsEqual(kptllnd_data.kptl_nih, PTL_INVALID_HANDLE)) {
- prc = PtlNIFini(kptllnd_data.kptl_nih);
- if (prc != PTL_OK)
- CERROR("Error %s(%d) finalizing portals NI\n",
- kptllnd_errtype2str(prc), prc);
- }
-
- LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0);
- LASSERT (list_empty(&kptllnd_data.kptl_idle_txs));
-
- if (kptllnd_data.kptl_rx_cache != NULL)
- cfs_mem_cache_destroy(kptllnd_data.kptl_rx_cache);
-
- if (kptllnd_data.kptl_peers != NULL)
- LIBCFS_FREE (kptllnd_data.kptl_peers,
- sizeof (struct list_head) *
- kptllnd_data.kptl_peer_hash_size);
-
- if (kptllnd_data.kptl_nak_msg != NULL)
- LIBCFS_FREE (kptllnd_data.kptl_nak_msg,
- offsetof(kptl_msg_t, ptlm_u));
-
- memset(&kptllnd_data, 0, sizeof(kptllnd_data));
-
- CDEBUG(D_MALLOC, "after LND cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- PORTAL_MODULE_UNUSE;
-}
-
-int __init
-kptllnd_module_init (void)
-{
- int rc;
-
- kptllnd_assert_wire_constants();
-
- rc = kptllnd_tunables_init();
- if (rc != 0)
- return rc;
-
- kptllnd_init_ptltrace();
-
- lnet_register_lnd(&kptllnd_lnd);
-
- return 0;
-}
-
-void __exit
-kptllnd_module_fini (void)
-{
- lnet_unregister_lnd(&kptllnd_lnd);
- kptllnd_tunables_fini();
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel Portals LND v1.00");
-MODULE_LICENSE("GPL");
-
-module_init(kptllnd_module_init);
-module_exit(kptllnd_module_fini);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-#include <portals/p30.h>
-#ifdef CRAY_XT3
-#include <portals/ptltrace.h>
-#endif
-#include <lnet/ptllnd.h> /* Depends on portals/p30.h */
-
-/*
- * Define this to enable console debug logging
- * and simulation
- */
-//#define PJK_DEBUGGING
-
-#ifdef CONFIG_SMP
-# define PTLLND_N_SCHED num_online_cpus() /* # schedulers */
-#else
-# define PTLLND_N_SCHED 1 /* # schedulers */
-#endif
-
-#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peercredits)-1)
- /* when eagerly to return credits */
-
-typedef struct
-{
- int *kptl_ntx; /* # tx descs to pre-allocate */
- int *kptl_max_nodes; /* max # nodes all talking to me */
- int *kptl_max_procs_per_node; /* max # processes per node */
- int *kptl_checksum; /* checksum kptl_msg_t? */
- int *kptl_timeout; /* comms timeout (seconds) */
- int *kptl_portal; /* portal number */
- int *kptl_pid; /* portals PID (self + kernel peers) */
- int *kptl_rxb_npages; /* number of pages for rx buffer */
- int *kptl_rxb_nspare; /* number of spare rx buffers */
- int *kptl_credits; /* number of credits */
- int *kptl_peercredits; /* number of credits */
- int *kptl_max_msg_size; /* max immd message size*/
- int *kptl_peer_hash_table_size; /* # slots in peer hash table */
- int *kptl_reschedule_loops; /* scheduler yield loops */
- int *kptl_ack_puts; /* make portals ack PUTs */
-#ifdef CRAY_XT3
- int *kptl_ptltrace_on_timeout; /* dump pltrace on timeout? */
- char **kptl_ptltrace_basename; /* ptltrace dump file basename */
-#endif
-#ifdef PJK_DEBUGGING
- int *kptl_simulation_bitmap;/* simulation bitmap */
-#endif
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kptl_sysctl; /* sysctl interface */
-#endif
-} kptl_tunables_t;
-
-#include "lnet/ptllnd_wire.h"
-
-/***********************************************************************/
-
-typedef struct kptl_data kptl_data_t;
-typedef struct kptl_rx_buffer kptl_rx_buffer_t;
-typedef struct kptl_peer kptl_peer_t;
-
-typedef struct {
- char eva_type;
-} kptl_eventarg_t;
-
-#define PTLLND_EVENTARG_TYPE_MSG 0x1
-#define PTLLND_EVENTARG_TYPE_RDMA 0x2
-#define PTLLND_EVENTARG_TYPE_BUF 0x3
-
-typedef struct kptl_rx /* receive message */
-{
- struct list_head rx_list; /* queue for attention */
- kptl_rx_buffer_t *rx_rxb; /* the rx buffer pointer */
- kptl_msg_t *rx_msg; /* received message */
- int rx_nob; /* received message size */
- unsigned long rx_treceived; /* time received */
- ptl_process_id_t rx_initiator; /* sender's address */
-#ifdef CRAY_XT3
- ptl_uid_t rx_uid; /* sender's uid */
-#endif
- kptl_peer_t *rx_peer; /* pointer to peer */
- char rx_space[0]; /* copy of incoming request */
-} kptl_rx_t;
-
-#define PTLLND_POSTRX_DONT_POST 0 /* don't post */
-#define PTLLND_POSTRX_NO_CREDIT 1 /* post: no credits */
-#define PTLLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */
-
-typedef struct kptl_rx_buffer_pool
-{
- spinlock_t rxbp_lock;
- struct list_head rxbp_list; /* all allocated buffers */
- int rxbp_count; /* # allocated buffers */
- int rxbp_reserved; /* # requests to buffer */
- int rxbp_shutdown; /* shutdown flag */
-} kptl_rx_buffer_pool_t;
-
-struct kptl_rx_buffer
-{
- kptl_rx_buffer_pool_t *rxb_pool;
- struct list_head rxb_list; /* for the rxb_pool list */
- struct list_head rxb_repost_list;/* for the kptl_sched_rxbq list */
- int rxb_posted:1; /* on the net */
- int rxb_idle:1; /* all done */
- kptl_eventarg_t rxb_eventarg; /* event->md.user_ptr */
- int rxb_refcount; /* reference count */
- ptl_handle_md_t rxb_mdh; /* the portals memory descriptor (MD) handle */
- char *rxb_buffer; /* the buffer */
-
-};
-
-enum kptl_tx_type
-{
- TX_TYPE_RESERVED = 0,
- TX_TYPE_SMALL_MESSAGE = 1,
- TX_TYPE_PUT_REQUEST = 2,
- TX_TYPE_GET_REQUEST = 3,
- TX_TYPE_PUT_RESPONSE = 4,
- TX_TYPE_GET_RESPONSE = 5,
-};
-
-typedef union {
-#ifdef _USING_LUSTRE_PORTALS_
- struct iovec iov[PTL_MD_MAX_IOV];
- lnet_kiov_t kiov[PTL_MD_MAX_IOV];
-#else
- ptl_md_iovec_t iov[PTL_MD_MAX_IOV];
-#endif
-} kptl_fragvec_t;
-
-typedef struct kptl_tx /* transmit message */
-{
- struct list_head tx_list; /* queue on idle_txs etc */
- atomic_t tx_refcount; /* reference count*/
- enum kptl_tx_type tx_type; /* small msg/{put,get}{req,resp} */
- int tx_active:1; /* queued on the peer */
- int tx_idle:1; /* on the free list */
- int tx_acked:1; /* portals ACK wanted (for debug only) */
- kptl_eventarg_t tx_msg_eventarg; /* event->md.user_ptr */
- kptl_eventarg_t tx_rdma_eventarg; /* event->md.user_ptr */
- int tx_status; /* the status of this tx descriptor */
- ptl_handle_md_t tx_rdma_mdh; /* RDMA buffer */
- ptl_handle_md_t tx_msg_mdh; /* the portals MD handle for the initial message */
- lnet_msg_t *tx_lnet_msg; /* LNET message to finalize */
- lnet_msg_t *tx_lnet_replymsg; /* LNET reply message to finalize */
- kptl_msg_t *tx_msg; /* the message data */
- kptl_peer_t *tx_peer; /* the peer this is waiting on */
- unsigned long tx_deadline; /* deadline */
- unsigned long tx_tposted; /* time posted */
- ptl_md_t tx_rdma_md; /* rdma descriptor */
- kptl_fragvec_t *tx_frags; /* buffer fragments */
-} kptl_tx_t;
-
-enum kptllnd_peer_state
-{
- PEER_STATE_UNINITIALIZED = 0,
- PEER_STATE_ALLOCATED = 1,
- PEER_STATE_WAITING_HELLO = 2,
- PEER_STATE_ACTIVE = 3,
- PEER_STATE_CLOSING = 4,
- PEER_STATE_ZOMBIE = 5,
-};
-
-struct kptl_peer
-{
- struct list_head peer_list;
- atomic_t peer_refcount; /* The current refrences */
- enum kptllnd_peer_state peer_state;
- spinlock_t peer_lock; /* serialize */
- struct list_head peer_noops; /* PTLLND_MSG_TYPE_NOOP txs */
- struct list_head peer_sendq; /* txs waiting for mh handles */
- struct list_head peer_activeq; /* txs awaiting completion */
- lnet_process_id_t peer_id; /* Peer's LNET id */
- ptl_process_id_t peer_ptlid; /* Peer's portals id */
- __u64 peer_incarnation; /* peer's incarnation */
- __u64 peer_myincarnation; /* my incarnation at HELLO */
- int peer_sent_hello; /* have I sent HELLO? */
- int peer_credits; /* number of send credits */
- int peer_outstanding_credits;/* number of peer credits to return */
- int peer_sent_credits; /* #msg buffers posted for peer */
- int peer_max_msg_size; /* peer's rx buffer size */
- int peer_error; /* errno on closing this peer */
- int peer_retry_noop; /* need to retry returning credits */
- int peer_check_stamp; /* watchdog check stamp */
- cfs_time_t peer_last_alive; /* when (in jiffies) I was last alive */
- __u64 peer_next_matchbits; /* Next value to register RDMA from peer */
- __u64 peer_last_matchbits_seen; /* last matchbits used to RDMA to peer */
-};
-
-struct kptl_data
-{
- int kptl_init; /* initialisation state */
- volatile int kptl_shutdown; /* shut down? */
- atomic_t kptl_nthreads; /* # live threads */
- lnet_ni_t *kptl_ni; /* _the_ LND instance */
- ptl_handle_ni_t kptl_nih; /* network inteface handle */
- ptl_process_id_t kptl_portals_id; /* Portals ID of interface */
- __u64 kptl_incarnation; /* which one am I */
- ptl_handle_eq_t kptl_eqh; /* Event Queue (EQ) */
-
- spinlock_t kptl_sched_lock; /* serialise... */
- wait_queue_head_t kptl_sched_waitq; /* schedulers sleep here */
- struct list_head kptl_sched_txq; /* tx requiring attention */
- struct list_head kptl_sched_rxq; /* rx requiring attention */
- struct list_head kptl_sched_rxbq; /* rxb requiring reposting */
-
- wait_queue_head_t kptl_watchdog_waitq; /* watchdog sleeps here */
-
- kptl_rx_buffer_pool_t kptl_rx_buffer_pool; /* rx buffer pool */
- cfs_mem_cache_t* kptl_rx_cache; /* rx descripter cache */
-
- atomic_t kptl_ntx; /* # tx descs allocated */
- spinlock_t kptl_tx_lock; /* serialise idle tx list*/
- struct list_head kptl_idle_txs; /* idle tx descriptors */
-
- rwlock_t kptl_peer_rw_lock; /* lock for peer table */
- struct list_head *kptl_peers; /* hash table of all my known peers */
- struct list_head kptl_closing_peers; /* peers being closed */
- struct list_head kptl_zombie_peers; /* peers waiting for refs to drain */
- int kptl_peer_hash_size; /* size of kptl_peers */
- int kptl_npeers; /* # peers extant */
- int kptl_n_active_peers; /* # active peers */
- int kptl_expected_peers; /* # peers I can buffer HELLOs from */
-
- kptl_msg_t *kptl_nak_msg; /* common NAK message */
- spinlock_t kptl_ptlid2str_lock; /* serialise str ops */
-};
-
-enum
-{
- PTLLND_INIT_NOTHING = 0,
- PTLLND_INIT_DATA,
- PTLLND_INIT_ALL,
-};
-
-extern kptl_tunables_t kptllnd_tunables;
-extern kptl_data_t kptllnd_data;
-
-static inline lnet_nid_t
-kptllnd_ptl2lnetnid(ptl_nid_t ptl_nid)
-{
-#ifdef _USING_LUSTRE_PORTALS_
- return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid),
- LNET_NIDADDR(ptl_nid));
-#else
- return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid),
- ptl_nid);
-#endif
-}
-
-static inline ptl_nid_t
-kptllnd_lnet2ptlnid(lnet_nid_t lnet_nid)
-{
-#ifdef _USING_LUSTRE_PORTALS_
- return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_portals_id.nid),
- LNET_NIDADDR(lnet_nid));
-#else
- return LNET_NIDADDR(lnet_nid);
-#endif
-}
-
-int kptllnd_startup(lnet_ni_t *ni);
-void kptllnd_shutdown(lnet_ni_t *ni);
-int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-int kptllnd_eager_recv(struct lnet_ni *ni, void *private,
- lnet_msg_t *msg, void **new_privatep);
-void kptllnd_eq_callback(ptl_event_t *evp);
-int kptllnd_scheduler(void *arg);
-int kptllnd_watchdog(void *arg);
-int kptllnd_thread_start(int (*fn)(void *arg), void *arg);
-int kptllnd_tunables_init(void);
-void kptllnd_tunables_fini(void);
-
-const char *kptllnd_evtype2str(int evtype);
-const char *kptllnd_msgtype2str(int msgtype);
-const char *kptllnd_errtype2str(int errtype);
-
-static inline void *
-kptllnd_eventarg2obj (kptl_eventarg_t *eva)
-{
- switch (eva->eva_type) {
- default:
- LBUG();
- case PTLLND_EVENTARG_TYPE_BUF:
- return list_entry(eva, kptl_rx_buffer_t, rxb_eventarg);
- case PTLLND_EVENTARG_TYPE_RDMA:
- return list_entry(eva, kptl_tx_t, tx_rdma_eventarg);
- case PTLLND_EVENTARG_TYPE_MSG:
- return list_entry(eva, kptl_tx_t, tx_msg_eventarg);
- }
-}
-
-/*
- * RX BUFFER SUPPORT FUNCTIONS
- */
-void kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp);
-void kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp);
-int kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count);
-void kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp, int count);
-void kptllnd_rx_buffer_callback(ptl_event_t *ev);
-void kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb);
-
-static inline int
-kptllnd_rx_buffer_size(void)
-{
- return PAGE_SIZE * (*kptllnd_tunables.kptl_rxb_npages);
-}
-
-static inline void
-kptllnd_rx_buffer_addref(kptl_rx_buffer_t *rxb)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags);
- rxb->rxb_refcount++;
- spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags);
-}
-
-static inline void
-kptllnd_rx_buffer_decref_locked(kptl_rx_buffer_t *rxb)
-{
- if (--(rxb->rxb_refcount) == 0) {
- spin_lock(&kptllnd_data.kptl_sched_lock);
-
- list_add_tail(&rxb->rxb_repost_list,
- &kptllnd_data.kptl_sched_rxbq);
- wake_up(&kptllnd_data.kptl_sched_waitq);
-
- spin_unlock(&kptllnd_data.kptl_sched_lock);
- }
-}
-
-static inline void
-kptllnd_rx_buffer_decref(kptl_rx_buffer_t *rxb)
-{
- unsigned long flags;
- int count;
-
- spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags);
- count = --(rxb->rxb_refcount);
- spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags);
-
- if (count == 0)
- kptllnd_rx_buffer_post(rxb);
-}
-
-/*
- * RX SUPPORT FUNCTIONS
- */
-void kptllnd_rx_parse(kptl_rx_t *rx);
-void kptllnd_rx_done(kptl_rx_t *rx, int post_credit);
-
-/*
- * PEER SUPPORT FUNCTIONS
- */
-int kptllnd_get_peer_info(int index,
- lnet_process_id_t *id,
- int *state, int *sent_hello,
- int *refcount, __u64 *incarnation,
- __u64 *next_matchbits, __u64 *last_matchbits_seen,
- int *nsendq, int *nactiveq,
- int *credits, int *outstanding_credits);
-void kptllnd_peer_destroy(kptl_peer_t *peer);
-int kptllnd_peer_del(lnet_process_id_t id);
-void kptllnd_peer_close_locked(kptl_peer_t *peer, int why);
-void kptllnd_peer_close(kptl_peer_t *peer, int why);
-void kptllnd_handle_closing_peers(void);
-int kptllnd_peer_connect(kptl_tx_t *tx, lnet_nid_t nid);
-void kptllnd_peer_check_sends(kptl_peer_t *peer);
-void kptllnd_peer_check_bucket(int idx, int stamp);
-void kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag);
-int kptllnd_find_target(kptl_peer_t **peerp, lnet_process_id_t target);
-kptl_peer_t *kptllnd_peer_handle_hello(ptl_process_id_t initiator,
- kptl_msg_t *msg);
-kptl_peer_t *kptllnd_id2peer_locked(lnet_process_id_t id);
-void kptllnd_peer_alive(kptl_peer_t *peer);
-
-static inline void
-kptllnd_peer_addref (kptl_peer_t *peer)
-{
- atomic_inc(&peer->peer_refcount);
-}
-
-static inline void
-kptllnd_peer_decref (kptl_peer_t *peer)
-{
- if (atomic_dec_and_test(&peer->peer_refcount))
- kptllnd_peer_destroy(peer);
-}
-
-static inline void
-kptllnd_set_tx_peer(kptl_tx_t *tx, kptl_peer_t *peer)
-{
- LASSERT (tx->tx_peer == NULL);
-
- kptllnd_peer_addref(peer);
- tx->tx_peer = peer;
-}
-
-static inline struct list_head *
-kptllnd_nid2peerlist(lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) %
- kptllnd_data.kptl_peer_hash_size;
-
- return &kptllnd_data.kptl_peers[hash];
-}
-
-static inline kptl_peer_t *
-kptllnd_id2peer(lnet_process_id_t id)
-{
- kptl_peer_t *peer;
- unsigned long flags;
-
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
- peer = kptllnd_id2peer_locked(id);
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- return peer;
-}
-
-static inline int
-kptllnd_reserve_buffers(int n)
-{
- return kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool,
- n);
-}
-
-static inline int
-kptllnd_peer_reserve_buffers(void)
-{
- return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peercredits);
-}
-
-static inline void
-kptllnd_peer_unreserve_buffers(void)
-{
- kptllnd_rx_buffer_pool_unreserve(&kptllnd_data.kptl_rx_buffer_pool,
- *kptllnd_tunables.kptl_peercredits);
-}
-
-/*
- * TX SUPPORT FUNCTIONS
- */
-int kptllnd_setup_tx_descs(void);
-void kptllnd_cleanup_tx_descs(void);
-void kptllnd_tx_fini(kptl_tx_t *tx);
-kptl_tx_t *kptllnd_get_idle_tx(enum kptl_tx_type purpose);
-void kptllnd_tx_callback(ptl_event_t *ev);
-const char *kptllnd_tx_typestr(int type);
-
-static inline void
-kptllnd_tx_addref(kptl_tx_t *tx)
-{
- atomic_inc(&tx->tx_refcount);
-}
-
-static inline void
-kptllnd_tx_decref(kptl_tx_t *tx)
-{
- LASSERT (!in_interrupt()); /* Thread context only */
-
- if (atomic_dec_and_test(&tx->tx_refcount))
- kptllnd_tx_fini(tx);
-}
-
-/*
- * MESSAGE SUPPORT FUNCTIONS
- */
-void kptllnd_init_msg(kptl_msg_t *msg, int type, int body_nob);
-void kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer);
-int kptllnd_msg_unpack(kptl_msg_t *msg, int nob);
-
-/*
- * MISC SUPPORT FUNCTIONS
- */
-void kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int nob);
-char *kptllnd_ptlid2str(ptl_process_id_t id);
-
-void kptllnd_init_ptltrace(void);
-void kptllnd_dump_ptltrace(void);
-
-#ifdef PJK_DEBUGGING
-#define SIMULATION_FAIL_TX_PUT_ALLOC 0 /* 0x00000001 */
-#define SIMULATION_FAIL_TX_GET_ALLOC 1 /* 0x00000002 */
-#define SIMULATION_FAIL_TX 2 /* 0x00000004 */
-#define SIMULATION_FAIL_RX_ALLOC 3 /* 0x00000008 */
-
-#define IS_SIMULATION_ENABLED(x) \
- (((*kptllnd_tunables.kptl_simulation_bitmap) & 1<< SIMULATION_##x) != 0)
-#else
-#define IS_SIMULATION_ENABLED(x) 0
-#endif
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-#include "ptllnd.h"
-
-#ifndef _USING_LUSTRE_PORTALS_
-int
-kptllnd_extract_iov (int dst_niov, ptl_md_iovec_t *dst,
- int src_niov, struct iovec *src,
- unsigned int offset, unsigned int len)
-{
- /* Initialise 'dst' to the subset of 'src' starting at 'offset',
- * for exactly 'len' bytes, and return the number of entries.
- * NB not destructive to 'src' */
- unsigned int frag_len;
- unsigned int niov;
-
- if (len == 0) /* no data => */
- return (0); /* no frags */
-
- LASSERT (src_niov > 0);
- while (offset >= src->iov_len) { /* skip initial frags */
- offset -= src->iov_len;
- src_niov--;
- src++;
- LASSERT (src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT (src_niov > 0);
- LASSERT (niov <= dst_niov);
-
- frag_len = src->iov_len - offset;
- dst->iov_base = ((char *)src->iov_base) + offset;
-
- if (len <= frag_len) {
- dst->iov_len = len;
- return (niov);
- }
-
- dst->iov_len = frag_len;
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-
-int
-kptllnd_extract_phys (int dst_niov, ptl_md_iovec_t *dst,
- int src_niov, lnet_kiov_t *src,
- unsigned int offset, unsigned int len)
-{
- /* Initialise 'dst' to the physical addresses of the subset of 'src'
- * starting at 'offset', for exactly 'len' bytes, and return the number
- * of entries. NB not destructive to 'src' */
- unsigned int frag_len;
- unsigned int niov;
- __u64 phys_page;
- __u64 phys;
-
- if (len == 0) /* no data => */
- return (0); /* no frags */
-
- LASSERT (src_niov > 0);
- while (offset >= src->kiov_len) { /* skip initial frags */
- offset -= src->kiov_len;
- src_niov--;
- src++;
- LASSERT (src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT (src_niov > 0);
- LASSERT (niov <= dst_niov);
-
- frag_len = min(src->kiov_len - offset, len);
- phys_page = lnet_page2phys(src->kiov_page);
- phys = phys_page + src->kiov_offset + offset;
-
- LASSERT (sizeof(void *) > 4 ||
- (phys <= 0xffffffffULL &&
- phys + (frag_len - 1) <= 0xffffffffULL));
-
- dst->iov_base = (void *)((unsigned long)phys);
- dst->iov_len = frag_len;
-
- if (frag_len == len)
- return niov;
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-#endif
-
-void
-kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int nob)
-{
- LASSERT (iov == NULL || kiov == NULL);
-
- memset(&tx->tx_rdma_md, 0, sizeof(tx->tx_rdma_md));
-
- tx->tx_rdma_md.start = tx->tx_frags;
- tx->tx_rdma_md.user_ptr = &tx->tx_rdma_eventarg;
- tx->tx_rdma_md.eq_handle = kptllnd_data.kptl_eqh;
- tx->tx_rdma_md.options = PTL_MD_LUSTRE_COMPLETION_SEMANTICS |
- PTL_MD_EVENT_START_DISABLE;
- switch (tx->tx_type) {
- default:
- LBUG();
-
- case TX_TYPE_PUT_REQUEST: /* passive: peer gets */
- tx->tx_rdma_md.threshold = 1; /* GET event */
- tx->tx_rdma_md.options |= PTL_MD_OP_GET;
- break;
-
- case TX_TYPE_GET_REQUEST: /* passive: peer puts */
- tx->tx_rdma_md.threshold = 1; /* PUT event */
- tx->tx_rdma_md.options |= PTL_MD_OP_PUT;
- break;
-
- case TX_TYPE_PUT_RESPONSE: /* active: I get */
- tx->tx_rdma_md.threshold = 2; /* SEND + REPLY */
- break;
-
- case TX_TYPE_GET_RESPONSE: /* active: I put */
- tx->tx_rdma_md.threshold = tx->tx_acked ? 2 : 1; /* SEND + ACK? */
- break;
- }
-
- if (nob == 0) {
- tx->tx_rdma_md.length = 0;
- return;
- }
-
-#ifdef _USING_LUSTRE_PORTALS_
- if (iov != NULL) {
- tx->tx_rdma_md.options |= PTL_MD_IOVEC;
- tx->tx_rdma_md.length =
- lnet_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov,
- niov, iov, offset, nob);
- return;
- }
-
- /* Cheating OK since ptl_kiov_t == lnet_kiov_t */
- CLASSERT(sizeof(ptl_kiov_t) == sizeof(lnet_kiov_t));
- CLASSERT(offsetof(ptl_kiov_t, kiov_offset) ==
- offsetof(lnet_kiov_t, kiov_offset));
- CLASSERT(offsetof(ptl_kiov_t, kiov_page) ==
- offsetof(lnet_kiov_t, kiov_page));
- CLASSERT(offsetof(ptl_kiov_t, kiov_len) ==
- offsetof(lnet_kiov_t, kiov_len));
-
- tx->tx_rdma_md.options |= PTL_MD_KIOV;
- tx->tx_rdma_md.length =
- lnet_extract_kiov(PTL_MD_MAX_IOV, tx->tx_frags->kiov,
- niov, kiov, offset, nob);
-#else
- if (iov != NULL) {
- tx->tx_rdma_md.options |= PTL_MD_IOVEC;
- tx->tx_rdma_md.length =
- kptllnd_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov,
- niov, iov, offset, nob);
- return;
- }
-
- tx->tx_rdma_md.options |= PTL_MD_IOVEC | PTL_MD_PHYS;
- tx->tx_rdma_md.length =
- kptllnd_extract_phys(PTL_MD_MAX_IOV, tx->tx_frags->iov,
- niov, kiov, offset, nob);
-#endif
-}
-
-int
-kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, int nob)
-{
- kptl_tx_t *tx;
- ptl_err_t ptlrc;
- kptl_msg_t *rxmsg = rx->rx_msg;
- kptl_peer_t *peer = rx->rx_peer;
- unsigned long flags;
- ptl_handle_md_t mdh;
-
- LASSERT (type == TX_TYPE_PUT_RESPONSE ||
- type == TX_TYPE_GET_RESPONSE);
-
- tx = kptllnd_get_idle_tx(type);
- if (tx == NULL) {
- CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n",
- type == TX_TYPE_PUT_RESPONSE ? "GET" : "PUT",
- libcfs_id2str(peer->peer_id));
- return -ENOMEM;
- }
-
- kptllnd_set_tx_peer(tx, peer);
- kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob);
-
- ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md,
- PTL_UNLINK, &mdh);
- if (ptlrc != PTL_OK) {
- CERROR("PtlMDBind(%s) failed: %s(%d)\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_errtype2str(ptlrc), ptlrc);
- tx->tx_status = -EIO;
- kptllnd_tx_decref(tx);
- return -EIO;
- }
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- tx->tx_lnet_msg = lntmsg;
- /* lnet_finalize() will be called when tx is torn down, so I must
- * return success from here on... */
-
- tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ);
- tx->tx_rdma_mdh = mdh;
- tx->tx_active = 1;
- list_add_tail(&tx->tx_list, &peer->peer_activeq);
-
- /* peer has now got my ref on 'tx' */
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- tx->tx_tposted = jiffies;
-
- if (type == TX_TYPE_GET_RESPONSE)
- ptlrc = PtlPut(mdh,
- tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ,
- rx->rx_initiator,
- *kptllnd_tunables.kptl_portal,
- 0, /* acl cookie */
- rxmsg->ptlm_u.rdma.kptlrm_matchbits,
- 0, /* offset */
- (lntmsg != NULL) ? /* header data */
- PTLLND_RDMA_OK :
- PTLLND_RDMA_FAIL);
- else
- ptlrc = PtlGet(mdh,
- rx->rx_initiator,
- *kptllnd_tunables.kptl_portal,
- 0, /* acl cookie */
- rxmsg->ptlm_u.rdma.kptlrm_matchbits,
- 0); /* offset */
-
- if (ptlrc != PTL_OK) {
- CERROR("Ptl%s failed: %s(%d)\n",
- (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get",
- kptllnd_errtype2str(ptlrc), ptlrc);
-
- kptllnd_peer_close(peer, -EIO);
- /* Everything (including this RDMA) queued on the peer will
- * be completed with failure */
- }
-
- return 0;
-}
-
-int
-kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- lnet_hdr_t *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- kptl_peer_t *peer;
- kptl_tx_t *tx;
- int nob;
- int nfrag;
- int rc;
-
- LASSERT (payload_nob == 0 || payload_niov > 0);
- LASSERT (payload_niov <= LNET_MAX_IOV);
- LASSERT (payload_niov <= PTL_MD_MAX_IOV); /* !!! */
- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
- LASSERT (!in_interrupt());
-
- rc = kptllnd_find_target(&peer, target);
- if (rc != 0)
- return rc;
-
- switch (type) {
- default:
- LBUG();
- return -EINVAL;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- /* Should the payload avoid RDMA? */
- nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[payload_nob]);
- if (payload_kiov == NULL &&
- nob <= peer->peer_max_msg_size)
- break;
-
- tx = kptllnd_get_idle_tx(TX_TYPE_PUT_REQUEST);
- if (tx == NULL) {
- CERROR("Can't send %s to %s: can't allocate descriptor\n",
- lnet_msgtyp2str(type),
- libcfs_id2str(target));
- rc = -ENOMEM;
- goto out;
- }
-
- kptllnd_init_rdma_md(tx, payload_niov,
- payload_iov, payload_kiov,
- payload_offset, payload_nob);
-
- tx->tx_lnet_msg = lntmsg;
- tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr;
- kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_PUT,
- sizeof(kptl_rdma_msg_t));
-
- CDEBUG(D_NETTRACE, "%s: passive PUT p %d %p\n",
- libcfs_id2str(target),
- le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index), tx);
-
- kptllnd_tx_launch(peer, tx, 0);
- goto out;
-
- case LNET_MSG_GET:
- /* routed gets don't RDMA */
- if (target_is_router || routing)
- break;
-
- /* Is the payload small enough not to need RDMA? */
- nob = lntmsg->msg_md->md_length;
- nob = offsetof(kptl_msg_t,
- ptlm_u.immediate.kptlim_payload[nob]);
- if (nob <= peer->peer_max_msg_size)
- break;
-
- tx = kptllnd_get_idle_tx(TX_TYPE_GET_REQUEST);
- if (tx == NULL) {
- CERROR("Can't send GET to %s: can't allocate descriptor\n",
- libcfs_id2str(target));
- rc = -ENOMEM;
- goto out;
- }
-
- tx->tx_lnet_replymsg =
- lnet_create_reply_msg(kptllnd_data.kptl_ni, lntmsg);
- if (tx->tx_lnet_replymsg == NULL) {
- CERROR("Failed to allocate LNET reply for %s\n",
- libcfs_id2str(target));
- kptllnd_tx_decref(tx);
- rc = -ENOMEM;
- goto out;
- }
-
- if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
- kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.iov, NULL,
- 0, lntmsg->msg_md->md_length);
- else
- kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov,
- NULL, lntmsg->msg_md->md_iov.kiov,
- 0, lntmsg->msg_md->md_length);
-
- tx->tx_lnet_msg = lntmsg;
- tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr;
- kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_GET,
- sizeof(kptl_rdma_msg_t));
-
- CDEBUG(D_NETTRACE, "%s: passive GET p %d %p\n",
- libcfs_id2str(target),
- le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index), tx);
-
- kptllnd_tx_launch(peer, tx, 0);
- goto out;
-
- case LNET_MSG_ACK:
- CDEBUG(D_NET, "LNET_MSG_ACK\n");
- LASSERT (payload_nob == 0);
- break;
- }
-
- /* I don't have to handle kiovs */
- LASSERT (payload_nob == 0 || payload_iov != NULL);
-
- tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE);
- if (tx == NULL) {
- CERROR("Can't send %s to %s: can't allocate descriptor\n",
- lnet_msgtyp2str(type), libcfs_id2str(target));
- rc = -ENOMEM;
- goto out;
- }
-
- tx->tx_lnet_msg = lntmsg;
- tx->tx_msg->ptlm_u.immediate.kptlim_hdr = *hdr;
-
- if (payload_nob == 0) {
- nfrag = 0;
- } else {
- tx->tx_frags->iov[0].iov_base = tx->tx_msg;
- tx->tx_frags->iov[0].iov_len = offsetof(kptl_msg_t,
- ptlm_u.immediate.kptlim_payload);
-
- /* NB relying on lustre not asking for PTL_MD_MAX_IOV
- * fragments!! */
-#ifdef _USING_LUSTRE_PORTALS_
- nfrag = 1 + lnet_extract_iov(PTL_MD_MAX_IOV - 1,
- &tx->tx_frags->iov[1],
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-#else
- nfrag = 1 + kptllnd_extract_iov(PTL_MD_MAX_IOV - 1,
- &tx->tx_frags->iov[1],
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-#endif
- }
-
- nob = offsetof(kptl_immediate_msg_t, kptlim_payload[payload_nob]);
- kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_IMMEDIATE, nob);
-
- CDEBUG(D_NETTRACE, "%s: immediate %s p %d %p\n",
- libcfs_id2str(target),
- lnet_msgtyp2str(lntmsg->msg_type),
- (le32_to_cpu(lntmsg->msg_type) == LNET_MSG_PUT) ?
- le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index) :
- (le32_to_cpu(lntmsg->msg_type) == LNET_MSG_GET) ?
- le32_to_cpu(lntmsg->msg_hdr.msg.get.ptl_index) : -1,
- tx);
-
- kptllnd_tx_launch(peer, tx, nfrag);
-
- out:
- kptllnd_peer_decref(peer);
- return rc;
-}
-
-int
-kptllnd_eager_recv(struct lnet_ni *ni, void *private,
- lnet_msg_t *msg, void **new_privatep)
-{
- kptl_rx_t *rx = private;
-
- CDEBUG(D_NET, "Eager RX=%p RXB=%p\n", rx, rx->rx_rxb);
-
- /* I have to release my ref on rxb (if I have one) to ensure I'm an
- * eager receiver, so I copy the incoming request from the buffer it
- * landed in, into space reserved in the descriptor... */
-
-#if (PTL_MD_LOCAL_ALIGN8 == 0)
- if (rx->rx_rxb == NULL) /* already copied */
- return 0; /* to fix alignment */
-#else
- LASSERT(rx->rx_rxb != NULL);
-#endif
- LASSERT(rx->rx_nob <= *kptllnd_tunables.kptl_max_msg_size);
-
- memcpy(rx->rx_space, rx->rx_msg, rx->rx_nob);
- rx->rx_msg = (kptl_msg_t *)rx->rx_space;
-
- kptllnd_rx_buffer_decref(rx->rx_rxb);
- rx->rx_rxb = NULL;
-
- return 0;
-}
-
-
-int
-kptllnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- kptl_rx_t *rx = private;
- kptl_msg_t *rxmsg = rx->rx_msg;
- int nob;
- int rc;
-
- CDEBUG(D_NET, "%s niov=%d offset=%d mlen=%d rlen=%d\n",
- kptllnd_msgtype2str(rxmsg->ptlm_type),
- niov, offset, mlen, rlen);
-
- LASSERT (mlen <= rlen);
- LASSERT (mlen >= 0);
- LASSERT (!in_interrupt());
- LASSERT (!(kiov != NULL && iov != NULL)); /* never both */
- LASSERT (niov <= PTL_MD_MAX_IOV); /* !!! */
-
-#ifdef CRAY_XT3
- if (lntmsg != NULL &&
- rx->rx_uid != 0) {
- /* Set the UID if the sender's uid isn't 0; i.e. non-root
- * running in userspace (e.g. a catamount node; linux kernel
- * senders, including routers have uid 0). If this is a lustre
- * RPC request, this tells lustre not to trust the creds in the
- * RPC message body. */
- lnet_set_msg_uid(ni, lntmsg, rx->rx_uid);
- }
-#endif
- switch(rxmsg->ptlm_type)
- {
- default:
- LBUG();
- rc = -EINVAL;
- break;
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE %d,%d\n", mlen, rlen);
-
- nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[rlen]);
- if (nob > rx->rx_nob) {
- CERROR ("Immediate message from %s too big: %d(%d)\n",
- libcfs_id2str(rx->rx_peer->peer_id), nob,
- rx->rx_nob);
- rc = -EINVAL;
- break;
- }
-
- if (kiov != NULL)
- lnet_copy_flat2kiov(
- niov, kiov, offset,
- *kptllnd_tunables.kptl_max_msg_size,
- rxmsg->ptlm_u.immediate.kptlim_payload,
- 0,
- mlen);
- else
- lnet_copy_flat2iov(
- niov, iov, offset,
- *kptllnd_tunables.kptl_max_msg_size,
- rxmsg->ptlm_u.immediate.kptlim_payload,
- 0,
- mlen);
-
- lnet_finalize (ni, lntmsg, 0);
- rc = 0;
- break;
-
- case PTLLND_MSG_TYPE_GET:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_GET %d,%d\n", mlen, rlen);
-
- /* NB always send RDMA so the peer can complete. I send
- * success/failure in the portals 'hdr_data' */
-
- if (lntmsg == NULL)
- rc = kptllnd_active_rdma(rx, NULL,
- TX_TYPE_GET_RESPONSE,
- 0, NULL, NULL, 0, 0);
- else
- rc = kptllnd_active_rdma(rx, lntmsg,
- TX_TYPE_GET_RESPONSE,
- lntmsg->msg_niov,
- lntmsg->msg_iov,
- lntmsg->msg_kiov,
- lntmsg->msg_offset,
- lntmsg->msg_len);
- break;
-
- case PTLLND_MSG_TYPE_PUT:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_PUT %d,%d\n", mlen, rlen);
-
- /* NB always send RDMA so the peer can complete; it'll be 0
- * bytes if there was no match (lntmsg == NULL). I have no way
- * to let my peer know this, but she's only interested in when
- * the net has stopped accessing her buffer in any case. */
-
- rc = kptllnd_active_rdma(rx, lntmsg, TX_TYPE_PUT_RESPONSE,
- niov, iov, kiov, offset, mlen);
- break;
- }
-
- /*
- * We're done with the RX
- */
- kptllnd_rx_done(rx, PTLLND_POSTRX_PEER_CREDIT);
- return rc;
-}
-
-void
-kptllnd_eq_callback(ptl_event_t *ev)
-{
- kptl_eventarg_t *eva = ev->md.user_ptr;
-
- switch (eva->eva_type) {
- default:
- LBUG();
-
- case PTLLND_EVENTARG_TYPE_MSG:
- case PTLLND_EVENTARG_TYPE_RDMA:
- kptllnd_tx_callback(ev);
- break;
-
- case PTLLND_EVENTARG_TYPE_BUF:
- kptllnd_rx_buffer_callback(ev);
- break;
- }
-}
-
-void
-kptllnd_thread_fini (void)
-{
- atomic_dec(&kptllnd_data.kptl_nthreads);
-}
-
-int
-kptllnd_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid;
-
- atomic_inc(&kptllnd_data.kptl_nthreads);
-
- pid = kernel_thread (fn, arg, 0);
- if (pid >= 0)
- return 0;
-
- CERROR("Failed to start kernel_thread: error %d\n", (int)pid);
- kptllnd_thread_fini();
- return (int)pid;
-}
-
-int
-kptllnd_watchdog(void *arg)
-{
- int id = (long)arg;
- char name[16];
- wait_queue_t waitlink;
- int stamp = 0;
- int peer_index = 0;
- unsigned long deadline = jiffies;
- int timeout;
- int i;
-
- snprintf(name, sizeof(name), "kptllnd_wd_%02d", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- init_waitqueue_entry(&waitlink, current);
-
- /* threads shut down in phase 2 after all peers have been destroyed */
- while (kptllnd_data.kptl_shutdown < 2) {
-
- timeout = (int)(deadline - jiffies);
-
- if (timeout <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = kptllnd_data.kptl_peer_hash_size;
-
-
- /* Time to check for RDMA timeouts on a few more
- * peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
- * every connection 'n' times within a timeout
- * interval, to ensure I detect a timeout on any
- * connection within (n+1)/n times the timeout
- * interval. */
-
- if ((*kptllnd_tunables.kptl_timeout) > n * p)
- chunk = (chunk * n * p) /
- (*kptllnd_tunables.kptl_timeout);
- if (chunk == 0)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kptllnd_peer_check_bucket(peer_index, stamp);
- peer_index = (peer_index + 1) %
- kptllnd_data.kptl_peer_hash_size;
- }
-
- deadline += p * HZ;
- stamp++;
- continue;
- }
-
- kptllnd_handle_closing_peers();
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kptllnd_data.kptl_watchdog_waitq,
- &waitlink);
-
- schedule_timeout(timeout);
-
- set_current_state (TASK_RUNNING);
- remove_wait_queue(&kptllnd_data.kptl_watchdog_waitq, &waitlink);
- }
-
- kptllnd_thread_fini();
- CDEBUG(D_NET, "<<<\n");
- return (0);
-};
-
-int
-kptllnd_scheduler (void *arg)
-{
- int id = (long)arg;
- char name[16];
- wait_queue_t waitlink;
- unsigned long flags;
- int did_something;
- int counter = 0;
- kptl_rx_t *rx;
- kptl_rx_buffer_t *rxb;
- kptl_tx_t *tx;
-
- snprintf(name, sizeof(name), "kptllnd_sd_%02d", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- init_waitqueue_entry(&waitlink, current);
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags);
-
- /* threads shut down in phase 2 after all peers have been destroyed */
- while (kptllnd_data.kptl_shutdown < 2) {
-
- did_something = 0;
-
- if (!list_empty(&kptllnd_data.kptl_sched_rxq)) {
- rx = list_entry (kptllnd_data.kptl_sched_rxq.next,
- kptl_rx_t, rx_list);
- list_del(&rx->rx_list);
-
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock,
- flags);
-
- kptllnd_rx_parse(rx);
- did_something = 1;
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags);
- }
-
- if (!list_empty(&kptllnd_data.kptl_sched_rxbq)) {
- rxb = list_entry (kptllnd_data.kptl_sched_rxbq.next,
- kptl_rx_buffer_t, rxb_repost_list);
- list_del(&rxb->rxb_repost_list);
-
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock,
- flags);
-
- kptllnd_rx_buffer_post(rxb);
- did_something = 1;
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags);
- }
-
- if (!list_empty(&kptllnd_data.kptl_sched_txq)) {
- tx = list_entry (kptllnd_data.kptl_sched_txq.next,
- kptl_tx_t, tx_list);
- list_del_init(&tx->tx_list);
-
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags);
-
- kptllnd_tx_fini(tx);
- did_something = 1;
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags);
- }
-
- if (did_something) {
- if (++counter != *kptllnd_tunables.kptl_reschedule_loops)
- continue;
- }
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kptllnd_data.kptl_sched_waitq,
- &waitlink);
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags);
-
- if (!did_something)
- schedule();
- else
- cond_resched();
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&kptllnd_data.kptl_sched_waitq, &waitlink);
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags);
-
- counter = 0;
- }
-
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags);
-
- kptllnd_thread_fini();
- return 0;
-}
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-
-#include "ptllnd.h"
-
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# of TX descriptors");
-
-static int max_nodes = 1152;
-CFS_MODULE_PARM(max_nodes, "i", int, 0444,
- "maximum number of peer nodes");
-
-static int max_procs_per_node = 2;
-CFS_MODULE_PARM(max_procs_per_node, "i", int, 0444,
- "maximum number of processes per peer node to cache");
-
-static int checksum = 0;
-CFS_MODULE_PARM(checksum, "i", int, 0644,
- "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
- "timeout (seconds)");
-
-static int portal = PTLLND_PORTAL; /* <lnet/ptllnd_wire.h> */
-CFS_MODULE_PARM(portal, "i", int, 0444,
- "portal id");
-
-static int pid = PTLLND_PID; /* <lnet/ptllnd_wire.h> */
-CFS_MODULE_PARM(pid, "i", int, 0444,
- "portals pid");
-
-static int rxb_npages = 1;
-CFS_MODULE_PARM(rxb_npages, "i", int, 0444,
- "# of pages per rx buffer");
-
-static int rxb_nspare = 8;
-CFS_MODULE_PARM(rxb_nspare, "i", int, 0444,
- "# of spare rx buffers");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "concurrent sends");
-
-static int peercredits = PTLLND_PEERCREDITS; /* <lnet/ptllnd_wire.h> */
-CFS_MODULE_PARM(peercredits, "i", int, 0444,
- "concurrent sends to 1 peer");
-
-static int max_msg_size = PTLLND_MAX_KLND_MSG_SIZE; /* <lnet/ptllnd_wire.h> */
-CFS_MODULE_PARM(max_msg_size, "i", int, 0444,
- "max size of immediate message");
-
-static int peer_hash_table_size = 101;
-CFS_MODULE_PARM(peer_hash_table_size, "i", int, 0444,
- "# of slots in the peer hash table");
-
-static int reschedule_loops = 100;
-CFS_MODULE_PARM(reschedule_loops, "i", int, 0644,
- "# of loops before scheduler does cond_resched()");
-
-static int ack_puts = 0;
-CFS_MODULE_PARM(ack_puts, "i", int, 0644,
- "get portals to ack all PUTs");
-
-#ifdef CRAY_XT3
-static int ptltrace_on_timeout = 0;
-CFS_MODULE_PARM(ptltrace_on_timeout, "i", int, 0644,
- "dump ptltrace on timeout");
-
-static char *ptltrace_basename = "/tmp/lnet-ptltrace";
-CFS_MODULE_PARM(ptltrace_basename, "s", charp, 0644,
- "ptltrace dump file basename");
-#endif
-#ifdef PJK_DEBUGGING
-static int simulation_bitmap = 0;
-CFS_MODULE_PARM(simulation_bitmap, "i", int, 0444,
- "simulation bitmap");
-#endif
-
-
-kptl_tunables_t kptllnd_tunables = {
- .kptl_ntx = &ntx,
- .kptl_max_nodes = &max_nodes,
- .kptl_max_procs_per_node = &max_procs_per_node,
- .kptl_checksum = &checksum,
- .kptl_portal = &portal,
- .kptl_pid = &pid,
- .kptl_timeout = &timeout,
- .kptl_rxb_npages = &rxb_npages,
- .kptl_rxb_nspare = &rxb_nspare,
- .kptl_credits = &credits,
- .kptl_peercredits = &peercredits,
- .kptl_max_msg_size = &max_msg_size,
- .kptl_peer_hash_table_size = &peer_hash_table_size,
- .kptl_reschedule_loops = &reschedule_loops,
- .kptl_ack_puts = &ack_puts,
-#ifdef CRAY_XT3
- .kptl_ptltrace_on_timeout = &ptltrace_on_timeout,
- .kptl_ptltrace_basename = &ptltrace_basename,
-#endif
-#ifdef PJK_DEBUGGING
- .kptl_simulation_bitmap = &simulation_bitmap,
-#endif
-};
-
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-#ifdef CRAY_XT3
-static char ptltrace_basename_space[1024];
-
-static void
-kptllnd_init_strtunable(char **str_param, char *space, int size)
-{
- strncpy(space, *str_param, size);
- space[size - 1] = 0;
- *str_param = space;
-}
-#endif
-
-static cfs_sysctl_table_t kptllnd_ctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 2,
- .procname = "max_nodes",
- .data = &max_nodes,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 3,
- .procname = "max_procs_per_node",
- .data = &max_procs_per_node,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "checksum",
- .data = &checksum,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 6,
- .procname = "portal",
- .data = &portal,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 7,
- .procname = "pid",
- .data = &pid,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 8,
- .procname = "rxb_npages",
- .data = &rxb_npages,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 9,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 10,
- .procname = "peercredits",
- .data = &peercredits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 11,
- .procname = "max_msg_size",
- .data = &max_msg_size,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 12,
- .procname = "peer_hash_table_size",
- .data = &peer_hash_table_size,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 13,
- .procname = "reschedule_loops",
- .data = &reschedule_loops,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 14,
- .procname = "ack_puts",
- .data = &ack_puts,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
-#ifdef CRAY_XT3
- {
- .ctl_name = 15,
- .procname = "ptltrace_on_timeout",
- .data = &ptltrace_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 16,
- .procname = "ptltrace_basename",
- .data = ptltrace_basename_space,
- .maxlen = sizeof(ptltrace_basename_space),
- .mode = 0644,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string
- },
-#endif
-#ifdef PJK_DEBUGGING
- {
- .ctl_name = 17,
- .procname = "simulation_bitmap",
- .data = &simulation_bitmap,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
-#endif
-
- {0}
-};
-
-static cfs_sysctl_table_t kptllnd_top_ctl_table[] = {
- {
- .ctl_name = 203,
- .procname = "ptllnd",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = kptllnd_ctl_table
- },
- {0}
-};
-
-int
-kptllnd_tunables_init ()
-{
-#ifdef CRAY_XT3
- kptllnd_init_strtunable(&ptltrace_basename,
- ptltrace_basename_space,
- sizeof(ptltrace_basename_space));
-#endif
- kptllnd_tunables.kptl_sysctl =
- cfs_register_sysctl_table(kptllnd_top_ctl_table, 0);
-
- if (kptllnd_tunables.kptl_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-
- return 0;
-}
-
-void
-kptllnd_tunables_fini ()
-{
- if (kptllnd_tunables.kptl_sysctl != NULL)
- cfs_unregister_sysctl_table(kptllnd_tunables.kptl_sysctl);
-}
-
-#else
-
-int
-kptllnd_tunables_init ()
-{
- return 0;
-}
-
-void
-kptllnd_tunables_fini ()
-{
-}
-
-#endif
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- * E Barton <eeb@bartonsoftware.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-#include "ptllnd.h"
-#include <libcfs/list.h>
-
-static int
-kptllnd_count_queue(struct list_head *q)
-{
- struct list_head *e;
- int n = 0;
-
- list_for_each(e, q) {
- n++;
- }
-
- return n;
-}
-
-int
-kptllnd_get_peer_info(int index,
- lnet_process_id_t *id,
- int *state, int *sent_hello,
- int *refcount, __u64 *incarnation,
- __u64 *next_matchbits, __u64 *last_matchbits_seen,
- int *nsendq, int *nactiveq,
- int *credits, int *outstanding_credits)
-{
- rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock;
- unsigned long flags;
- struct list_head *ptmp;
- kptl_peer_t *peer;
- int i;
- int rc = -ENOENT;
-
- read_lock_irqsave(g_lock, flags);
-
- for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) {
-
- list_for_each (ptmp, &kptllnd_data.kptl_peers[i]) {
- peer = list_entry(ptmp, kptl_peer_t, peer_list);
-
- if (index-- > 0)
- continue;
-
- *id = peer->peer_id;
- *state = peer->peer_state;
- *sent_hello = peer->peer_sent_hello;
- *refcount = atomic_read(&peer->peer_refcount);
- *incarnation = peer->peer_incarnation;
-
- spin_lock(&peer->peer_lock);
-
- *next_matchbits = peer->peer_next_matchbits;
- *last_matchbits_seen = peer->peer_last_matchbits_seen;
- *credits = peer->peer_credits;
- *outstanding_credits = peer->peer_outstanding_credits;
-
- *nsendq = kptllnd_count_queue(&peer->peer_sendq);
- *nactiveq = kptllnd_count_queue(&peer->peer_activeq);
-
- spin_unlock(&peer->peer_lock);
-
- rc = 0;
- goto out;
- }
- }
-
- out:
- read_unlock_irqrestore(g_lock, flags);
- return rc;
-}
-
-void
-kptllnd_peer_add_peertable_locked (kptl_peer_t *peer)
-{
- LASSERT (!kptllnd_data.kptl_shutdown);
- LASSERT (kptllnd_data.kptl_n_active_peers <
- kptllnd_data.kptl_expected_peers);
-
- LASSERT (peer->peer_state == PEER_STATE_WAITING_HELLO ||
- peer->peer_state == PEER_STATE_ACTIVE);
-
- kptllnd_data.kptl_n_active_peers++;
- atomic_inc(&peer->peer_refcount); /* +1 ref for the list */
-
- /* NB add to HEAD of peer list for MRU order!
- * (see kptllnd_cull_peertable) */
- list_add(&peer->peer_list, kptllnd_nid2peerlist(peer->peer_id.nid));
-}
-
-void
-kptllnd_cull_peertable_locked (lnet_process_id_t pid)
-{
- /* I'm about to add a new peer with this portals ID to the peer table,
- * so (a) this peer should not exist already and (b) I want to leave at
- * most (max_procs_per_nid - 1) peers with this NID in the table. */
- struct list_head *peers = kptllnd_nid2peerlist(pid.nid);
- int cull_count = *kptllnd_tunables.kptl_max_procs_per_node;
- int count;
- struct list_head *tmp;
- struct list_head *nxt;
- kptl_peer_t *peer;
-
- count = 0;
- list_for_each_safe (tmp, nxt, peers) {
- /* NB I rely on kptllnd_peer_add_peertable_locked to add peers
- * in MRU order */
- peer = list_entry(tmp, kptl_peer_t, peer_list);
-
- if (peer->peer_id.nid != pid.nid)
- continue;
-
- LASSERT (peer->peer_id.pid != pid.pid);
-
- count++;
-
- if (count < cull_count) /* recent (don't cull) */
- continue;
-
- CDEBUG(D_NET, "Cull %s(%s)\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_ptlid2str(peer->peer_ptlid));
-
- kptllnd_peer_close_locked(peer, 0);
- }
-}
-
-kptl_peer_t *
-kptllnd_peer_allocate (lnet_process_id_t lpid, ptl_process_id_t ppid)
-{
- unsigned long flags;
- kptl_peer_t *peer;
-
- LIBCFS_ALLOC(peer, sizeof (*peer));
- if (peer == NULL) {
- CERROR("Can't create peer %s (%s)\n",
- libcfs_id2str(lpid),
- kptllnd_ptlid2str(ppid));
- return NULL;
- }
-
- memset(peer, 0, sizeof(*peer)); /* zero flags etc */
-
- INIT_LIST_HEAD (&peer->peer_noops);
- INIT_LIST_HEAD (&peer->peer_sendq);
- INIT_LIST_HEAD (&peer->peer_activeq);
- spin_lock_init (&peer->peer_lock);
-
- peer->peer_state = PEER_STATE_ALLOCATED;
- peer->peer_error = 0;
- peer->peer_last_alive = cfs_time_current();
- peer->peer_id = lpid;
- peer->peer_ptlid = ppid;
- peer->peer_credits = 1; /* enough for HELLO */
- peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS;
- peer->peer_outstanding_credits = *kptllnd_tunables.kptl_peercredits - 1;
- peer->peer_sent_credits = 1; /* HELLO credit is implicit */
- peer->peer_max_msg_size = PTLLND_MIN_BUFFER_SIZE; /* until we know better */
-
- atomic_set(&peer->peer_refcount, 1); /* 1 ref for caller */
-
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- peer->peer_myincarnation = kptllnd_data.kptl_incarnation;
-
- /* Only increase # peers under lock, to guarantee we dont grow it
- * during shutdown */
- if (kptllnd_data.kptl_shutdown) {
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock,
- flags);
- LIBCFS_FREE(peer, sizeof(*peer));
- return NULL;
- }
-
- kptllnd_data.kptl_npeers++;
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- return peer;
-}
-
-void
-kptllnd_peer_destroy (kptl_peer_t *peer)
-{
- unsigned long flags;
-
- CDEBUG(D_NET, "Peer=%p\n", peer);
-
- LASSERT (!in_interrupt());
- LASSERT (atomic_read(&peer->peer_refcount) == 0);
- LASSERT (peer->peer_state == PEER_STATE_ALLOCATED ||
- peer->peer_state == PEER_STATE_ZOMBIE);
- LASSERT (list_empty(&peer->peer_noops));
- LASSERT (list_empty(&peer->peer_sendq));
- LASSERT (list_empty(&peer->peer_activeq));
-
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- if (peer->peer_state == PEER_STATE_ZOMBIE)
- list_del(&peer->peer_list);
-
- kptllnd_data.kptl_npeers--;
-
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- LIBCFS_FREE (peer, sizeof (*peer));
-}
-
-void
-kptllnd_cancel_txlist (struct list_head *peerq, struct list_head *txs)
-{
- struct list_head *tmp;
- struct list_head *nxt;
- kptl_tx_t *tx;
-
- list_for_each_safe (tmp, nxt, peerq) {
- tx = list_entry(tmp, kptl_tx_t, tx_list);
-
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, txs);
-
- tx->tx_status = -EIO;
- tx->tx_active = 0;
- }
-}
-
-void
-kptllnd_peer_cancel_txs(kptl_peer_t *peer, struct list_head *txs)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- kptllnd_cancel_txlist(&peer->peer_noops, txs);
- kptllnd_cancel_txlist(&peer->peer_sendq, txs);
- kptllnd_cancel_txlist(&peer->peer_activeq, txs);
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-}
-
-void
-kptllnd_peer_alive (kptl_peer_t *peer)
-{
- /* This is racy, but everyone's only writing cfs_time_current() */
- peer->peer_last_alive = cfs_time_current();
- mb();
-}
-
-void
-kptllnd_peer_notify (kptl_peer_t *peer)
-{
- unsigned long flags;
- time_t last_alive = 0;
- int error = 0;
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- if (peer->peer_error != 0) {
- error = peer->peer_error;
- peer->peer_error = 0;
-
- last_alive = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() -
- peer->peer_last_alive);
- }
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- if (error != 0)
- lnet_notify (kptllnd_data.kptl_ni, peer->peer_id.nid, 0,
- last_alive);
-}
-
-void
-kptllnd_handle_closing_peers ()
-{
- unsigned long flags;
- struct list_head txs;
- kptl_peer_t *peer;
- struct list_head *tmp;
- struct list_head *nxt;
- kptl_tx_t *tx;
- int idle;
-
- /* Check with a read lock first to avoid blocking anyone */
-
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
- idle = list_empty(&kptllnd_data.kptl_closing_peers) &&
- list_empty(&kptllnd_data.kptl_zombie_peers);
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- if (idle)
- return;
-
- INIT_LIST_HEAD(&txs);
-
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- /* Cancel txs on all zombie peers. NB anyone dropping the last peer
- * ref removes it from this list, so I musn't drop the lock while
- * scanning it. */
- list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) {
- peer = list_entry (tmp, kptl_peer_t, peer_list);
-
- LASSERT (peer->peer_state == PEER_STATE_ZOMBIE);
-
- kptllnd_peer_cancel_txs(peer, &txs);
- }
-
- /* Notify LNET and cancel txs on closing (i.e. newly closed) peers. NB
- * I'm the only one removing from this list, but peers can be added on
- * the end any time I drop the lock. */
-
- list_for_each_safe (tmp, nxt, &kptllnd_data.kptl_closing_peers) {
- peer = list_entry (tmp, kptl_peer_t, peer_list);
-
- LASSERT (peer->peer_state == PEER_STATE_CLOSING);
-
- list_del(&peer->peer_list);
- list_add_tail(&peer->peer_list,
- &kptllnd_data.kptl_zombie_peers);
- peer->peer_state = PEER_STATE_ZOMBIE;
-
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- kptllnd_peer_notify(peer);
- kptllnd_peer_cancel_txs(peer, &txs);
- kptllnd_peer_decref(peer);
-
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
- }
-
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- /* Drop peer's ref on all cancelled txs. This will get
- * kptllnd_tx_fini() to abort outstanding comms if necessary. */
-
- list_for_each_safe (tmp, nxt, &txs) {
- tx = list_entry(tmp, kptl_tx_t, tx_list);
- list_del(&tx->tx_list);
- kptllnd_tx_decref(tx);
- }
-}
-
-void
-kptllnd_peer_close_locked(kptl_peer_t *peer, int why)
-{
- switch (peer->peer_state) {
- default:
- LBUG();
-
- case PEER_STATE_WAITING_HELLO:
- case PEER_STATE_ACTIVE:
- /* Ensure new peers see a new incarnation of me */
- LASSERT(peer->peer_myincarnation <= kptllnd_data.kptl_incarnation);
- if (peer->peer_myincarnation == kptllnd_data.kptl_incarnation)
- kptllnd_data.kptl_incarnation++;
-
- /* Removing from peer table */
- kptllnd_data.kptl_n_active_peers--;
- LASSERT (kptllnd_data.kptl_n_active_peers >= 0);
-
- list_del(&peer->peer_list);
- kptllnd_peer_unreserve_buffers();
-
- peer->peer_error = why; /* stash 'why' only on first close */
- peer->peer_state = PEER_STATE_CLOSING;
-
- /* Schedule for immediate attention, taking peer table's ref */
- list_add_tail(&peer->peer_list,
- &kptllnd_data.kptl_closing_peers);
- wake_up(&kptllnd_data.kptl_watchdog_waitq);
- break;
-
- case PEER_STATE_ZOMBIE:
- case PEER_STATE_CLOSING:
- break;
- }
-}
-
-void
-kptllnd_peer_close(kptl_peer_t *peer, int why)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
- kptllnd_peer_close_locked(peer, why);
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-}
-
-int
-kptllnd_peer_del(lnet_process_id_t id)
-{
- struct list_head *ptmp;
- struct list_head *pnxt;
- kptl_peer_t *peer;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int rc = -ENOENT;
-
- /*
- * Find the single bucket we are supposed to look at or if nid is a
- * wildcard (LNET_NID_ANY) then look at all of the buckets
- */
- if (id.nid != LNET_NID_ANY) {
- struct list_head *l = kptllnd_nid2peerlist(id.nid);
-
- lo = hi = l - kptllnd_data.kptl_peers;
- } else {
- if (id.pid != LNET_PID_ANY)
- return -EINVAL;
-
- lo = 0;
- hi = kptllnd_data.kptl_peer_hash_size - 1;
- }
-
-again:
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kptllnd_data.kptl_peers[i]) {
- peer = list_entry (ptmp, kptl_peer_t, peer_list);
-
- if (!(id.nid == LNET_NID_ANY ||
- (peer->peer_id.nid == id.nid &&
- (id.pid == LNET_PID_ANY ||
- peer->peer_id.pid == id.pid))))
- continue;
-
- kptllnd_peer_addref(peer); /* 1 ref for me... */
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock,
- flags);
-
- kptllnd_peer_close(peer, 0);
- kptllnd_peer_decref(peer); /* ...until here */
-
- rc = 0; /* matched something */
-
- /* start again now I've dropped the lock */
- goto again;
- }
- }
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- return (rc);
-}
-
-void
-kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag)
-{
- /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */
- ptl_handle_md_t msg_mdh;
- ptl_md_t md;
- ptl_err_t prc;
- unsigned long flags;
-
- LASSERT (!tx->tx_idle);
- LASSERT (!tx->tx_active);
- LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
- LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
- LASSERT (tx->tx_type == TX_TYPE_SMALL_MESSAGE ||
- tx->tx_type == TX_TYPE_PUT_REQUEST ||
- tx->tx_type == TX_TYPE_GET_REQUEST);
-
- kptllnd_set_tx_peer(tx, peer);
-
- memset(&md, 0, sizeof(md));
-
- md.threshold = tx->tx_acked ? 2 : 1; /* SEND END + ACK? */
- md.options = PTL_MD_OP_PUT |
- PTL_MD_LUSTRE_COMPLETION_SEMANTICS |
- PTL_MD_EVENT_START_DISABLE;
- md.user_ptr = &tx->tx_msg_eventarg;
- md.eq_handle = kptllnd_data.kptl_eqh;
-
- if (nfrag == 0) {
- md.start = tx->tx_msg;
- md.length = tx->tx_msg->ptlm_nob;
- } else {
- LASSERT (nfrag > 1);
- LASSERT (tx->tx_frags->iov[0].iov_base == (void *)tx->tx_msg);
-
- md.start = tx->tx_frags;
- md.length = nfrag;
- md.options |= PTL_MD_IOVEC;
- }
-
- prc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &msg_mdh);
- if (prc != PTL_OK) {
- CERROR("PtlMDBind(%s) failed: %s(%d)\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_errtype2str(prc), prc);
- tx->tx_status = -EIO;
- kptllnd_tx_decref(tx);
- return;
- }
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ);
- tx->tx_active = 1;
- tx->tx_msg_mdh = msg_mdh;
-
- /* Ensure HELLO is sent first */
- if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_NOOP)
- list_add(&tx->tx_list, &peer->peer_noops);
- else if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO)
- list_add(&tx->tx_list, &peer->peer_sendq);
- else
- list_add_tail(&tx->tx_list, &peer->peer_sendq);
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-}
-
-static inline int
-kptllnd_peer_send_noop (kptl_peer_t *peer)
-{
- if (!peer->peer_sent_hello ||
- peer->peer_credits == 0 ||
- !list_empty(&peer->peer_noops) ||
- peer->peer_outstanding_credits < PTLLND_CREDIT_HIGHWATER)
- return 0;
-
- /* No tx to piggyback NOOP onto or no credit to send a tx */
- return (list_empty(&peer->peer_sendq) || peer->peer_credits == 1);
-}
-
-void
-kptllnd_peer_check_sends (kptl_peer_t *peer)
-{
- ptl_handle_me_t meh;
- kptl_tx_t *tx;
- int rc;
- int msg_type;
- unsigned long flags;
-
- LASSERT(!in_interrupt());
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- peer->peer_retry_noop = 0;
-
- if (kptllnd_peer_send_noop(peer)) {
- /* post a NOOP to return credits */
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE);
- if (tx == NULL) {
- CERROR("Can't return credits to %s: can't allocate descriptor\n",
- libcfs_id2str(peer->peer_id));
- } else {
- kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_NOOP, 0);
- kptllnd_post_tx(peer, tx, 0);
- }
-
- spin_lock_irqsave(&peer->peer_lock, flags);
- peer->peer_retry_noop = (tx == NULL);
- }
-
- for (;;) {
- if (!list_empty(&peer->peer_noops)) {
- LASSERT (peer->peer_sent_hello);
- tx = list_entry(peer->peer_noops.next,
- kptl_tx_t, tx_list);
- } else if (!list_empty(&peer->peer_sendq)) {
- tx = list_entry(peer->peer_sendq.next,
- kptl_tx_t, tx_list);
- } else {
- /* nothing to send right now */
- break;
- }
-
- LASSERT (tx->tx_active);
- LASSERT (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
- LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
-
- LASSERT (peer->peer_outstanding_credits >= 0);
- LASSERT (peer->peer_sent_credits >= 0);
- LASSERT (peer->peer_sent_credits +
- peer->peer_outstanding_credits <=
- *kptllnd_tunables.kptl_peercredits);
- LASSERT (peer->peer_credits >= 0);
-
- msg_type = tx->tx_msg->ptlm_type;
-
- /* Ensure HELLO is sent first */
- if (!peer->peer_sent_hello) {
- LASSERT (list_empty(&peer->peer_noops));
- if (msg_type != PTLLND_MSG_TYPE_HELLO)
- break;
- peer->peer_sent_hello = 1;
- }
-
- if (peer->peer_credits == 0) {
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: no credits for %s[%p]\n",
- libcfs_id2str(peer->peer_id),
- peer->peer_credits,
- peer->peer_outstanding_credits,
- peer->peer_sent_credits,
- kptllnd_msgtype2str(msg_type), tx);
- break;
- }
-
- /* Last/Initial credit reserved for NOOP/HELLO */
- if (peer->peer_credits == 1 &&
- msg_type != PTLLND_MSG_TYPE_HELLO &&
- msg_type != PTLLND_MSG_TYPE_NOOP) {
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: "
- "not using last credit for %s[%p]\n",
- libcfs_id2str(peer->peer_id),
- peer->peer_credits,
- peer->peer_outstanding_credits,
- peer->peer_sent_credits,
- kptllnd_msgtype2str(msg_type), tx);
- break;
- }
-
- list_del(&tx->tx_list);
-
- /* Discard any NOOP I queued if I'm not at the high-water mark
- * any more or more messages have been queued */
- if (msg_type == PTLLND_MSG_TYPE_NOOP &&
- !kptllnd_peer_send_noop(peer)) {
- tx->tx_active = 0;
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- CDEBUG(D_NET, "%s: redundant noop\n",
- libcfs_id2str(peer->peer_id));
- kptllnd_tx_decref(tx);
-
- spin_lock_irqsave(&peer->peer_lock, flags);
- continue;
- }
-
- /* fill last-minute msg fields */
- kptllnd_msg_pack(tx->tx_msg, peer);
-
- if (tx->tx_type == TX_TYPE_PUT_REQUEST ||
- tx->tx_type == TX_TYPE_GET_REQUEST) {
- /* peer_next_matchbits must be known good */
- LASSERT (peer->peer_state >= PEER_STATE_ACTIVE);
- /* Assume 64-bit matchbits can't wrap */
- LASSERT (peer->peer_next_matchbits >= PTL_RESERVED_MATCHBITS);
- tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits =
- peer->peer_next_matchbits++;
- }
-
- peer->peer_sent_credits += peer->peer_outstanding_credits;
- peer->peer_outstanding_credits = 0;
- peer->peer_credits--;
-
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s tx=%p nob=%d cred=%d\n",
- libcfs_id2str(peer->peer_id), peer->peer_credits,
- peer->peer_outstanding_credits, peer->peer_sent_credits,
- kptllnd_msgtype2str(msg_type), tx, tx->tx_msg->ptlm_nob,
- tx->tx_msg->ptlm_credits);
-
- list_add_tail(&tx->tx_list, &peer->peer_activeq);
-
- kptllnd_tx_addref(tx); /* 1 ref for me... */
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- if (tx->tx_type == TX_TYPE_PUT_REQUEST ||
- tx->tx_type == TX_TYPE_GET_REQUEST) {
- /* Post bulk now we have safe matchbits */
- rc = PtlMEAttach(kptllnd_data.kptl_nih,
- *kptllnd_tunables.kptl_portal,
- peer->peer_ptlid,
- tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits,
- 0, /* ignore bits */
- PTL_UNLINK,
- PTL_INS_BEFORE,
- &meh);
- if (rc != PTL_OK) {
- CERROR("PtlMEAttach(%s) failed: %s(%d)\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_errtype2str(rc), rc);
- goto failed;
- }
-
- rc = PtlMDAttach(meh, tx->tx_rdma_md, PTL_UNLINK,
- &tx->tx_rdma_mdh);
- if (rc != PTL_OK) {
- CERROR("PtlMDAttach(%s) failed: %s(%d)\n",
- libcfs_id2str(tx->tx_peer->peer_id),
- kptllnd_errtype2str(rc), rc);
- rc = PtlMEUnlink(meh);
- LASSERT(rc == PTL_OK);
- tx->tx_rdma_mdh = PTL_INVALID_HANDLE;
- goto failed;
- }
- /* I'm not racing with the event callback here. It's a
- * bug if there's an event on the MD I just attached
- * before I actually send the RDMA request message -
- * probably matchbits re-used in error. */
- }
-
- tx->tx_tposted = jiffies; /* going on the wire */
-
- rc = PtlPut (tx->tx_msg_mdh,
- tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ,
- peer->peer_ptlid,
- *kptllnd_tunables.kptl_portal,
- 0, /* acl cookie */
- LNET_MSG_MATCHBITS,
- 0, /* offset */
- 0); /* header data */
- if (rc != PTL_OK) {
- CERROR("PtlPut %s error %s(%d)\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_errtype2str(rc), rc);
- goto failed;
- }
-
- kptllnd_tx_decref(tx); /* drop my ref */
-
- spin_lock_irqsave(&peer->peer_lock, flags);
- }
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return;
-
- failed:
- /* Nuke everything (including tx we were trying) */
- kptllnd_peer_close(peer, -EIO);
- kptllnd_tx_decref(tx);
-}
-
-kptl_tx_t *
-kptllnd_find_timed_out_tx(kptl_peer_t *peer)
-{
- kptl_tx_t *tx;
- struct list_head *ele;
-
- list_for_each(ele, &peer->peer_sendq) {
- tx = list_entry(ele, kptl_tx_t, tx_list);
-
- if (time_after_eq(jiffies, tx->tx_deadline)) {
- kptllnd_tx_addref(tx);
- return tx;
- }
- }
-
- list_for_each(ele, &peer->peer_activeq) {
- tx = list_entry(ele, kptl_tx_t, tx_list);
-
- if (time_after_eq(jiffies, tx->tx_deadline)) {
- kptllnd_tx_addref(tx);
- return tx;
- }
- }
-
- return NULL;
-}
-
-
-void
-kptllnd_peer_check_bucket (int idx, int stamp)
-{
- struct list_head *peers = &kptllnd_data.kptl_peers[idx];
- struct list_head *ptmp;
- kptl_peer_t *peer;
- kptl_tx_t *tx;
- unsigned long flags;
- int nsend;
- int nactive;
- int check_sends;
-
- CDEBUG(D_NET, "Bucket=%d, stamp=%d\n", idx, stamp);
-
- again:
- /* NB. Shared lock while I just look */
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- list_for_each (ptmp, peers) {
- peer = list_entry (ptmp, kptl_peer_t, peer_list);
-
- CDEBUG(D_NET, "Peer=%s Credits=%d Outstanding=%d Send=%d\n",
- libcfs_id2str(peer->peer_id), peer->peer_credits,
- peer->peer_outstanding_credits, peer->peer_sent_credits);
-
- spin_lock(&peer->peer_lock);
-
- if (peer->peer_check_stamp == stamp) {
- /* checked already this pass */
- spin_unlock(&peer->peer_lock);
- continue;
- }
-
- peer->peer_check_stamp = stamp;
- tx = kptllnd_find_timed_out_tx(peer);
- check_sends = peer->peer_retry_noop;
-
- spin_unlock(&peer->peer_lock);
-
- if (tx == NULL && !check_sends)
- continue;
-
- kptllnd_peer_addref(peer); /* 1 ref for me... */
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- if (tx == NULL) { /* nothing timed out */
- kptllnd_peer_check_sends(peer);
- kptllnd_peer_decref(peer); /* ...until here or... */
-
- /* rescan after dropping the lock */
- goto again;
- }
-
- spin_lock_irqsave(&peer->peer_lock, flags);
- nsend = kptllnd_count_queue(&peer->peer_sendq);
- nactive = kptllnd_count_queue(&peer->peer_activeq);
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- LCONSOLE_ERROR_MSG(0x126, "Timing out %s: %s\n",
- libcfs_id2str(peer->peer_id),
- (tx->tx_tposted == 0) ?
- "no free peer buffers" :
- "please check Portals");
-
- if (tx->tx_tposted) {
- CERROR("Could not send to %s after %ds (sent %lds ago); "
- "check Portals for possible issues\n",
- libcfs_id2str(peer->peer_id),
- *kptllnd_tunables.kptl_timeout,
- cfs_duration_sec(jiffies - tx->tx_tposted));
- } else {
- CERROR("Could not get credits for %s after %ds; "
- "possible Lustre networking issues\n",
- libcfs_id2str(peer->peer_id),
- *kptllnd_tunables.kptl_timeout);
- }
-
- CERROR("%s timed out: cred %d outstanding %d, sent %d, "
- "sendq %d, activeq %d Tx %p %s (%s%s%s) status %d "
- "%sposted %lu T/O %ds\n",
- libcfs_id2str(peer->peer_id), peer->peer_credits,
- peer->peer_outstanding_credits, peer->peer_sent_credits,
- nsend, nactive, tx, kptllnd_tx_typestr(tx->tx_type),
- tx->tx_active ? "A" : "",
- PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) ?
- "" : "M",
- PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) ?
- "" : "D",
- tx->tx_status,
- (tx->tx_tposted == 0) ? "not " : "",
- (tx->tx_tposted == 0) ? 0UL : (jiffies - tx->tx_tposted),
- *kptllnd_tunables.kptl_timeout);
-
- kptllnd_dump_ptltrace();
-
- kptllnd_tx_decref(tx);
-
- kptllnd_peer_close(peer, -ETIMEDOUT);
- kptllnd_peer_decref(peer); /* ...until here */
-
- /* start again now I've dropped the lock */
- goto again;
- }
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-}
-
-kptl_peer_t *
-kptllnd_id2peer_locked (lnet_process_id_t id)
-{
- struct list_head *peers = kptllnd_nid2peerlist(id.nid);
- struct list_head *tmp;
- kptl_peer_t *peer;
-
- list_for_each (tmp, peers) {
-
- peer = list_entry (tmp, kptl_peer_t, peer_list);
-
- LASSERT(peer->peer_state == PEER_STATE_WAITING_HELLO ||
- peer->peer_state == PEER_STATE_ACTIVE);
-
- if (peer->peer_id.nid != id.nid ||
- peer->peer_id.pid != id.pid)
- continue;
-
- kptllnd_peer_addref(peer);
-
- CDEBUG(D_NET, "%s -> %s (%d)\n",
- libcfs_id2str(id),
- kptllnd_ptlid2str(peer->peer_ptlid),
- atomic_read (&peer->peer_refcount));
- return peer;
- }
-
- return NULL;
-}
-
-void
-kptllnd_peertable_overflow_msg(char *str, lnet_process_id_t id)
-{
- LCONSOLE_ERROR_MSG(0x127, "%s %s overflows the peer table[%d]: "
- "messages may be dropped\n",
- str, libcfs_id2str(id),
- kptllnd_data.kptl_n_active_peers);
- LCONSOLE_ERROR_MSG(0x128, "Please correct by increasing "
- "'max_nodes' or 'max_procs_per_node'\n");
-}
-
-__u64
-kptllnd_get_last_seen_matchbits_locked(lnet_process_id_t lpid)
-{
- kptl_peer_t *peer;
- struct list_head *tmp;
-
- /* Find the last matchbits I saw this new peer using. Note..
- A. This peer cannot be in the peer table - she's new!
- B. If I can't find the peer in the closing/zombie peers, all
- matchbits are safe because all refs to the (old) peer have gone
- so all txs have completed so there's no risk of matchbit
- collision!
- */
-
- LASSERT(kptllnd_id2peer_locked(lpid) == NULL);
-
- /* peer's last matchbits can't change after it comes out of the peer
- * table, so first match is fine */
-
- list_for_each (tmp, &kptllnd_data.kptl_closing_peers) {
- peer = list_entry (tmp, kptl_peer_t, peer_list);
-
- if (peer->peer_id.nid == lpid.nid &&
- peer->peer_id.pid == lpid.pid)
- return peer->peer_last_matchbits_seen;
- }
-
- list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) {
- peer = list_entry (tmp, kptl_peer_t, peer_list);
-
- if (peer->peer_id.nid == lpid.nid &&
- peer->peer_id.pid == lpid.pid)
- return peer->peer_last_matchbits_seen;
- }
-
- return PTL_RESERVED_MATCHBITS;
-}
-
-kptl_peer_t *
-kptllnd_peer_handle_hello (ptl_process_id_t initiator,
- kptl_msg_t *msg)
-{
- rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock;
- kptl_peer_t *peer;
- kptl_peer_t *new_peer;
- lnet_process_id_t lpid;
- unsigned long flags;
- kptl_tx_t *hello_tx;
- int rc;
- __u64 safe_matchbits;
- __u64 last_matchbits_seen;
-
- lpid.nid = msg->ptlm_srcnid;
- lpid.pid = msg->ptlm_srcpid;
-
- CDEBUG(D_NET, "hello from %s(%s)\n",
- libcfs_id2str(lpid), kptllnd_ptlid2str(initiator));
-
- if (initiator.pid != kptllnd_data.kptl_portals_id.pid &&
- (msg->ptlm_srcpid & LNET_PID_USERFLAG) == 0) {
- /* If the peer's PID isn't _the_ ptllnd kernel pid, she must be
- * userspace. Refuse the connection if she hasn't set the
- * correct flag in her PID... */
- CERROR("Userflag not set in hello from %s (%s)\n",
- libcfs_id2str(lpid), kptllnd_ptlid2str(initiator));
- return NULL;
- }
-
- /* kptlhm_matchbits are the highest matchbits my peer may have used to
- * RDMA to me. I ensure I never register buffers for RDMA that could
- * match any she used */
- safe_matchbits = msg->ptlm_u.hello.kptlhm_matchbits + 1;
-
- if (safe_matchbits < PTL_RESERVED_MATCHBITS) {
- CERROR("Illegal matchbits "LPX64" in HELLO from %s\n",
- safe_matchbits, libcfs_id2str(lpid));
- return NULL;
- }
-
- if (msg->ptlm_u.hello.kptlhm_max_msg_size < PTLLND_MIN_BUFFER_SIZE) {
- CERROR("%s: max message size %d < MIN %d",
- libcfs_id2str(lpid),
- msg->ptlm_u.hello.kptlhm_max_msg_size,
- PTLLND_MIN_BUFFER_SIZE);
- return NULL;
- }
-
- if (msg->ptlm_credits <= 1) {
- CERROR("Need more than 1+%d credits from %s\n",
- msg->ptlm_credits, libcfs_id2str(lpid));
- return NULL;
- }
-
- write_lock_irqsave(g_lock, flags);
-
- peer = kptllnd_id2peer_locked(lpid);
- if (peer != NULL) {
- if (peer->peer_state == PEER_STATE_WAITING_HELLO) {
- /* Completing HELLO handshake */
- LASSERT(peer->peer_incarnation == 0);
-
- if (msg->ptlm_dststamp != 0 &&
- msg->ptlm_dststamp != peer->peer_myincarnation) {
- write_unlock_irqrestore(g_lock, flags);
-
- CERROR("Ignoring HELLO from %s: unexpected "
- "dststamp "LPX64" ("LPX64" wanted)\n",
- libcfs_id2str(lpid),
- msg->ptlm_dststamp,
- peer->peer_myincarnation);
- kptllnd_peer_decref(peer);
- return NULL;
- }
-
- /* Concurrent initiation or response to my HELLO */
- peer->peer_state = PEER_STATE_ACTIVE;
- peer->peer_incarnation = msg->ptlm_srcstamp;
- peer->peer_next_matchbits = safe_matchbits;
- peer->peer_max_msg_size =
- msg->ptlm_u.hello.kptlhm_max_msg_size;
-
- write_unlock_irqrestore(g_lock, flags);
- return peer;
- }
-
- if (msg->ptlm_dststamp != 0 &&
- msg->ptlm_dststamp <= peer->peer_myincarnation) {
- write_unlock_irqrestore(g_lock, flags);
-
- CERROR("Ignoring stale HELLO from %s: "
- "dststamp "LPX64" (current "LPX64")\n",
- libcfs_id2str(lpid),
- msg->ptlm_dststamp,
- peer->peer_myincarnation);
- kptllnd_peer_decref(peer);
- return NULL;
- }
-
- /* Brand new connection attempt: remove old incarnation */
- kptllnd_peer_close_locked(peer, 0);
- }
-
- kptllnd_cull_peertable_locked(lpid);
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (peer != NULL) {
- CDEBUG(D_NET, "Peer %s (%s) reconnecting:"
- " stamp "LPX64"("LPX64")\n",
- libcfs_id2str(lpid), kptllnd_ptlid2str(initiator),
- msg->ptlm_srcstamp, peer->peer_incarnation);
-
- kptllnd_peer_decref(peer);
- }
-
- hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE);
- if (hello_tx == NULL) {
- CERROR("Unable to allocate HELLO message for %s\n",
- libcfs_id2str(lpid));
- return NULL;
- }
-
- kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO,
- sizeof(kptl_hello_msg_t));
-
- new_peer = kptllnd_peer_allocate(lpid, initiator);
- if (new_peer == NULL) {
- kptllnd_tx_decref(hello_tx);
- return NULL;
- }
-
- rc = kptllnd_peer_reserve_buffers();
- if (rc != 0) {
- kptllnd_peer_decref(new_peer);
- kptllnd_tx_decref(hello_tx);
-
- CERROR("Failed to reserve buffers for %s\n",
- libcfs_id2str(lpid));
- return NULL;
- }
-
- write_lock_irqsave(g_lock, flags);
-
- again:
- if (kptllnd_data.kptl_shutdown) {
- write_unlock_irqrestore(g_lock, flags);
-
- CERROR ("Shutdown started, refusing connection from %s\n",
- libcfs_id2str(lpid));
- kptllnd_peer_unreserve_buffers();
- kptllnd_peer_decref(new_peer);
- kptllnd_tx_decref(hello_tx);
- return NULL;
- }
-
- peer = kptllnd_id2peer_locked(lpid);
- if (peer != NULL) {
- if (peer->peer_state == PEER_STATE_WAITING_HELLO) {
- /* An outgoing message instantiated 'peer' for me */
- LASSERT(peer->peer_incarnation == 0);
-
- peer->peer_state = PEER_STATE_ACTIVE;
- peer->peer_incarnation = msg->ptlm_srcstamp;
- peer->peer_next_matchbits = safe_matchbits;
- peer->peer_max_msg_size =
- msg->ptlm_u.hello.kptlhm_max_msg_size;
-
- write_unlock_irqrestore(g_lock, flags);
-
- CWARN("Outgoing instantiated peer %s\n",
- libcfs_id2str(lpid));
- } else {
- LASSERT (peer->peer_state == PEER_STATE_ACTIVE);
-
- write_unlock_irqrestore(g_lock, flags);
-
- /* WOW! Somehow this peer completed the HELLO
- * handshake while I slept. I guess I could have slept
- * while it rebooted and sent a new HELLO, so I'll fail
- * this one... */
- CWARN("Wow! peer %s\n", libcfs_id2str(lpid));
- kptllnd_peer_decref(peer);
- peer = NULL;
- }
-
- kptllnd_peer_unreserve_buffers();
- kptllnd_peer_decref(new_peer);
- kptllnd_tx_decref(hello_tx);
- return peer;
- }
-
- if (kptllnd_data.kptl_n_active_peers ==
- kptllnd_data.kptl_expected_peers) {
- /* peer table full */
- write_unlock_irqrestore(g_lock, flags);
-
- kptllnd_peertable_overflow_msg("Connection from ", lpid);
-
- rc = kptllnd_reserve_buffers(1); /* HELLO headroom */
- if (rc != 0) {
- CERROR("Refusing connection from %s\n",
- libcfs_id2str(lpid));
- kptllnd_peer_unreserve_buffers();
- kptllnd_peer_decref(new_peer);
- kptllnd_tx_decref(hello_tx);
- return NULL;
- }
-
- write_lock_irqsave(g_lock, flags);
- kptllnd_data.kptl_expected_peers++;
- goto again;
- }
-
- last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(lpid);
-
- hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen;
- hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size =
- *kptllnd_tunables.kptl_max_msg_size;
-
- new_peer->peer_state = PEER_STATE_ACTIVE;
- new_peer->peer_incarnation = msg->ptlm_srcstamp;
- new_peer->peer_next_matchbits = safe_matchbits;
- new_peer->peer_last_matchbits_seen = last_matchbits_seen;
- new_peer->peer_max_msg_size = msg->ptlm_u.hello.kptlhm_max_msg_size;
-
- kptllnd_peer_add_peertable_locked(new_peer);
-
- write_unlock_irqrestore(g_lock, flags);
-
- /* NB someone else could get in now and post a message before I post
- * the HELLO, but post_tx/check_sends take care of that! */
-
- CDEBUG(D_NETTRACE, "%s: post response hello %p\n",
- libcfs_id2str(new_peer->peer_id), hello_tx);
-
- kptllnd_post_tx(new_peer, hello_tx, 0);
- kptllnd_peer_check_sends(new_peer);
-
- return new_peer;
-}
-
-void
-kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag)
-{
- kptllnd_post_tx(peer, tx, nfrag);
- kptllnd_peer_check_sends(peer);
-}
-
-int
-kptllnd_find_target(kptl_peer_t **peerp, lnet_process_id_t target)
-{
- rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock;
- ptl_process_id_t ptl_id;
- kptl_peer_t *new_peer;
- kptl_tx_t *hello_tx;
- unsigned long flags;
- int rc;
- __u64 last_matchbits_seen;
-
- /* I expect to find the peer, so I only take a read lock... */
- read_lock_irqsave(g_lock, flags);
- *peerp = kptllnd_id2peer_locked(target);
- read_unlock_irqrestore(g_lock, flags);
-
- if (*peerp != NULL)
- return 0;
-
- if ((target.pid & LNET_PID_USERFLAG) != 0) {
- CWARN("Refusing to create a new connection to %s "
- "(non-kernel peer)\n", libcfs_id2str(target));
- return -EHOSTUNREACH;
- }
-
- /* The new peer is a kernel ptllnd, and kernel ptllnds all have
- * the same portals PID */
- ptl_id.nid = kptllnd_lnet2ptlnid(target.nid);
- ptl_id.pid = kptllnd_data.kptl_portals_id.pid;
-
- hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE);
- if (hello_tx == NULL) {
- CERROR("Unable to allocate connect message for %s\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
-
- kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO,
- sizeof(kptl_hello_msg_t));
-
- new_peer = kptllnd_peer_allocate(target, ptl_id);
- if (new_peer == NULL) {
- rc = -ENOMEM;
- goto unwind_0;
- }
-
- rc = kptllnd_peer_reserve_buffers();
- if (rc != 0)
- goto unwind_1;
-
- write_lock_irqsave(g_lock, flags);
- again:
- if (kptllnd_data.kptl_shutdown) {
- write_unlock_irqrestore(g_lock, flags);
- rc = -ESHUTDOWN;
- goto unwind_2;
- }
-
- *peerp = kptllnd_id2peer_locked(target);
- if (*peerp != NULL) {
- write_unlock_irqrestore(g_lock, flags);
- goto unwind_2;
- }
-
- kptllnd_cull_peertable_locked(target);
-
- if (kptllnd_data.kptl_n_active_peers ==
- kptllnd_data.kptl_expected_peers) {
- /* peer table full */
- write_unlock_irqrestore(g_lock, flags);
-
- kptllnd_peertable_overflow_msg("Connection to ", target);
-
- rc = kptllnd_reserve_buffers(1); /* HELLO headroom */
- if (rc != 0) {
- CERROR("Can't create connection to %s\n",
- libcfs_id2str(target));
- rc = -ENOMEM;
- goto unwind_2;
- }
- write_lock_irqsave(g_lock, flags);
- kptllnd_data.kptl_expected_peers++;
- goto again;
- }
-
- last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(target);
-
- hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen;
- hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size =
- *kptllnd_tunables.kptl_max_msg_size;
-
- new_peer->peer_state = PEER_STATE_WAITING_HELLO;
- new_peer->peer_last_matchbits_seen = last_matchbits_seen;
-
- kptllnd_peer_add_peertable_locked(new_peer);
-
- write_unlock_irqrestore(g_lock, flags);
-
- /* NB someone else could get in now and post a message before I post
- * the HELLO, but post_tx/check_sends take care of that! */
-
- CDEBUG(D_NETTRACE, "%s: post initial hello %p\n",
- libcfs_id2str(new_peer->peer_id), hello_tx);
-
- kptllnd_post_tx(new_peer, hello_tx, 0);
- kptllnd_peer_check_sends(new_peer);
-
- *peerp = new_peer;
- return 0;
-
- unwind_2:
- kptllnd_peer_unreserve_buffers();
- unwind_1:
- kptllnd_peer_decref(new_peer);
- unwind_0:
- kptllnd_tx_decref(hello_tx);
-
- return rc;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2006 Cluster File Systems, Inc. All rights reserved.
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-#include "ptllnd.h"
-
-#ifdef CRAY_XT3
-static struct semaphore ptltrace_mutex;
-static struct semaphore ptltrace_signal;
-
-void
-kptllnd_ptltrace_to_file(char *filename)
-{
- CFS_DECL_JOURNAL_DATA;
- CFS_DECL_MMSPACE;
-
- cfs_file_t *filp;
- char *start;
- char *tmpbuf;
- int len;
- int rc;
- loff_t offset = 0;
- int eof = 0;
-
- CWARN("dumping ptltrace to %s\n", filename);
-
- LIBCFS_ALLOC(tmpbuf, PAGE_SIZE);
- if (tmpbuf == NULL) {
- CERROR("Can't allocate page buffer to dump %s\n", filename);
- return;
- }
-
- CFS_PUSH_JOURNAL;
-
- filp = cfs_filp_open(filename,
- O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
- if (filp == NULL) {
- if (rc != -EEXIST)
- CERROR("Error %d creating %s\n", rc, filename);
- goto out;
- }
-
- CFS_MMSPACE_OPEN;
-
- while (!eof) {
- start = NULL;
- len = ptl_proc_read(tmpbuf, &start, offset,
- PAGE_SIZE, &eof, NULL);
-
- /* we don't allow ptl_proc_read to mimic case 0 or 1 behavior
- * for a proc_read method, only #2: from proc_file_read
- *
- * 2) Set *start = an address within the buffer.
- * Put the data of the requested offset at *start.
- * Return the number of bytes of data placed there.
- * If this number is greater than zero and you
- * didn't signal eof and the reader is prepared to
- * take more data you will be called again with the
- * requested offset advanced by the number of bytes
- * absorbed.
- */
-
- if (len == 0) /* end of file */
- break;
-
- if (len < 0) {
- CERROR("ptl_proc_read: error %d\n", len);
- break;
- }
-
- if (start < tmpbuf || start + len > tmpbuf + PAGE_SIZE) {
- CERROR("ptl_proc_read bug: %p for %d not in %p for %ld\n",
- start, len, tmpbuf, PAGE_SIZE);
- break;
- }
-
- rc = cfs_filp_write(filp, start, len, cfs_filp_poff(filp));
- if (rc != len) {
- if (rc < 0)
- CERROR("Error %d writing %s\n", rc, filename);
- else
- CERROR("Partial write %d(%d) to %s\n",
- rc, len, filename);
- break;
- }
-
- offset += len;
- }
-
- CFS_MMSPACE_CLOSE;
-
- rc = cfs_filp_fsync(filp);
- if (rc != 0)
- CERROR("Error %d syncing %s\n", rc, filename);
-
- cfs_filp_close(filp);
-out:
- CFS_POP_JOURNAL;
- LIBCFS_FREE(tmpbuf, PAGE_SIZE);
-}
-
-int
-kptllnd_dump_ptltrace_thread(void *arg)
-{
- static char fname[1024];
-
- libcfs_daemonize("ptltracedump");
-
- /* serialise with other instances of me */
- mutex_down(&ptltrace_mutex);
-
- snprintf(fname, sizeof(fname), "%s.%ld.%ld",
- *kptllnd_tunables.kptl_ptltrace_basename,
- cfs_time_current_sec(), (long)arg);
-
- kptllnd_ptltrace_to_file(fname);
-
- mutex_up(&ptltrace_mutex);
-
- /* unblock my creator */
- mutex_up(&ptltrace_signal);
-
- return 0;
-}
-
-void
-kptllnd_dump_ptltrace(void)
-{
- int rc;
-
- if (!*kptllnd_tunables.kptl_ptltrace_on_timeout)
- return;
-
- rc = cfs_kernel_thread(kptllnd_dump_ptltrace_thread,
- (void *)(long)cfs_curproc_pid(),
- CLONE_VM | CLONE_FS | CLONE_FILES);
- if (rc < 0) {
- CERROR("Error %d starting ptltrace dump thread\n", rc);
- } else {
- /* block until thread completes */
- mutex_down(&ptltrace_signal);
- }
-}
-
-void
-kptllnd_init_ptltrace(void)
-{
- init_mutex(&ptltrace_mutex);
- init_mutex_locked(&ptltrace_signal);
-}
-
-#else
-
-void
-kptllnd_dump_ptltrace(void)
-{
-}
-
-void
-kptllnd_init_ptltrace(void)
-{
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
- #include "ptllnd.h"
-
-void
-kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp)
-{
- memset(rxbp, 0, sizeof(*rxbp));
- spin_lock_init(&rxbp->rxbp_lock);
- INIT_LIST_HEAD(&rxbp->rxbp_list);
-}
-
-void
-kptllnd_rx_buffer_destroy(kptl_rx_buffer_t *rxb)
-{
- kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool;
-
- LASSERT(rxb->rxb_refcount == 0);
- LASSERT(PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE));
- LASSERT(!rxb->rxb_posted);
- LASSERT(rxb->rxb_idle);
-
- list_del(&rxb->rxb_list);
- rxbp->rxbp_count--;
-
- LIBCFS_FREE(rxb->rxb_buffer, kptllnd_rx_buffer_size());
- LIBCFS_FREE(rxb, sizeof(*rxb));
-}
-
-int
-kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count)
-{
- int bufsize;
- int msgs_per_buffer;
- int rc;
- kptl_rx_buffer_t *rxb;
- char *buffer;
- unsigned long flags;
-
- bufsize = kptllnd_rx_buffer_size();
- msgs_per_buffer = bufsize / (*kptllnd_tunables.kptl_max_msg_size);
-
- CDEBUG(D_NET, "kptllnd_rx_buffer_pool_reserve(%d)\n", count);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- for (;;) {
- if (rxbp->rxbp_shutdown) {
- rc = -ESHUTDOWN;
- break;
- }
-
- if (rxbp->rxbp_reserved + count <=
- rxbp->rxbp_count * msgs_per_buffer) {
- rc = 0;
- break;
- }
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- LIBCFS_ALLOC(rxb, sizeof(*rxb));
- LIBCFS_ALLOC(buffer, bufsize);
-
- if (rxb == NULL || buffer == NULL) {
- CERROR("Failed to allocate rx buffer\n");
-
- if (rxb != NULL)
- LIBCFS_FREE(rxb, sizeof(*rxb));
- if (buffer != NULL)
- LIBCFS_FREE(buffer, bufsize);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- rc = -ENOMEM;
- break;
- }
-
- memset(rxb, 0, sizeof(*rxb));
-
- rxb->rxb_eventarg.eva_type = PTLLND_EVENTARG_TYPE_BUF;
- rxb->rxb_refcount = 0;
- rxb->rxb_pool = rxbp;
- rxb->rxb_idle = 0;
- rxb->rxb_posted = 0;
- rxb->rxb_buffer = buffer;
- rxb->rxb_mdh = PTL_INVALID_HANDLE;
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- if (rxbp->rxbp_shutdown) {
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- LIBCFS_FREE(rxb, sizeof(*rxb));
- LIBCFS_FREE(buffer, bufsize);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- rc = -ESHUTDOWN;
- break;
- }
-
- list_add_tail(&rxb->rxb_list, &rxbp->rxbp_list);
- rxbp->rxbp_count++;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- kptllnd_rx_buffer_post(rxb);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- }
-
- if (rc == 0)
- rxbp->rxbp_reserved += count;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- return rc;
-}
-
-void
-kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp,
- int count)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- CDEBUG(D_NET, "kptllnd_rx_buffer_pool_unreserve(%d)\n", count);
- rxbp->rxbp_reserved -= count;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-}
-
-void
-kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp)
-{
- kptl_rx_buffer_t *rxb;
- int rc;
- int i;
- unsigned long flags;
- struct list_head *tmp;
- struct list_head *nxt;
- ptl_handle_md_t mdh;
-
- /* CAVEAT EMPTOR: I'm racing with everything here!!!
- *
- * Buffers can still be posted after I set rxbp_shutdown because I
- * can't hold rxbp_lock while I'm posting them.
- *
- * Calling PtlMDUnlink() here races with auto-unlinks; i.e. a buffer's
- * MD handle could become invalid under me. I am vulnerable to portals
- * re-using handles (i.e. make the same handle valid again, but for a
- * different MD) from when the MD is actually unlinked, to when the
- * event callback tells me it has been unlinked. */
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- rxbp->rxbp_shutdown = 1;
-
- for (i = 9;; i++) {
- list_for_each_safe(tmp, nxt, &rxbp->rxbp_list) {
- rxb = list_entry (tmp, kptl_rx_buffer_t, rxb_list);
-
- if (rxb->rxb_idle) {
- spin_unlock_irqrestore(&rxbp->rxbp_lock,
- flags);
- kptllnd_rx_buffer_destroy(rxb);
- spin_lock_irqsave(&rxbp->rxbp_lock,
- flags);
- continue;
- }
-
- mdh = rxb->rxb_mdh;
- if (PtlHandleIsEqual(mdh, PTL_INVALID_HANDLE))
- continue;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- rc = PtlMDUnlink(mdh);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- /* callback clears rxb_mdh and drops net's ref
- * (which causes repost, but since I set
- * shutdown, it will just set the buffer
- * idle) */
-#else
- if (rc == PTL_OK) {
- rxb->rxb_posted = 0;
- rxb->rxb_mdh = PTL_INVALID_HANDLE;
- kptllnd_rx_buffer_decref_locked(rxb);
- }
-#endif
- }
-
- if (list_empty(&rxbp->rxbp_list))
- break;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- /* Wait a bit for references to be dropped */
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d Busy RX Buffers\n",
- rxbp->rxbp_count);
-
- cfs_pause(cfs_time_seconds(1));
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- }
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-}
-
-void
-kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb)
-{
- int rc;
- ptl_md_t md;
- ptl_handle_me_t meh;
- ptl_handle_md_t mdh;
- ptl_process_id_t any;
- kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool;
- unsigned long flags;
-
- LASSERT (!in_interrupt());
- LASSERT (rxb->rxb_refcount == 0);
- LASSERT (!rxb->rxb_idle);
- LASSERT (!rxb->rxb_posted);
- LASSERT (PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE));
-
- any.nid = PTL_NID_ANY;
- any.pid = PTL_PID_ANY;
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- if (rxbp->rxbp_shutdown) {
- rxb->rxb_idle = 1;
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
- return;
- }
-
- rxb->rxb_refcount = 1; /* net's ref */
- rxb->rxb_posted = 1; /* I'm posting */
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- rc = PtlMEAttach(kptllnd_data.kptl_nih,
- *kptllnd_tunables.kptl_portal,
- any,
- LNET_MSG_MATCHBITS,
- 0, /* all matchbits are valid - ignore none */
- PTL_UNLINK,
- PTL_INS_AFTER,
- &meh);
- if (rc != PTL_OK) {
- CERROR("PtlMeAttach rxb failed %s(%d)\n",
- kptllnd_errtype2str(rc), rc);
- goto failed;
- }
-
- /*
- * Setup MD
- */
- md.start = rxb->rxb_buffer;
- md.length = PAGE_SIZE * *kptllnd_tunables.kptl_rxb_npages;
- md.threshold = PTL_MD_THRESH_INF;
- md.options = PTL_MD_OP_PUT |
- PTL_MD_LUSTRE_COMPLETION_SEMANTICS |
- PTL_MD_EVENT_START_DISABLE |
- PTL_MD_MAX_SIZE |
- PTL_MD_LOCAL_ALIGN8;
- md.user_ptr = &rxb->rxb_eventarg;
- md.max_size = *kptllnd_tunables.kptl_max_msg_size;
- md.eq_handle = kptllnd_data.kptl_eqh;
-
- rc = PtlMDAttach(meh, md, PTL_UNLINK, &mdh);
- if (rc == PTL_OK) {
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- if (rxb->rxb_posted) /* Not auto-unlinked yet!!! */
- rxb->rxb_mdh = mdh;
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
- return;
- }
-
- CERROR("PtlMDAttach rxb failed %s(%d)\n",
- kptllnd_errtype2str(rc), rc);
- rc = PtlMEUnlink(meh);
- LASSERT(rc == PTL_OK);
-
- failed:
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- rxb->rxb_posted = 0;
- /* XXX this will just try again immediately */
- kptllnd_rx_buffer_decref_locked(rxb);
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-}
-
-kptl_rx_t *
-kptllnd_rx_alloc(void)
-{
- kptl_rx_t* rx;
-
- if (IS_SIMULATION_ENABLED(FAIL_RX_ALLOC)) {
- CERROR ("FAIL_RX_ALLOC SIMULATION triggered\n");
- return NULL;
- }
-
- rx = cfs_mem_cache_alloc(kptllnd_data.kptl_rx_cache, CFS_ALLOC_ATOMIC);
- if (rx == NULL) {
- CERROR("Failed to allocate rx\n");
- return NULL;
- }
-
- memset(rx, 0, sizeof(*rx));
- return rx;
-}
-
-void
-kptllnd_rx_done(kptl_rx_t *rx, int post_credit)
-{
- kptl_rx_buffer_t *rxb = rx->rx_rxb;
- kptl_peer_t *peer = rx->rx_peer;
- unsigned long flags;
-
- LASSERT (post_credit == PTLLND_POSTRX_NO_CREDIT ||
- post_credit == PTLLND_POSTRX_PEER_CREDIT);
-
- CDEBUG(D_NET, "rx=%p rxb %p peer %p\n", rx, rxb, peer);
-
- if (rxb != NULL)
- kptllnd_rx_buffer_decref(rxb);
-
- if (peer != NULL) {
- /* Update credits (after I've decref-ed the buffer) */
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- if (post_credit == PTLLND_POSTRX_PEER_CREDIT)
- peer->peer_outstanding_credits++;
-
- LASSERT (peer->peer_outstanding_credits +
- peer->peer_sent_credits <=
- *kptllnd_tunables.kptl_peercredits);
-
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: rx %p done\n",
- libcfs_id2str(peer->peer_id), peer->peer_credits,
- peer->peer_outstanding_credits, peer->peer_sent_credits,
- rx);
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* I might have to send back credits */
- kptllnd_peer_check_sends(peer);
- kptllnd_peer_decref(peer);
- }
-
- cfs_mem_cache_free(kptllnd_data.kptl_rx_cache, rx);
-}
-
-void
-kptllnd_rx_buffer_callback (ptl_event_t *ev)
-{
- kptl_eventarg_t *eva = ev->md.user_ptr;
- kptl_rx_buffer_t *rxb = kptllnd_eventarg2obj(eva);
- kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool;
- kptl_rx_t *rx;
- int unlinked;
- unsigned long flags;
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- unlinked = ev->unlinked;
-#else
- unlinked = ev->type == PTL_EVENT_UNLINK;
-#endif
-
- CDEBUG(D_NET, "%s: %s(%d) rxb=%p fail=%s(%d) unlink=%d\n",
- kptllnd_ptlid2str(ev->initiator),
- kptllnd_evtype2str(ev->type), ev->type, rxb,
- kptllnd_errtype2str(ev->ni_fail_type), ev->ni_fail_type,
- unlinked);
-
- LASSERT (!rxb->rxb_idle);
- LASSERT (ev->md.start == rxb->rxb_buffer);
- LASSERT (ev->offset + ev->mlength <=
- PAGE_SIZE * *kptllnd_tunables.kptl_rxb_npages);
- LASSERT (ev->type == PTL_EVENT_PUT_END ||
- ev->type == PTL_EVENT_UNLINK);
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- ev->match_bits == LNET_MSG_MATCHBITS);
-
- if (ev->ni_fail_type != PTL_NI_OK) {
- CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%dn",
- kptllnd_ptlid2str(ev->initiator),
- kptllnd_evtype2str(ev->type), ev->type, rxb,
- kptllnd_errtype2str(ev->ni_fail_type),
- ev->ni_fail_type, unlinked);
-
- } else if (ev->type == PTL_EVENT_PUT_END &&
- !rxbp->rxbp_shutdown) {
-
- /* rxbp_shutdown sampled without locking! I only treat it as a
- * hint since shutdown can start while rx's are queued on
- * kptl_sched_rxq. */
-#if (PTL_MD_LOCAL_ALIGN8 == 0)
- /* Portals can't force message alignment - someone sending an
- * odd-length message will misalign subsequent messages and
- * force the fixup below... */
- if ((ev->mlength & 7) != 0)
- CWARN("Message from %s has odd length "LPU64": "
- "probable version incompatibility\n",
- kptllnd_ptlid2str(ev->initiator),
- (__u64)ev->mlength);
-#endif
- rx = kptllnd_rx_alloc();
- if (rx == NULL) {
- CERROR("Message from %s dropped: ENOMEM",
- kptllnd_ptlid2str(ev->initiator));
- } else {
- if ((ev->offset & 7) == 0) {
- kptllnd_rx_buffer_addref(rxb);
- rx->rx_rxb = rxb;
- rx->rx_nob = ev->mlength;
- rx->rx_msg = (kptl_msg_t *)
- (rxb->rxb_buffer + ev->offset);
- } else {
-#if (PTL_MD_LOCAL_ALIGN8 == 0)
- /* Portals can't force alignment - copy into
- * rx_space (avoiding overflow) to fix */
- int maxlen = *kptllnd_tunables.kptl_max_msg_size;
-
- rx->rx_rxb = NULL;
- rx->rx_nob = MIN(maxlen, ev->mlength);
- rx->rx_msg = (kptl_msg_t *)rx->rx_space;
- memcpy(rx->rx_msg, rxb->rxb_buffer + ev->offset,
- rx->rx_nob);
-#else
- /* Portals should have forced the alignment */
- LBUG();
-#endif
- }
-
- rx->rx_initiator = ev->initiator;
- rx->rx_treceived = jiffies;
-#ifdef CRAY_XT3
- rx->rx_uid = ev->uid;
-#endif
- /* Queue for attention */
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock,
- flags);
-
- list_add_tail(&rx->rx_list,
- &kptllnd_data.kptl_sched_rxq);
- wake_up(&kptllnd_data.kptl_sched_waitq);
-
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock,
- flags);
- }
- }
-
- if (unlinked) {
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- rxb->rxb_posted = 0;
- rxb->rxb_mdh = PTL_INVALID_HANDLE;
- kptllnd_rx_buffer_decref_locked(rxb);
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
- }
-}
-
-void
-kptllnd_nak (kptl_rx_t *rx)
-{
- /* Fire-and-forget a stub message that will let the peer know my
- * protocol magic/version and make her drop/refresh any peer state she
- * might have with me. */
- ptl_md_t md = {
- .start = kptllnd_data.kptl_nak_msg,
- .length = kptllnd_data.kptl_nak_msg->ptlm_nob,
- .threshold = 1,
- .options = 0,
- .user_ptr = NULL,
- .eq_handle = PTL_EQ_NONE};
- ptl_handle_md_t mdh;
- int rc;
-
- rc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &mdh);
- if (rc != PTL_OK) {
- CWARN("Can't NAK %s: bind failed %s(%d)\n",
- kptllnd_ptlid2str(rx->rx_initiator),
- kptllnd_errtype2str(rc), rc);
- return;
- }
-
- rc = PtlPut(mdh, PTL_NOACK_REQ, rx->rx_initiator,
- *kptllnd_tunables.kptl_portal, 0,
- LNET_MSG_MATCHBITS, 0, 0);
-
- if (rc != PTL_OK)
- CWARN("Can't NAK %s: put failed %s(%d)\n",
- kptllnd_ptlid2str(rx->rx_initiator),
- kptllnd_errtype2str(rc), rc);
-}
-
-void
-kptllnd_rx_parse(kptl_rx_t *rx)
-{
- kptl_msg_t *msg = rx->rx_msg;
- int post_credit = PTLLND_POSTRX_PEER_CREDIT;
- kptl_peer_t *peer;
- int rc;
- unsigned long flags;
- lnet_process_id_t srcid;
-
- LASSERT (rx->rx_peer == NULL);
-
- if ((rx->rx_nob >= 4 &&
- (msg->ptlm_magic == LNET_PROTO_MAGIC ||
- msg->ptlm_magic == __swab32(LNET_PROTO_MAGIC))) ||
- (rx->rx_nob >= 6 &&
- ((msg->ptlm_magic == PTLLND_MSG_MAGIC &&
- msg->ptlm_version != PTLLND_MSG_VERSION) ||
- (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC) &&
- msg->ptlm_version != __swab16(PTLLND_MSG_VERSION))))) {
- /* NAK incompatible versions
- * See other LNDs for how to handle this if/when ptllnd begins
- * to allow different versions to co-exist */
- CERROR("Bad version: got %04x expected %04x from %s\n",
- (__u32)(msg->ptlm_magic == PTLLND_MSG_MAGIC ?
- msg->ptlm_version : __swab16(msg->ptlm_version)),
- PTLLND_MSG_VERSION, kptllnd_ptlid2str(rx->rx_initiator));
- kptllnd_nak(rx);
- goto rx_done;
- }
-
- rc = kptllnd_msg_unpack(msg, rx->rx_nob);
- if (rc != 0) {
- CERROR ("Error %d unpacking rx from %s\n",
- rc, kptllnd_ptlid2str(rx->rx_initiator));
- goto rx_done;
- }
-
- srcid.nid = msg->ptlm_srcnid;
- srcid.pid = msg->ptlm_srcpid;
-
- CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks (%ld s)\n",
- libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type),
- msg->ptlm_credits, rx, rx->rx_rxb,
- jiffies - rx->rx_treceived,
- cfs_duration_sec(jiffies - rx->rx_treceived));
-
- if (srcid.nid != kptllnd_ptl2lnetnid(rx->rx_initiator.nid)) {
- CERROR("Bad source id %s from %s\n",
- libcfs_id2str(srcid),
- kptllnd_ptlid2str(rx->rx_initiator));
- goto rx_done;
- }
-
- if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) {
- peer = kptllnd_id2peer(srcid);
- if (peer == NULL)
- goto rx_done;
-
- CWARN("NAK from %s (%s)\n",
- libcfs_id2str(srcid),
- kptllnd_ptlid2str(rx->rx_initiator));
-
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ptlm_dstnid != kptllnd_data.kptl_ni->ni_nid ||
- msg->ptlm_dstpid != the_lnet.ln_pid) {
- CERROR("Bad dstid %s (expected %s) from %s\n",
- libcfs_id2str((lnet_process_id_t) {
- .nid = msg->ptlm_dstnid,
- .pid = msg->ptlm_dstpid}),
- libcfs_id2str((lnet_process_id_t) {
- .nid = kptllnd_data.kptl_ni->ni_nid,
- .pid = the_lnet.ln_pid}),
- kptllnd_ptlid2str(rx->rx_initiator));
- goto rx_done;
- }
-
- if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) {
- peer = kptllnd_peer_handle_hello(rx->rx_initiator, msg);
- if (peer == NULL)
- goto rx_done;
- } else {
- peer = kptllnd_id2peer(srcid);
- if (peer == NULL) {
- CWARN("NAK %s: no connection; peer must reconnect\n",
- libcfs_id2str(srcid));
- /* NAK to make the peer reconnect */
- kptllnd_nak(rx);
- goto rx_done;
- }
-
- /* Ignore anything apart from HELLO while I'm waiting for it and
- * any messages for a previous incarnation of the connection */
- if (peer->peer_state == PEER_STATE_WAITING_HELLO ||
- msg->ptlm_dststamp < peer->peer_myincarnation) {
- kptllnd_peer_decref(peer);
- goto rx_done;
- }
-
- if (msg->ptlm_srcstamp != peer->peer_incarnation) {
- CERROR("%s: Unexpected srcstamp "LPX64" "
- "("LPX64" expected)\n",
- libcfs_id2str(peer->peer_id),
- msg->ptlm_srcstamp,
- peer->peer_incarnation);
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ptlm_dststamp != peer->peer_myincarnation) {
- CERROR("%s: Unexpected dststamp "LPX64" "
- "("LPX64" expected)\n",
- libcfs_id2str(peer->peer_id), msg->ptlm_dststamp,
- peer->peer_myincarnation);
- rc = -EPROTO;
- goto failed;
- }
- }
-
- LASSERT (msg->ptlm_srcnid == peer->peer_id.nid &&
- msg->ptlm_srcpid == peer->peer_id.pid);
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- /* Check peer only sends when I've sent her credits */
- if (peer->peer_sent_credits == 0) {
- int c = peer->peer_credits;
- int oc = peer->peer_outstanding_credits;
- int sc = peer->peer_sent_credits;
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- CERROR("%s: buffer overrun [%d/%d+%d]\n",
- libcfs_id2str(peer->peer_id), c, sc, oc);
- goto failed;
- }
- peer->peer_sent_credits--;
-
- /* No check for credit overflow - the peer may post new
- * buffers after the startup handshake. */
- peer->peer_credits += msg->ptlm_credits;
-
- /* This ensures the credit taken by NOOP can be returned */
- if (msg->ptlm_type == PTLLND_MSG_TYPE_NOOP) {
- peer->peer_outstanding_credits++;
- post_credit = PTLLND_POSTRX_NO_CREDIT;
- }
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* See if something can go out now that credits have come in */
- if (msg->ptlm_credits != 0)
- kptllnd_peer_check_sends(peer);
-
- /* ptllnd-level protocol correct - rx takes my ref on peer and increments
- * peer_outstanding_credits when it completes */
- rx->rx_peer = peer;
- kptllnd_peer_alive(peer);
-
- switch (msg->ptlm_type) {
- default:
- /* already checked by kptllnd_msg_unpack() */
- LBUG();
-
- case PTLLND_MSG_TYPE_HELLO:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_HELLO\n");
- goto rx_done;
-
- case PTLLND_MSG_TYPE_NOOP:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_NOOP\n");
- goto rx_done;
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n");
- rc = lnet_parse(kptllnd_data.kptl_ni,
- &msg->ptlm_u.immediate.kptlim_hdr,
- msg->ptlm_srcnid,
- rx, 0);
- if (rc >= 0) /* kptllnd_recv owns 'rx' now */
- return;
- goto failed;
-
- case PTLLND_MSG_TYPE_PUT:
- case PTLLND_MSG_TYPE_GET:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n",
- msg->ptlm_type == PTLLND_MSG_TYPE_PUT ?
- "PUT" : "GET");
-
- /* checked in kptllnd_msg_unpack() */
- LASSERT (msg->ptlm_u.rdma.kptlrm_matchbits >=
- PTL_RESERVED_MATCHBITS);
-
- /* Update last match bits seen */
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- if (msg->ptlm_u.rdma.kptlrm_matchbits >
- rx->rx_peer->peer_last_matchbits_seen)
- rx->rx_peer->peer_last_matchbits_seen =
- msg->ptlm_u.rdma.kptlrm_matchbits;
-
- spin_unlock_irqrestore(&rx->rx_peer->peer_lock, flags);
-
- rc = lnet_parse(kptllnd_data.kptl_ni,
- &msg->ptlm_u.rdma.kptlrm_hdr,
- msg->ptlm_srcnid,
- rx, 1);
- if (rc >= 0) /* kptllnd_recv owns 'rx' now */
- return;
- goto failed;
- }
-
- failed:
- kptllnd_peer_close(peer, rc);
- if (rx->rx_peer == NULL) /* drop ref on peer */
- kptllnd_peer_decref(peer); /* unless rx_done will */
- rx_done:
- kptllnd_rx_done(rx, post_credit);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
- #include "ptllnd.h"
-
-void
-kptllnd_free_tx(kptl_tx_t *tx)
-{
- if (tx->tx_msg != NULL)
- LIBCFS_FREE(tx->tx_msg, sizeof(*tx->tx_msg));
-
- if (tx->tx_frags != NULL)
- LIBCFS_FREE(tx->tx_frags, sizeof(*tx->tx_frags));
-
- LIBCFS_FREE(tx, sizeof(*tx));
-
- atomic_dec(&kptllnd_data.kptl_ntx);
-
- /* Keep the tunable in step for visibility */
- *kptllnd_tunables.kptl_ntx = atomic_read(&kptllnd_data.kptl_ntx);
-}
-
-kptl_tx_t *
-kptllnd_alloc_tx(void)
-{
- kptl_tx_t *tx;
-
- LIBCFS_ALLOC(tx, sizeof(*tx));
- if (tx == NULL) {
- CERROR("Failed to allocate TX\n");
- return NULL;
- }
-
- atomic_inc(&kptllnd_data.kptl_ntx);
-
- /* Keep the tunable in step for visibility */
- *kptllnd_tunables.kptl_ntx = atomic_read(&kptllnd_data.kptl_ntx);
-
- tx->tx_idle = 1;
- tx->tx_rdma_mdh = PTL_INVALID_HANDLE;
- tx->tx_msg_mdh = PTL_INVALID_HANDLE;
- tx->tx_rdma_eventarg.eva_type = PTLLND_EVENTARG_TYPE_RDMA;
- tx->tx_msg_eventarg.eva_type = PTLLND_EVENTARG_TYPE_MSG;
- tx->tx_msg = NULL;
- tx->tx_frags = NULL;
-
- LIBCFS_ALLOC(tx->tx_msg, sizeof(*tx->tx_msg));
- if (tx->tx_msg == NULL) {
- CERROR("Failed to allocate TX payload\n");
- goto failed;
- }
-
- LIBCFS_ALLOC(tx->tx_frags, sizeof(*tx->tx_frags));
- if (tx->tx_frags == NULL) {
- CERROR("Failed to allocate TX frags\n");
- goto failed;
- }
-
- return tx;
-
- failed:
- kptllnd_free_tx(tx);
- return NULL;
-}
-
-int
-kptllnd_setup_tx_descs()
-{
- int n = *kptllnd_tunables.kptl_ntx;
- int i;
-
- for (i = 0; i < n; i++) {
- kptl_tx_t *tx = kptllnd_alloc_tx();
-
- if (tx == NULL)
- return -ENOMEM;
-
- spin_lock(&kptllnd_data.kptl_tx_lock);
-
- list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs);
-
- spin_unlock(&kptllnd_data.kptl_tx_lock);
- }
-
- return 0;
-}
-
-void
-kptllnd_cleanup_tx_descs()
-{
- kptl_tx_t *tx;
-
- /* No locking; single threaded now */
- LASSERT (kptllnd_data.kptl_shutdown == 2);
-
- while (!list_empty(&kptllnd_data.kptl_idle_txs)) {
- tx = list_entry(kptllnd_data.kptl_idle_txs.next,
- kptl_tx_t, tx_list);
-
- list_del(&tx->tx_list);
- kptllnd_free_tx(tx);
- }
-
- LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0);
-}
-
-kptl_tx_t *
-kptllnd_get_idle_tx(enum kptl_tx_type type)
-{
- kptl_tx_t *tx = NULL;
-
- if (IS_SIMULATION_ENABLED(FAIL_TX_PUT_ALLOC) &&
- type == TX_TYPE_PUT_REQUEST) {
- CERROR("FAIL_TX_PUT_ALLOC SIMULATION triggered\n");
- return NULL;
- }
-
- if (IS_SIMULATION_ENABLED(FAIL_TX_GET_ALLOC) &&
- type == TX_TYPE_GET_REQUEST) {
- CERROR ("FAIL_TX_GET_ALLOC SIMULATION triggered\n");
- return NULL;
- }
-
- if (IS_SIMULATION_ENABLED(FAIL_TX)) {
- CERROR ("FAIL_TX SIMULATION triggered\n");
- return NULL;
- }
-
- spin_lock(&kptllnd_data.kptl_tx_lock);
-
- if (list_empty (&kptllnd_data.kptl_idle_txs)) {
- spin_unlock(&kptllnd_data.kptl_tx_lock);
-
- tx = kptllnd_alloc_tx();
- if (tx == NULL)
- return NULL;
- } else {
- tx = list_entry(kptllnd_data.kptl_idle_txs.next,
- kptl_tx_t, tx_list);
- list_del(&tx->tx_list);
-
- spin_unlock(&kptllnd_data.kptl_tx_lock);
- }
-
- LASSERT (atomic_read(&tx->tx_refcount)== 0);
- LASSERT (tx->tx_idle);
- LASSERT (!tx->tx_active);
- LASSERT (tx->tx_lnet_msg == NULL);
- LASSERT (tx->tx_lnet_replymsg == NULL);
- LASSERT (tx->tx_peer == NULL);
- LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
- LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
-
- tx->tx_type = type;
- atomic_set(&tx->tx_refcount, 1);
- tx->tx_status = 0;
- tx->tx_idle = 0;
- tx->tx_tposted = 0;
- tx->tx_acked = *kptllnd_tunables.kptl_ack_puts;
-
- CDEBUG(D_NET, "tx=%p\n", tx);
- return tx;
-}
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
-int
-kptllnd_tx_abort_netio(kptl_tx_t *tx)
-{
- kptl_peer_t *peer = tx->tx_peer;
- ptl_handle_md_t msg_mdh;
- ptl_handle_md_t rdma_mdh;
- unsigned long flags;
-
- LASSERT (atomic_read(&tx->tx_refcount) == 0);
- LASSERT (!tx->tx_active);
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- msg_mdh = tx->tx_msg_mdh;
- rdma_mdh = tx->tx_rdma_mdh;
-
- if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) &&
- PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) {
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return 0;
- }
-
- /* Uncompleted comms: there must have been some error and it must be
- * propagated to LNET... */
- LASSERT (tx->tx_status != 0 ||
- (tx->tx_lnet_msg == NULL &&
- tx->tx_lnet_replymsg == NULL));
-
- /* stash the tx on its peer until it completes */
- atomic_set(&tx->tx_refcount, 1);
- tx->tx_active = 1;
- list_add_tail(&tx->tx_list, &peer->peer_activeq);
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* These unlinks will ensure completion events (normal or unlink) will
- * happen ASAP */
-
- if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE))
- PtlMDUnlink(msg_mdh);
-
- if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE))
- PtlMDUnlink(rdma_mdh);
-
- return -EAGAIN;
-}
-#else
-int
-kptllnd_tx_abort_netio(kptl_tx_t *tx)
-{
- ptl_peer_t *peer = tx->tx_peer;
- ptl_handle_md_t msg_mdh;
- ptl_handle_md_t rdma_mdh;
- unsigned long flags;
- ptl_err_t prc;
-
- LASSERT (atomic_read(&tx->tx_refcount) == 0);
- LASSERT (!tx->tx_active);
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- msg_mdh = tx->tx_msg_mdh;
- rdma_mdh = tx->tx_rdma_mdh;
-
- if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) &&
- PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) {
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return 0;
- }
-
- /* Uncompleted comms: there must have been some error and it must be
- * propagated to LNET... */
- LASSERT (tx->tx_status != 0 ||
- (tx->tx_lnet_msg == NULL &&
- tx->tx_replymsg == NULL));
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE)) {
- prc = PtlMDUnlink(msg_mdh);
- if (prc == PTL_OK)
- msg_mdh = PTL_INVALID_HANDLE;
- }
-
- if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) {
- prc = PtlMDUnlink(rdma_mdh);
- if (prc == PTL_OK)
- rdma_mdh = PTL_INVALID_HANDLE;
- }
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- /* update tx_???_mdh if callback hasn't fired */
- if (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE))
- msg_mdh = PTL_INVALID_HANDLE;
- else
- tx->tx_msg_mdh = msg_mdh;
-
- if (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE))
- rdma_mdh = PTL_INVALID_HANDLE;
- else
- tx->tx_rdma_mdh = rdma_mdh;
-
- if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) &&
- PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) {
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return 0;
- }
-
- /* stash the tx on its peer until it completes */
- atomic_set(&tx->tx_refcount, 1);
- tx->tx_active = 1;
- list_add_tail(&tx->tx_list, &peer->peer_activeq);
-
- kptllnd_peer_addref(peer); /* extra ref for me... */
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* This will get the watchdog thread to try aborting all the peer's
- * comms again. NB, this deems it fair that 1 failing tx which can't
- * be aborted immediately (i.e. its MDs are still busy) is valid cause
- * to nuke everything to the same peer! */
- kptllnd_peer_close(peer, tx->tx_status);
-
- kptllnd_peer_decref(peer);
-
- return -EAGAIN;
-}
-#endif
-
-void
-kptllnd_tx_fini (kptl_tx_t *tx)
-{
- lnet_msg_t *replymsg = tx->tx_lnet_replymsg;
- lnet_msg_t *msg = tx->tx_lnet_msg;
- kptl_peer_t *peer = tx->tx_peer;
- int status = tx->tx_status;
- int rc;
-
- LASSERT (!in_interrupt());
- LASSERT (atomic_read(&tx->tx_refcount) == 0);
- LASSERT (!tx->tx_idle);
- LASSERT (!tx->tx_active);
-
- /* TX has completed or failed */
-
- if (peer != NULL) {
- rc = kptllnd_tx_abort_netio(tx);
- if (rc != 0)
- return;
- }
-
- LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
- LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
-
- tx->tx_lnet_msg = tx->tx_lnet_replymsg = NULL;
- tx->tx_peer = NULL;
- tx->tx_idle = 1;
-
- spin_lock(&kptllnd_data.kptl_tx_lock);
- list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs);
- spin_unlock(&kptllnd_data.kptl_tx_lock);
-
- /* Must finalize AFTER freeing 'tx' */
- if (msg != NULL)
- lnet_finalize(kptllnd_data.kptl_ni, msg,
- (replymsg == NULL) ? status : 0);
-
- if (replymsg != NULL)
- lnet_finalize(kptllnd_data.kptl_ni, replymsg, status);
-
- if (peer != NULL)
- kptllnd_peer_decref(peer);
-}
-
-const char *
-kptllnd_tx_typestr(int type)
-{
- switch (type) {
- default:
- return "<TYPE UNKNOWN>";
-
- case TX_TYPE_SMALL_MESSAGE:
- return "msg";
-
- case TX_TYPE_PUT_REQUEST:
- return "put_req";
-
- case TX_TYPE_GET_REQUEST:
- return "get_req";
- break;
-
- case TX_TYPE_PUT_RESPONSE:
- return "put_rsp";
- break;
-
- case TX_TYPE_GET_RESPONSE:
- return "get_rsp";
- }
-}
-
-void
-kptllnd_tx_callback(ptl_event_t *ev)
-{
- kptl_eventarg_t *eva = ev->md.user_ptr;
- int ismsg = (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG);
- kptl_tx_t *tx = kptllnd_eventarg2obj(eva);
- kptl_peer_t *peer = tx->tx_peer;
- int ok = (ev->ni_fail_type == PTL_OK);
- int unlinked;
- unsigned long flags;
-
- LASSERT (peer != NULL);
- LASSERT (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG ||
- eva->eva_type == PTLLND_EVENTARG_TYPE_RDMA);
- LASSERT (!ismsg || !PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
- LASSERT (ismsg || !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- unlinked = ev->unlinked;
-#else
- unlinked = (ev->type == PTL_EVENT_UNLINK);
-#endif
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n",
- libcfs_id2str(peer->peer_id), peer->peer_credits,
- peer->peer_outstanding_credits, peer->peer_sent_credits,
- kptllnd_evtype2str(ev->type), ev->type,
- tx, kptllnd_errtype2str(ev->ni_fail_type),
- ev->ni_fail_type, unlinked);
-
- switch (tx->tx_type) {
- default:
- LBUG();
-
- case TX_TYPE_SMALL_MESSAGE:
- LASSERT (ismsg);
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- ev->type == PTL_EVENT_SEND_END ||
- (ev->type == PTL_EVENT_ACK && tx->tx_acked));
- break;
-
- case TX_TYPE_PUT_REQUEST:
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- (ismsg && ev->type == PTL_EVENT_SEND_END) ||
- (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) ||
- (!ismsg && ev->type == PTL_EVENT_GET_END));
- break;
-
- case TX_TYPE_GET_REQUEST:
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- (ismsg && ev->type == PTL_EVENT_SEND_END) ||
- (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) ||
- (!ismsg && ev->type == PTL_EVENT_PUT_END));
-
- if (!ismsg && ok && ev->type == PTL_EVENT_PUT_END) {
- if (ev->hdr_data == PTLLND_RDMA_OK) {
- lnet_set_reply_msg_len(
- kptllnd_data.kptl_ni,
- tx->tx_lnet_replymsg,
- ev->mlength);
- } else {
- /* no match at peer */
- tx->tx_status = -EIO;
- }
- }
- break;
-
- case TX_TYPE_PUT_RESPONSE:
- LASSERT (!ismsg);
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- ev->type == PTL_EVENT_SEND_END ||
- ev->type == PTL_EVENT_REPLY_END);
- break;
-
- case TX_TYPE_GET_RESPONSE:
- LASSERT (!ismsg);
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- ev->type == PTL_EVENT_SEND_END ||
- (ev->type == PTL_EVENT_ACK && tx->tx_acked));
- break;
- }
-
- if (ok) {
- kptllnd_peer_alive(peer);
- } else {
- CERROR("Portals error to %s: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_evtype2str(ev->type), ev->type,
- tx, kptllnd_errtype2str(ev->ni_fail_type),
- ev->ni_fail_type, unlinked);
- tx->tx_status = -EIO;
- kptllnd_peer_close(peer, -EIO);
- }
-
- if (!unlinked)
- return;
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- if (ismsg)
- tx->tx_msg_mdh = PTL_INVALID_HANDLE;
- else
- tx->tx_rdma_mdh = PTL_INVALID_HANDLE;
-
- if (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) ||
- !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) ||
- !tx->tx_active) {
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return;
- }
-
- list_del(&tx->tx_list);
- tx->tx_active = 0;
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* drop peer's ref, but if it was the last one... */
- if (atomic_dec_and_test(&tx->tx_refcount)) {
- /* ...finalize it in thread context! */
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags);
-
- list_add_tail(&tx->tx_list, &kptllnd_data.kptl_sched_txq);
- wake_up(&kptllnd_data.kptl_sched_waitq);
-
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags);
- }
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-
-#include <config.h>
-
-#include <lnet/api-support.h>
-
-/* This ghastly hack to allows me to include lib-types.h It doesn't affect any
- * assertions generated here (but fails-safe if it ever does) */
-typedef struct {
- int counter;
-} atomic_t;
-
-#include <lnet/lib-types.h>
-#include <lnet/ptllnd_wire.h>
-
-#ifndef HAVE_STRNLEN
-#define strnlen(s, i) strlen(s)
-#endif
-
-#define BLANK_LINE() \
-do { \
- printf ("\n"); \
-} while (0)
-
-#define COMMENT(c) \
-do { \
- printf (" /* "c" */\n"); \
-} while (0)
-
-#undef STRINGIFY
-#define STRINGIFY(a) #a
-
-#define CHECK_DEFINE(a) \
-do { \
- printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \
-} while (0)
-
-#define CHECK_VALUE(a) \
-do { \
- printf (" CLASSERT ("#a" == %d);\n", a); \
-} while (0)
-
-#define CHECK_MEMBER_OFFSET(s,m) \
-do { \
- CHECK_VALUE((int)offsetof(s, m)); \
-} while (0)
-
-#define CHECK_MEMBER_SIZEOF(s,m) \
-do { \
- CHECK_VALUE((int)sizeof(((s *)0)->m)); \
-} while (0)
-
-#define CHECK_MEMBER(s,m) \
-do { \
- CHECK_MEMBER_OFFSET(s, m); \
- CHECK_MEMBER_SIZEOF(s, m); \
-} while (0)
-
-#define CHECK_STRUCT(s) \
-do { \
- BLANK_LINE (); \
- COMMENT ("Checks for struct "#s); \
- CHECK_VALUE((int)sizeof(s)); \
-} while (0)
-
-void
-system_string (char *cmdline, char *str, int len)
-{
- int fds[2];
- int rc;
- pid_t pid;
-
- rc = pipe (fds);
- if (rc != 0)
- abort ();
-
- pid = fork ();
- if (pid == 0) {
- /* child */
- int fd = fileno(stdout);
-
- rc = dup2(fds[1], fd);
- if (rc != fd)
- abort();
-
- exit(system(cmdline));
- /* notreached */
- } else if ((int)pid < 0) {
- abort();
- } else {
- FILE *f = fdopen (fds[0], "r");
-
- if (f == NULL)
- abort();
-
- close(fds[1]);
-
- if (fgets(str, len, f) == NULL)
- abort();
-
- if (waitpid(pid, &rc, 0) != pid)
- abort();
-
- if (!WIFEXITED(rc) ||
- WEXITSTATUS(rc) != 0)
- abort();
-
- if (strnlen(str, len) == len)
- str[len - 1] = 0;
-
- if (str[strlen(str) - 1] == '\n')
- str[strlen(str) - 1] = 0;
-
- fclose(f);
- }
-}
-
-int
-main (int argc, char **argv)
-{
- char unameinfo[80];
- char gccinfo[80];
-
- system_string("uname -a", unameinfo, sizeof(unameinfo));
- system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo));
-
- printf ("void kptllnd_assert_wire_constants (void)\n"
- "{\n"
- " /* Wire protocol assertions generated by 'wirecheck'\n"
- " * running on %s\n"
- " * with %s */\n"
- "\n", unameinfo, gccinfo);
-
- BLANK_LINE ();
-
- COMMENT ("Constants...");
- CHECK_DEFINE (PTL_RESERVED_MATCHBITS);
- CHECK_DEFINE (LNET_MSG_MATCHBITS);
-
- CHECK_DEFINE (PTLLND_MSG_MAGIC);
- CHECK_DEFINE (PTLLND_MSG_VERSION);
-
- CHECK_DEFINE (PTLLND_RDMA_OK);
- CHECK_DEFINE (PTLLND_RDMA_FAIL);
-
- CHECK_DEFINE (PTLLND_MSG_TYPE_INVALID);
- CHECK_DEFINE (PTLLND_MSG_TYPE_PUT);
- CHECK_DEFINE (PTLLND_MSG_TYPE_GET);
- CHECK_DEFINE (PTLLND_MSG_TYPE_IMMEDIATE);
- CHECK_DEFINE (PTLLND_MSG_TYPE_NOOP);
- CHECK_DEFINE (PTLLND_MSG_TYPE_HELLO);
- CHECK_DEFINE (PTLLND_MSG_TYPE_NAK);
-
- CHECK_STRUCT (kptl_msg_t);
- CHECK_MEMBER (kptl_msg_t, ptlm_magic);
- CHECK_MEMBER (kptl_msg_t, ptlm_version);
- CHECK_MEMBER (kptl_msg_t, ptlm_type);
- CHECK_MEMBER (kptl_msg_t, ptlm_credits);
- CHECK_MEMBER (kptl_msg_t, ptlm_nob);
- CHECK_MEMBER (kptl_msg_t, ptlm_cksum);
- CHECK_MEMBER (kptl_msg_t, ptlm_srcnid);
- CHECK_MEMBER (kptl_msg_t, ptlm_srcstamp);
- CHECK_MEMBER (kptl_msg_t, ptlm_dstnid);
- CHECK_MEMBER (kptl_msg_t, ptlm_dststamp);
- CHECK_MEMBER (kptl_msg_t, ptlm_srcpid);
- CHECK_MEMBER (kptl_msg_t, ptlm_dstpid);
- CHECK_MEMBER (kptl_msg_t, ptlm_u.immediate);
- CHECK_MEMBER (kptl_msg_t, ptlm_u.rdma);
- CHECK_MEMBER (kptl_msg_t, ptlm_u.hello);
-
- CHECK_STRUCT (kptl_immediate_msg_t);
- CHECK_MEMBER (kptl_immediate_msg_t, kptlim_hdr);
- CHECK_MEMBER (kptl_immediate_msg_t, kptlim_payload[13]);
-
- CHECK_STRUCT (kptl_rdma_msg_t);
- CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_hdr);
- CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_matchbits);
-
- CHECK_STRUCT (kptl_hello_msg_t);
- CHECK_MEMBER (kptl_hello_msg_t, kptlhm_matchbits);
- CHECK_MEMBER (kptl_hello_msg_t, kptlhm_max_msg_size);
-
- printf ("}\n\n");
-
- return (0);
-}
+++ /dev/null
-.deps
-Makefile
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.*.cmd
-.tmp_versions
-.depend
+++ /dev/null
-MODULES := kqswlnd
-kqswlnd-objs := qswlnd.o qswlnd_cb.o qswlnd_modparams.o
-
-EXTRA_POST_CFLAGS := @QSWCPPFLAGS@ -I/usr/include
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_QSWLND
-modulenet_DATA = kqswlnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kqswlnd-objs:%.o=%.c) qswlnd.h
+++ /dev/null
- /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- * Copyright (C) 2002-2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.lustre.org
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "qswlnd.h"
-
-
-lnd_t the_kqswlnd =
-{
- .lnd_type = QSWLND,
- .lnd_startup = kqswnal_startup,
- .lnd_shutdown = kqswnal_shutdown,
- .lnd_ctl = kqswnal_ctl,
- .lnd_send = kqswnal_send,
- .lnd_recv = kqswnal_recv,
-};
-
-kqswnal_data_t kqswnal_data;
-
-int
-kqswnal_get_tx_desc (struct libcfs_ioctl_data *data)
-{
- unsigned long flags;
- struct list_head *tmp;
- kqswnal_tx_t *ktx;
- lnet_hdr_t *hdr;
- int index = data->ioc_count;
- int rc = -ENOENT;
-
- spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
-
- list_for_each (tmp, &kqswnal_data.kqn_activetxds) {
- if (index-- != 0)
- continue;
-
- ktx = list_entry (tmp, kqswnal_tx_t, ktx_list);
- hdr = (lnet_hdr_t *)ktx->ktx_buffer;
-
- data->ioc_count = le32_to_cpu(hdr->payload_length);
- data->ioc_nid = le64_to_cpu(hdr->dest_nid);
- data->ioc_u64[0] = ktx->ktx_nid;
- data->ioc_u32[0] = le32_to_cpu(hdr->type);
- data->ioc_u32[1] = ktx->ktx_launcher;
- data->ioc_flags = (list_empty (&ktx->ktx_schedlist) ? 0 : 1) |
- (ktx->ktx_state << 2);
- rc = 0;
- break;
- }
-
- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
- return (rc);
-}
-
-int
-kqswnal_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
-
- LASSERT (ni == kqswnal_data.kqn_ni);
-
- switch (cmd) {
- case IOC_LIBCFS_GET_TXDESC:
- return (kqswnal_get_tx_desc (data));
-
- case IOC_LIBCFS_REGISTER_MYNID:
- if (data->ioc_nid == ni->ni_nid)
- return 0;
-
- LASSERT (LNET_NIDNET(data->ioc_nid) == LNET_NIDNET(ni->ni_nid));
-
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID for %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- return 0;
-
- default:
- return (-EINVAL);
- }
-}
-
-void
-kqswnal_shutdown(lnet_ni_t *ni)
-{
- unsigned long flags;
- kqswnal_tx_t *ktx;
- kqswnal_rx_t *krx;
-
- CDEBUG (D_NET, "shutdown\n");
- LASSERT (ni->ni_data == &kqswnal_data);
- LASSERT (ni == kqswnal_data.kqn_ni);
-
- switch (kqswnal_data.kqn_init)
- {
- default:
- LASSERT (0);
-
- case KQN_INIT_ALL:
- case KQN_INIT_DATA:
- break;
- }
-
- /**********************************************************************/
- /* Signal the start of shutdown... */
- spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
- kqswnal_data.kqn_shuttingdown = 1;
- spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
-
- /**********************************************************************/
- /* wait for sends that have allocated a tx desc to launch or give up */
- while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) {
- CDEBUG(D_NET, "waiting for %d pending sends\n",
- atomic_read (&kqswnal_data.kqn_pending_txs));
- cfs_pause(cfs_time_seconds(1));
- }
-
- /**********************************************************************/
- /* close elan comms */
- /* Shut down receivers first; rx callbacks might try sending... */
- if (kqswnal_data.kqn_eprx_small != NULL)
- ep_free_rcvr (kqswnal_data.kqn_eprx_small);
-
- if (kqswnal_data.kqn_eprx_large != NULL)
- ep_free_rcvr (kqswnal_data.kqn_eprx_large);
-
- /* NB ep_free_rcvr() returns only after we've freed off all receive
- * buffers (see shutdown handling in kqswnal_requeue_rx()). This
- * means we must have completed any messages we passed to
- * lnet_parse() */
-
- if (kqswnal_data.kqn_eptx != NULL)
- ep_free_xmtr (kqswnal_data.kqn_eptx);
-
- /* NB ep_free_xmtr() returns only after all outstanding transmits
- * have called their callback... */
- LASSERT(list_empty(&kqswnal_data.kqn_activetxds));
-
- /**********************************************************************/
- /* flag threads to terminate, wake them and wait for them to die */
- kqswnal_data.kqn_shuttingdown = 2;
- wake_up_all (&kqswnal_data.kqn_sched_waitq);
-
- while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
- CDEBUG(D_NET, "waiting for %d threads to terminate\n",
- atomic_read (&kqswnal_data.kqn_nthreads));
- cfs_pause(cfs_time_seconds(1));
- }
-
- /**********************************************************************/
- /* No more threads. No more portals, router or comms callbacks!
- * I control the horizontals and the verticals...
- */
-
- LASSERT (list_empty (&kqswnal_data.kqn_readyrxds));
- LASSERT (list_empty (&kqswnal_data.kqn_donetxds));
- LASSERT (list_empty (&kqswnal_data.kqn_delayedtxds));
-
- /**********************************************************************/
- /* Unmap message buffers and free all descriptors and buffers
- */
-
- /* FTTB, we need to unmap any remaining mapped memory. When
- * ep_dvma_release() get fixed (and releases any mappings in the
- * region), we can delete all the code from here --------> */
-
- for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx = ktx->ktx_alloclist) {
- /* If ktx has a buffer, it got mapped; unmap now. NB only
- * the pre-mapped stuff is still mapped since all tx descs
- * must be idle */
-
- if (ktx->ktx_buffer != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_tx_nmh,
- &ktx->ktx_ebuffer);
- }
-
- for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) {
- /* If krx_kiov[0].kiov_page got allocated, it got mapped.
- * NB subsequent pages get merged */
-
- if (krx->krx_kiov[0].kiov_page != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_rx_nmh,
- &krx->krx_elanbuffer);
- }
- /* <----------- to here */
-
- if (kqswnal_data.kqn_ep_rx_nmh != NULL)
- ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh);
-
- if (kqswnal_data.kqn_ep_tx_nmh != NULL)
- ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh);
-
- while (kqswnal_data.kqn_txds != NULL) {
- ktx = kqswnal_data.kqn_txds;
-
- if (ktx->ktx_buffer != NULL)
- LIBCFS_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
-
- kqswnal_data.kqn_txds = ktx->ktx_alloclist;
- LIBCFS_FREE(ktx, sizeof(*ktx));
- }
-
- while (kqswnal_data.kqn_rxds != NULL) {
- int i;
-
- krx = kqswnal_data.kqn_rxds;
- for (i = 0; i < krx->krx_npages; i++)
- if (krx->krx_kiov[i].kiov_page != NULL)
- __free_page (krx->krx_kiov[i].kiov_page);
-
- kqswnal_data.kqn_rxds = krx->krx_alloclist;
- LIBCFS_FREE(krx, sizeof (*krx));
- }
-
- /* resets flags, pointers to NULL etc */
- memset(&kqswnal_data, 0, sizeof (kqswnal_data));
-
- CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&libcfs_kmemory));
-
- PORTAL_MODULE_UNUSE;
-}
-
-int
-kqswnal_startup (lnet_ni_t *ni)
-{
- EP_RAILMASK all_rails = EP_RAILMASK_ALL;
- int rc;
- int i;
- kqswnal_rx_t *krx;
- kqswnal_tx_t *ktx;
- int elan_page_idx;
-
- LASSERT (ni->ni_lnd == &the_kqswlnd);
-
-#if KQSW_CKSUM
- if (the_lnet.ln_ptlcompat != 0) {
- CERROR("Checksumming version not portals compatible\n");
- return -ENODEV;
- }
-#endif
- /* Only 1 instance supported */
- if (kqswnal_data.kqn_init != KQN_INIT_NOTHING) {
- CERROR ("Only 1 instance supported\n");
- return -EPERM;
- }
-
- if (ni->ni_interfaces[0] != NULL) {
- CERROR("Explicit interface config not supported\n");
- return -EPERM;
- }
-
- if (*kqswnal_tunables.kqn_credits >=
- *kqswnal_tunables.kqn_ntxmsgs) {
- LCONSOLE_ERROR_MSG(0x12e, "Configuration error: please set "
- "ntxmsgs(%d) > credits(%d)\n",
- *kqswnal_tunables.kqn_ntxmsgs,
- *kqswnal_tunables.kqn_credits);
- }
-
- CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&libcfs_kmemory));
-
- /* ensure all pointers NULL etc */
- memset (&kqswnal_data, 0, sizeof (kqswnal_data));
-
- kqswnal_data.kqn_ni = ni;
- ni->ni_data = &kqswnal_data;
- ni->ni_peertxcredits = *kqswnal_tunables.kqn_peercredits;
- ni->ni_maxtxcredits = *kqswnal_tunables.kqn_credits;
-
- INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
- INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
- spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
-
- INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
- INIT_LIST_HEAD (&kqswnal_data.kqn_donetxds);
- INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);
-
- spin_lock_init (&kqswnal_data.kqn_sched_lock);
- init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
-
- /* pointers/lists/locks initialised */
- kqswnal_data.kqn_init = KQN_INIT_DATA;
- PORTAL_MODULE_USE;
-
- kqswnal_data.kqn_ep = ep_system();
- if (kqswnal_data.kqn_ep == NULL) {
- CERROR("Can't initialise EKC\n");
- kqswnal_shutdown(ni);
- return (-ENODEV);
- }
-
- if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
- CERROR("Can't get elan ID\n");
- kqswnal_shutdown(ni);
- return (-ENODEV);
- }
-
- kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_ep);
- kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_ep);
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), kqswnal_data.kqn_elanid);
-
- /**********************************************************************/
- /* Get the transmitter */
-
- kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep);
- if (kqswnal_data.kqn_eptx == NULL)
- {
- CERROR ("Can't allocate transmitter\n");
- kqswnal_shutdown (ni);
- return (-ENOMEM);
- }
-
- /**********************************************************************/
- /* Get the receivers */
-
- kqswnal_data.kqn_eprx_small =
- ep_alloc_rcvr (kqswnal_data.kqn_ep,
- EP_MSG_SVC_PORTALS_SMALL,
- *kqswnal_tunables.kqn_ep_envelopes_small);
- if (kqswnal_data.kqn_eprx_small == NULL)
- {
- CERROR ("Can't install small msg receiver\n");
- kqswnal_shutdown (ni);
- return (-ENOMEM);
- }
-
- kqswnal_data.kqn_eprx_large =
- ep_alloc_rcvr (kqswnal_data.kqn_ep,
- EP_MSG_SVC_PORTALS_LARGE,
- *kqswnal_tunables.kqn_ep_envelopes_large);
- if (kqswnal_data.kqn_eprx_large == NULL)
- {
- CERROR ("Can't install large msg receiver\n");
- kqswnal_shutdown (ni);
- return (-ENOMEM);
- }
-
- /**********************************************************************/
- /* Reserve Elan address space for transmit descriptors NB we may
- * either send the contents of associated buffers immediately, or
- * map them for the peer to suck/blow... */
- kqswnal_data.kqn_ep_tx_nmh =
- ep_dvma_reserve(kqswnal_data.kqn_ep,
- KQSW_NTXMSGPAGES*(*kqswnal_tunables.kqn_ntxmsgs),
- EP_PERM_WRITE);
- if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
- CERROR("Can't reserve tx dma space\n");
- kqswnal_shutdown(ni);
- return (-ENOMEM);
- }
-
- /**********************************************************************/
- /* Reserve Elan address space for receive buffers */
- kqswnal_data.kqn_ep_rx_nmh =
- ep_dvma_reserve(kqswnal_data.kqn_ep,
- KQSW_NRXMSGPAGES_SMALL *
- (*kqswnal_tunables.kqn_nrxmsgs_small) +
- KQSW_NRXMSGPAGES_LARGE *
- (*kqswnal_tunables.kqn_nrxmsgs_large),
- EP_PERM_WRITE);
- if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
- CERROR("Can't reserve rx dma space\n");
- kqswnal_shutdown(ni);
- return (-ENOMEM);
- }
-
- /**********************************************************************/
- /* Allocate/Initialise transmit descriptors */
-
- kqswnal_data.kqn_txds = NULL;
- for (i = 0; i < (*kqswnal_tunables.kqn_ntxmsgs); i++)
- {
- int premapped_pages;
- int basepage = i * KQSW_NTXMSGPAGES;
-
- LIBCFS_ALLOC (ktx, sizeof(*ktx));
- if (ktx == NULL) {
- kqswnal_shutdown (ni);
- return (-ENOMEM);
- }
-
- memset(ktx, 0, sizeof(*ktx)); /* NULL pointers; zero flags */
- ktx->ktx_alloclist = kqswnal_data.kqn_txds;
- kqswnal_data.kqn_txds = ktx;
-
- LIBCFS_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
- if (ktx->ktx_buffer == NULL)
- {
- kqswnal_shutdown (ni);
- return (-ENOMEM);
- }
-
- /* Map pre-allocated buffer NOW, to save latency on transmit */
- premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
- KQSW_TX_BUFFER_SIZE);
- ep_dvma_load(kqswnal_data.kqn_ep, NULL,
- ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
- kqswnal_data.kqn_ep_tx_nmh, basepage,
- &all_rails, &ktx->ktx_ebuffer);
-
- ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
- ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */
-
- INIT_LIST_HEAD (&ktx->ktx_schedlist);
-
- ktx->ktx_state = KTX_IDLE;
- ktx->ktx_rail = -1; /* unset rail */
-
- list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_idletxds);
- }
-
- /**********************************************************************/
- /* Allocate/Initialise receive descriptors */
- kqswnal_data.kqn_rxds = NULL;
- elan_page_idx = 0;
- for (i = 0; i < *kqswnal_tunables.kqn_nrxmsgs_small + *kqswnal_tunables.kqn_nrxmsgs_large; i++)
- {
- EP_NMD elanbuffer;
- int j;
-
- LIBCFS_ALLOC(krx, sizeof(*krx));
- if (krx == NULL) {
- kqswnal_shutdown(ni);
- return (-ENOMEM);
- }
-
- memset(krx, 0, sizeof(*krx)); /* clear flags, null pointers etc */
- krx->krx_alloclist = kqswnal_data.kqn_rxds;
- kqswnal_data.kqn_rxds = krx;
-
- if (i < *kqswnal_tunables.kqn_nrxmsgs_small)
- {
- krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
- krx->krx_eprx = kqswnal_data.kqn_eprx_small;
- }
- else
- {
- krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
- krx->krx_eprx = kqswnal_data.kqn_eprx_large;
- }
-
- LASSERT (krx->krx_npages > 0);
- for (j = 0; j < krx->krx_npages; j++)
- {
- struct page *page = alloc_page(GFP_KERNEL);
-
- if (page == NULL) {
- kqswnal_shutdown (ni);
- return (-ENOMEM);
- }
-
- krx->krx_kiov[j] = (lnet_kiov_t) {.kiov_page = page,
- .kiov_offset = 0,
- .kiov_len = PAGE_SIZE};
- LASSERT(page_address(page) != NULL);
-
- ep_dvma_load(kqswnal_data.kqn_ep, NULL,
- page_address(page),
- PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
- elan_page_idx, &all_rails, &elanbuffer);
-
- if (j == 0) {
- krx->krx_elanbuffer = elanbuffer;
- } else {
- rc = ep_nmd_merge(&krx->krx_elanbuffer,
- &krx->krx_elanbuffer,
- &elanbuffer);
- /* NB contiguous mapping */
- LASSERT(rc);
- }
- elan_page_idx++;
-
- }
- }
- LASSERT (elan_page_idx ==
- (*kqswnal_tunables.kqn_nrxmsgs_small * KQSW_NRXMSGPAGES_SMALL) +
- (*kqswnal_tunables.kqn_nrxmsgs_large * KQSW_NRXMSGPAGES_LARGE));
-
- /**********************************************************************/
- /* Queue receives, now that it's OK to run their completion callbacks */
-
- for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) {
- /* NB this enqueue can allocate/sleep (attr == 0) */
- krx->krx_state = KRX_POSTED;
- rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
- &krx->krx_elanbuffer, 0);
- if (rc != EP_SUCCESS) {
- CERROR ("failed ep_queue_receive %d\n", rc);
- kqswnal_shutdown (ni);
- return (-EIO);
- }
- }
-
- /**********************************************************************/
- /* Spawn scheduling threads */
- for (i = 0; i < num_online_cpus(); i++) {
- rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
- if (rc != 0)
- {
- CERROR ("failed to spawn scheduling thread: %d\n", rc);
- kqswnal_shutdown (ni);
- return (-ESRCH);
- }
- }
-
- kqswnal_data.kqn_init = KQN_INIT_ALL;
- return (0);
-}
-
-void __exit
-kqswnal_finalise (void)
-{
- lnet_unregister_lnd(&the_kqswlnd);
- kqswnal_tunables_fini();
-}
-
-static int __init
-kqswnal_initialise (void)
-{
- int rc = kqswnal_tunables_init();
-
- if (rc != 0)
- return rc;
-
- lnet_register_lnd(&the_kqswlnd);
- return (0);
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel Quadrics/Elan LND v1.01");
-MODULE_LICENSE("GPL");
-
-module_init (kqswnal_initialise);
-module_exit (kqswnal_finalise);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines.
- *
- */
-
-#ifndef _QSWNAL_H
-#define _QSWNAL_H
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <qsnet/kernel.h>
-#undef printf /* nasty QSW #define */
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-
-#include <elan/epcomms.h>
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/locks.h> /* wait_on_buffer */
-#else
-#include <linux/buffer_head.h> /* wait_on_buffer */
-#endif
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/sysctl.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-
-/* fixed constants */
-#define KQSW_SMALLMSG (4<<10) /* small/large ep receiver breakpoint */
-#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */
-
-#define KQSW_CKSUM 0 /* enable checksumming (protocol incompatible) */
-
-/*
- * derived constants
- */
-
-#define KQSW_TX_BUFFER_SIZE (offsetof(kqswnal_msg_t, \
- kqm_u.immediate.kqim_payload[*kqswnal_tunables.kqn_tx_maxcontig]))
-/* The pre-allocated tx buffer (hdr + small payload) */
-
-#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(LNET_MAX_PAYLOAD) + 1)
-/* Reserve elan address space for pre-allocated and pre-mapped transmit
- * buffer and a full payload too. Extra pages allow for page alignment */
-
-#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_SMALLMSG))
-/* receive hdr/payload always contiguous and page aligned */
-#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE)
-
-#define KQSW_NRXMSGPAGES_LARGE (btopr(sizeof(lnet_msg_t) + LNET_MAX_PAYLOAD))
-/* receive hdr/payload always contiguous and page aligned */
-#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE)
-/* biggest complete packet we can receive (or transmit) */
-
-/* Wire messages */
-/* Remote memory descriptor */
-typedef struct
-{
- __u32 kqrmd_nfrag; /* # frags */
- EP_NMD kqrmd_frag[0]; /* actual frags */
-} kqswnal_remotemd_t;
-
-/* Immediate data */
-typedef struct
-{
- lnet_hdr_t kqim_hdr; /* LNET header */
- char kqim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR kqswnal_immediate_msg_t;
-
-/* RDMA request */
-typedef struct
-{
- lnet_hdr_t kqrm_hdr; /* LNET header */
- kqswnal_remotemd_t kqrm_rmd; /* peer's buffer */
-} WIRE_ATTR kqswnal_rdma_msg_t;
-
-typedef struct
-{
- __u32 kqm_magic; /* I'm a qswlnd message */
- __u16 kqm_version; /* this is my version number */
- __u16 kqm_type; /* msg type */
-#if KQSW_CKSUM
- __u32 kqm_cksum; /* crc32 checksum */
- __u32 kqm_nob; /* original msg length */
-#endif
- union {
- kqswnal_immediate_msg_t immediate;
- kqswnal_rdma_msg_t rdma;
- } WIRE_ATTR kqm_u;
-} WIRE_ATTR kqswnal_msg_t;
-
-#if KQSW_CKSUM /* enable checksums ? */
-# include <linux/crc32.h>
-static inline __u32 kqswnal_csum(__u32 crc, unsigned char const *p, size_t len)
-{
-#if 1
- return crc32_le(crc, p, len);
-#else
- while (len-- > 0)
- crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ;
- return crc;
-#endif
-}
-# define QSWLND_PROTO_VERSION 0xbeef
-#else
-# define QSWLND_PROTO_VERSION 1
-#endif
-
-#define QSWLND_MSG_IMMEDIATE 0
-#define QSWLND_MSG_RDMA 1
-
-typedef union {
- EP_STATUSBLK ep_statusblk;
- struct {
- __u32 status;
- __u32 magic;
- __u32 version;
- union {
- struct {
- __u32 len;
- __u32 cksum;
- } WIRE_ATTR get;
- } WIRE_ATTR u;
- } WIRE_ATTR msg;
-} kqswnal_rpc_reply_t;
-
-typedef struct kqswnal_rx
-{
- struct list_head krx_list; /* enqueue -> thread */
- struct kqswnal_rx *krx_alloclist; /* stack in kqn_rxds */
- EP_RCVR *krx_eprx; /* port to post receives to */
- EP_RXD *krx_rxd; /* receive descriptor (for repost) */
- EP_NMD krx_elanbuffer; /* contiguous Elan buffer */
- int krx_npages; /* # pages in receive buffer */
- int krx_nob; /* Number Of Bytes received into buffer */
- int krx_rpc_reply_needed:1; /* peer waiting for EKC RPC reply */
- int krx_raw_lnet_hdr:1; /* msg is a raw lnet hdr (portals compatible) */
- int krx_state; /* what this RX is doing */
- atomic_t krx_refcount; /* how to tell when rpc is done */
-#if KQSW_CKSUM
- __u32 krx_cksum; /* checksum */
-#endif
- kqswnal_rpc_reply_t krx_rpc_reply; /* rpc reply status block */
- lnet_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
-} kqswnal_rx_t;
-
-#define KRX_POSTED 1 /* receiving */
-#define KRX_PARSE 2 /* ready to be parsed */
-#define KRX_COMPLETING 3 /* waiting to be completed */
-
-
-typedef struct kqswnal_tx
-{
- struct list_head ktx_list; /* enqueue idle/active */
- struct list_head ktx_schedlist; /* enqueue on scheduler */
- struct kqswnal_tx *ktx_alloclist; /* stack in kqn_txds */
- unsigned int ktx_state:7; /* What I'm doing */
- unsigned int ktx_firsttmpfrag:1; /* ktx_frags[0] is in my ebuffer ? 0 : 1 */
- __u32 ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */
- int ktx_npages; /* pages reserved for mapping messages */
- int ktx_nmappedpages; /* # pages mapped for current message */
- int ktx_port; /* destination ep port */
- lnet_nid_t ktx_nid; /* destination node */
- void *ktx_args[3]; /* completion passthru */
- char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */
- cfs_time_t ktx_launchtime; /* when (in jiffies) the transmit
- * was launched */
- int ktx_status; /* completion status */
-#if KQSW_CKSUM
- __u32 ktx_cksum; /* optimized GET payload checksum */
-#endif
- /* debug/info fields */
- pid_t ktx_launcher; /* pid of launching process */
-
- int ktx_nfrag; /* # message frags */
- int ktx_rail; /* preferred rail */
- EP_NMD ktx_ebuffer; /* elan mapping of ktx_buffer */
- EP_NMD ktx_frags[EP_MAXFRAG];/* elan mapping of msg frags */
-} kqswnal_tx_t;
-
-#define KTX_IDLE 0 /* on kqn_idletxds */
-#define KTX_SENDING 1 /* normal send */
-#define KTX_GETTING 2 /* sending optimised get */
-#define KTX_PUTTING 3 /* sending optimised put */
-#define KTX_RDMA_FETCH 4 /* handling optimised put */
-#define KTX_RDMA_STORE 5 /* handling optimised get */
-
-typedef struct
-{
- int *kqn_tx_maxcontig; /* maximum payload to defrag */
- int *kqn_ntxmsgs; /* # normal tx msgs */
- int *kqn_credits; /* # concurrent sends */
- int *kqn_peercredits; /* # concurrent sends to 1 peer */
- int *kqn_nrxmsgs_large; /* # 'large' rx msgs */
- int *kqn_ep_envelopes_large; /* # 'large' rx ep envelopes */
- int *kqn_nrxmsgs_small; /* # 'small' rx msgs */
- int *kqn_ep_envelopes_small; /* # 'small' rx ep envelopes */
- int *kqn_optimized_puts; /* optimized PUTs? */
- int *kqn_optimized_gets; /* optimized GETs? */
-#if KQSW_CKSUM
- int *kqn_inject_csum_error; /* # csum errors to inject */
-#endif
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kqn_sysctl; /* sysctl interface */
-#endif
-} kqswnal_tunables_t;
-
-typedef struct
-{
- char kqn_init; /* what's been initialised */
- char kqn_shuttingdown; /* I'm trying to shut down */
- atomic_t kqn_nthreads; /* # threads running */
- lnet_ni_t *kqn_ni; /* _the_ instance of me */
-
- kqswnal_rx_t *kqn_rxds; /* stack of all the receive descriptors */
- kqswnal_tx_t *kqn_txds; /* stack of all the transmit descriptors */
-
- struct list_head kqn_idletxds; /* transmit descriptors free to use */
- struct list_head kqn_activetxds; /* transmit descriptors being used */
- spinlock_t kqn_idletxd_lock; /* serialise idle txd access */
- atomic_t kqn_pending_txs; /* # transmits being prepped */
-
- spinlock_t kqn_sched_lock; /* serialise packet schedulers */
- wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */
-
- struct list_head kqn_readyrxds; /* rxds full of data */
- struct list_head kqn_donetxds; /* completed transmits */
- struct list_head kqn_delayedtxds; /* delayed transmits */
-
- EP_SYS *kqn_ep; /* elan system */
- EP_NMH *kqn_ep_tx_nmh; /* elan reserved tx vaddrs */
- EP_NMH *kqn_ep_rx_nmh; /* elan reserved rx vaddrs */
- EP_XMTR *kqn_eptx; /* elan transmitter */
- EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */
- EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */
-
- int kqn_nnodes; /* this cluster's size */
- int kqn_elanid; /* this nodes's elan ID */
-
- EP_STATUSBLK kqn_rpc_success; /* preset RPC reply status blocks */
- EP_STATUSBLK kqn_rpc_failed;
- EP_STATUSBLK kqn_rpc_version; /* reply to future version query */
- EP_STATUSBLK kqn_rpc_magic; /* reply to future version query */
-} kqswnal_data_t;
-
-/* kqn_init state */
-#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */
-#define KQN_INIT_DATA 1
-#define KQN_INIT_ALL 2
-
-extern kqswnal_tunables_t kqswnal_tunables;
-extern kqswnal_data_t kqswnal_data;
-
-extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
-extern void kqswnal_rxhandler(EP_RXD *rxd);
-extern int kqswnal_scheduler (void *);
-extern void kqswnal_rx_done (kqswnal_rx_t *krx);
-
-static inline lnet_nid_t
-kqswnal_elanid2nid (int elanid)
-{
- return LNET_MKNID(LNET_NIDNET(kqswnal_data.kqn_ni->ni_nid), elanid);
-}
-
-static inline int
-kqswnal_nid2elanid (lnet_nid_t nid)
-{
- __u32 elanid = LNET_NIDADDR(nid);
-
- /* not in this cluster? */
- return (elanid >= kqswnal_data.kqn_nnodes) ? -1 : elanid;
-}
-
-static inline lnet_nid_t
-kqswnal_rx_nid(kqswnal_rx_t *krx)
-{
- return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd)));
-}
-
-static inline int
-kqswnal_pages_spanned (void *base, int nob)
-{
- unsigned long first_page = ((unsigned long)base) >> PAGE_SHIFT;
- unsigned long last_page = (((unsigned long)base) + (nob - 1)) >> PAGE_SHIFT;
-
- LASSERT (last_page >= first_page); /* can't wrap address space */
- return (last_page - first_page + 1);
-}
-
-static inline void kqswnal_rx_decref (kqswnal_rx_t *krx)
-{
- LASSERT (atomic_read (&krx->krx_refcount) > 0);
- if (atomic_dec_and_test (&krx->krx_refcount))
- kqswnal_rx_done(krx);
-}
-
-int kqswnal_startup (lnet_ni_t *ni);
-void kqswnal_shutdown (lnet_ni_t *ni);
-int kqswnal_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg);
-int kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kqswnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-
-int kqswnal_tunables_init(void);
-void kqswnal_tunables_fini(void);
-
-#endif /* _QSWNAL_H */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.lustre.org
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "qswlnd.h"
-
-void
-kqswnal_notify_peer_down(kqswnal_tx_t *ktx)
-{
- time_t then;
-
- then = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() -
- ktx->ktx_launchtime);
-
- lnet_notify(kqswnal_data.kqn_ni, ktx->ktx_nid, 0, then);
-}
-
-void
-kqswnal_unmap_tx (kqswnal_tx_t *ktx)
-{
- int i;
-
- ktx->ktx_rail = -1; /* unset rail */
-
- if (ktx->ktx_nmappedpages == 0)
- return;
-
- CDEBUG(D_NET, "%p unloading %d frags starting at %d\n",
- ktx, ktx->ktx_nfrag, ktx->ktx_firsttmpfrag);
-
- for (i = ktx->ktx_firsttmpfrag; i < ktx->ktx_nfrag; i++)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_tx_nmh,
- &ktx->ktx_frags[i]);
-
- ktx->ktx_nmappedpages = 0;
-}
-
-int
-kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int offset, int nob,
- unsigned int niov, lnet_kiov_t *kiov)
-{
- int nfrags = ktx->ktx_nfrag;
- int nmapped = ktx->ktx_nmappedpages;
- int maxmapped = ktx->ktx_npages;
- __u32 basepage = ktx->ktx_basepage + nmapped;
- char *ptr;
-
- EP_RAILMASK railmask;
- int rail;
-
- if (ktx->ktx_rail < 0)
- ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
- EP_RAILMASK_ALL,
- kqswnal_nid2elanid(ktx->ktx_nid));
- rail = ktx->ktx_rail;
- if (rail < 0) {
- CERROR("No rails available for %s\n", libcfs_nid2str(ktx->ktx_nid));
- return (-ENETDOWN);
- }
- railmask = 1 << rail;
-
- LASSERT (nmapped <= maxmapped);
- LASSERT (nfrags >= ktx->ktx_firsttmpfrag);
- LASSERT (nfrags <= EP_MAXFRAG);
- LASSERT (niov > 0);
- LASSERT (nob > 0);
-
- /* skip complete frags before 'offset' */
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- kiov++;
- niov--;
- LASSERT (niov > 0);
- }
-
- do {
- int fraglen = kiov->kiov_len - offset;
-
- /* each page frag is contained in one page */
- LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
-
- if (fraglen > nob)
- fraglen = nob;
-
- nmapped++;
- if (nmapped > maxmapped) {
- CERROR("Can't map message in %d pages (max %d)\n",
- nmapped, maxmapped);
- return (-EMSGSIZE);
- }
-
- if (nfrags == EP_MAXFRAG) {
- CERROR("Message too fragmented in Elan VM (max %d frags)\n",
- EP_MAXFRAG);
- return (-EMSGSIZE);
- }
-
- /* XXX this is really crap, but we'll have to kmap until
- * EKC has a page (rather than vaddr) mapping interface */
-
- ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
-
- CDEBUG(D_NET,
- "%p[%d] loading %p for %d, page %d, %d total\n",
- ktx, nfrags, ptr, fraglen, basepage, nmapped);
-
- ep_dvma_load(kqswnal_data.kqn_ep, NULL,
- ptr, fraglen,
- kqswnal_data.kqn_ep_tx_nmh, basepage,
- &railmask, &ktx->ktx_frags[nfrags]);
-
- if (nfrags == ktx->ktx_firsttmpfrag ||
- !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1],
- &ktx->ktx_frags[nfrags - 1],
- &ktx->ktx_frags[nfrags])) {
- /* new frag if this is the first or can't merge */
- nfrags++;
- }
-
- kunmap (kiov->kiov_page);
-
- /* keep in loop for failure case */
- ktx->ktx_nmappedpages = nmapped;
-
- basepage++;
- kiov++;
- niov--;
- nob -= fraglen;
- offset = 0;
-
- /* iov must not run out before end of data */
- LASSERT (nob == 0 || niov > 0);
-
- } while (nob > 0);
-
- ktx->ktx_nfrag = nfrags;
- CDEBUG (D_NET, "%p got %d frags over %d pages\n",
- ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages);
-
- return (0);
-}
-
-#if KQSW_CKSUM
-__u32
-kqswnal_csum_kiov (__u32 csum, int offset, int nob,
- unsigned int niov, lnet_kiov_t *kiov)
-{
- char *ptr;
-
- if (nob == 0)
- return csum;
-
- LASSERT (niov > 0);
- LASSERT (nob > 0);
-
- /* skip complete frags before 'offset' */
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- kiov++;
- niov--;
- LASSERT (niov > 0);
- }
-
- do {
- int fraglen = kiov->kiov_len - offset;
-
- /* each page frag is contained in one page */
- LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
-
- if (fraglen > nob)
- fraglen = nob;
-
- ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
-
- csum = kqswnal_csum(csum, ptr, fraglen);
-
- kunmap (kiov->kiov_page);
-
- kiov++;
- niov--;
- nob -= fraglen;
- offset = 0;
-
- /* iov must not run out before end of data */
- LASSERT (nob == 0 || niov > 0);
-
- } while (nob > 0);
-
- return csum;
-}
-#endif
-
-int
-kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int offset, int nob,
- unsigned int niov, struct iovec *iov)
-{
- int nfrags = ktx->ktx_nfrag;
- int nmapped = ktx->ktx_nmappedpages;
- int maxmapped = ktx->ktx_npages;
- __u32 basepage = ktx->ktx_basepage + nmapped;
-
- EP_RAILMASK railmask;
- int rail;
-
- if (ktx->ktx_rail < 0)
- ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
- EP_RAILMASK_ALL,
- kqswnal_nid2elanid(ktx->ktx_nid));
- rail = ktx->ktx_rail;
- if (rail < 0) {
- CERROR("No rails available for %s\n", libcfs_nid2str(ktx->ktx_nid));
- return (-ENETDOWN);
- }
- railmask = 1 << rail;
-
- LASSERT (nmapped <= maxmapped);
- LASSERT (nfrags >= ktx->ktx_firsttmpfrag);
- LASSERT (nfrags <= EP_MAXFRAG);
- LASSERT (niov > 0);
- LASSERT (nob > 0);
-
- /* skip complete frags before offset */
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- iov++;
- niov--;
- LASSERT (niov > 0);
- }
-
- do {
- int fraglen = iov->iov_len - offset;
- long npages;
-
- if (fraglen > nob)
- fraglen = nob;
- npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
-
- nmapped += npages;
- if (nmapped > maxmapped) {
- CERROR("Can't map message in %d pages (max %d)\n",
- nmapped, maxmapped);
- return (-EMSGSIZE);
- }
-
- if (nfrags == EP_MAXFRAG) {
- CERROR("Message too fragmented in Elan VM (max %d frags)\n",
- EP_MAXFRAG);
- return (-EMSGSIZE);
- }
-
- CDEBUG(D_NET,
- "%p[%d] loading %p for %d, pages %d for %ld, %d total\n",
- ktx, nfrags, iov->iov_base + offset, fraglen,
- basepage, npages, nmapped);
-
- ep_dvma_load(kqswnal_data.kqn_ep, NULL,
- iov->iov_base + offset, fraglen,
- kqswnal_data.kqn_ep_tx_nmh, basepage,
- &railmask, &ktx->ktx_frags[nfrags]);
-
- if (nfrags == ktx->ktx_firsttmpfrag ||
- !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1],
- &ktx->ktx_frags[nfrags - 1],
- &ktx->ktx_frags[nfrags])) {
- /* new frag if this is the first or can't merge */
- nfrags++;
- }
-
- /* keep in loop for failure case */
- ktx->ktx_nmappedpages = nmapped;
-
- basepage += npages;
- iov++;
- niov--;
- nob -= fraglen;
- offset = 0;
-
- /* iov must not run out before end of data */
- LASSERT (nob == 0 || niov > 0);
-
- } while (nob > 0);
-
- ktx->ktx_nfrag = nfrags;
- CDEBUG (D_NET, "%p got %d frags over %d pages\n",
- ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages);
-
- return (0);
-}
-
-#if KQSW_CKSUM
-__u32
-kqswnal_csum_iov (__u32 csum, int offset, int nob,
- unsigned int niov, struct iovec *iov)
-{
- if (nob == 0)
- return csum;
-
- LASSERT (niov > 0);
- LASSERT (nob > 0);
-
- /* skip complete frags before offset */
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- iov++;
- niov--;
- LASSERT (niov > 0);
- }
-
- do {
- int fraglen = iov->iov_len - offset;
-
- if (fraglen > nob)
- fraglen = nob;
-
- csum = kqswnal_csum(csum, iov->iov_base + offset, fraglen);
-
- iov++;
- niov--;
- nob -= fraglen;
- offset = 0;
-
- /* iov must not run out before end of data */
- LASSERT (nob == 0 || niov > 0);
-
- } while (nob > 0);
-
- return csum;
-}
-#endif
-
-void
-kqswnal_put_idle_tx (kqswnal_tx_t *ktx)
-{
- unsigned long flags;
-
- kqswnal_unmap_tx (ktx); /* release temporary mappings */
- ktx->ktx_state = KTX_IDLE;
-
- spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
-
- list_del (&ktx->ktx_list); /* take off active list */
- list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds);
-
- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-}
-
-kqswnal_tx_t *
-kqswnal_get_idle_tx (void)
-{
- unsigned long flags;
- kqswnal_tx_t *ktx;
-
- spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
-
- if (kqswnal_data.kqn_shuttingdown ||
- list_empty (&kqswnal_data.kqn_idletxds)) {
- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-
- return NULL;
- }
-
- ktx = list_entry (kqswnal_data.kqn_idletxds.next, kqswnal_tx_t, ktx_list);
- list_del (&ktx->ktx_list);
-
- list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds);
- ktx->ktx_launcher = current->pid;
- atomic_inc(&kqswnal_data.kqn_pending_txs);
-
- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-
- /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */
- LASSERT (ktx->ktx_nmappedpages == 0);
- return (ktx);
-}
-
-void
-kqswnal_tx_done_in_thread_context (kqswnal_tx_t *ktx)
-{
- lnet_msg_t *lnetmsg0 = NULL;
- lnet_msg_t *lnetmsg1 = NULL;
- int status0 = 0;
- int status1 = 0;
- kqswnal_rx_t *krx;
-
- LASSERT (!in_interrupt());
-
- if (ktx->ktx_status == -EHOSTDOWN)
- kqswnal_notify_peer_down(ktx);
-
- switch (ktx->ktx_state) {
- case KTX_RDMA_FETCH: /* optimized PUT/REPLY handled */
- krx = (kqswnal_rx_t *)ktx->ktx_args[0];
- lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1];
- status0 = ktx->ktx_status;
-#if KQSW_CKSUM
- if (status0 == 0) { /* RDMA succeeded */
- kqswnal_msg_t *msg;
- __u32 csum;
-
- msg = (kqswnal_msg_t *)
- page_address(krx->krx_kiov[0].kiov_page);
-
- csum = (lnetmsg0->msg_kiov != NULL) ?
- kqswnal_csum_kiov(krx->krx_cksum,
- lnetmsg0->msg_offset,
- lnetmsg0->msg_wanted,
- lnetmsg0->msg_niov,
- lnetmsg0->msg_kiov) :
- kqswnal_csum_iov(krx->krx_cksum,
- lnetmsg0->msg_offset,
- lnetmsg0->msg_wanted,
- lnetmsg0->msg_niov,
- lnetmsg0->msg_iov);
-
- /* Can only check csum if I got it all */
- if (lnetmsg0->msg_wanted == lnetmsg0->msg_len &&
- csum != msg->kqm_cksum) {
- ktx->ktx_status = -EIO;
- krx->krx_rpc_reply.msg.status = -EIO;
- CERROR("RDMA checksum failed %u(%u) from %s\n",
- csum, msg->kqm_cksum,
- libcfs_nid2str(kqswnal_rx_nid(krx)));
- }
- }
-#endif
- LASSERT (krx->krx_state == KRX_COMPLETING);
- kqswnal_rx_decref (krx);
- break;
-
- case KTX_RDMA_STORE: /* optimized GET handled */
- case KTX_PUTTING: /* optimized PUT sent */
- case KTX_SENDING: /* normal send */
- lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1];
- status0 = ktx->ktx_status;
- break;
-
- case KTX_GETTING: /* optimized GET sent & payload received */
- /* Complete the GET with success since we can't avoid
- * delivering a REPLY event; we committed to it when we
- * launched the GET */
- lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1];
- status0 = 0;
- lnetmsg1 = (lnet_msg_t *)ktx->ktx_args[2];
- status1 = ktx->ktx_status;
-#if KQSW_CKSUM
- if (status1 == 0) { /* RDMA succeeded */
- lnet_msg_t *lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1];
- lnet_libmd_t *md = lnetmsg0->msg_md;
- __u32 csum;
-
- csum = ((md->md_options & LNET_MD_KIOV) != 0) ?
- kqswnal_csum_kiov(~0, 0,
- md->md_length,
- md->md_niov,
- md->md_iov.kiov) :
- kqswnal_csum_iov(~0, 0,
- md->md_length,
- md->md_niov,
- md->md_iov.iov);
-
- if (csum != ktx->ktx_cksum) {
- CERROR("RDMA checksum failed %u(%u) from %s\n",
- csum, ktx->ktx_cksum,
- libcfs_nid2str(ktx->ktx_nid));
- status1 = -EIO;
- }
- }
-#endif
- break;
-
- default:
- LASSERT (0);
- }
-
- kqswnal_put_idle_tx (ktx);
-
- lnet_finalize (kqswnal_data.kqn_ni, lnetmsg0, status0);
- if (lnetmsg1 != NULL)
- lnet_finalize (kqswnal_data.kqn_ni, lnetmsg1, status1);
-}
-
-void
-kqswnal_tx_done (kqswnal_tx_t *ktx, int status)
-{
- unsigned long flags;
-
- ktx->ktx_status = status;
-
- if (!in_interrupt()) {
- kqswnal_tx_done_in_thread_context(ktx);
- return;
- }
-
- /* Complete the send in thread context */
- spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
-
- list_add_tail(&ktx->ktx_schedlist,
- &kqswnal_data.kqn_donetxds);
- wake_up(&kqswnal_data.kqn_sched_waitq);
-
- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags);
-}
-
-static void
-kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
-{
- kqswnal_tx_t *ktx = (kqswnal_tx_t *)arg;
- kqswnal_rpc_reply_t *reply;
-
- LASSERT (txd != NULL);
- LASSERT (ktx != NULL);
-
- CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status);
-
- if (status != EP_SUCCESS) {
-
- CDEBUG (D_NETERROR, "Tx completion to %s failed: %d\n",
- libcfs_nid2str(ktx->ktx_nid), status);
-
- status = -EHOSTDOWN;
-
- } else switch (ktx->ktx_state) {
-
- case KTX_GETTING:
- case KTX_PUTTING:
- /* RPC complete! */
- reply = (kqswnal_rpc_reply_t *)ep_txd_statusblk(txd);
- if (reply->msg.magic == 0) { /* "old" peer */
- status = reply->msg.status;
- break;
- }
-
- if (reply->msg.magic != LNET_PROTO_QSW_MAGIC) {
- if (reply->msg.magic != swab32(LNET_PROTO_QSW_MAGIC)) {
- CERROR("%s unexpected rpc reply magic %08x\n",
- libcfs_nid2str(ktx->ktx_nid),
- reply->msg.magic);
- status = -EPROTO;
- break;
- }
-
- __swab32s(&reply->msg.status);
- __swab32s(&reply->msg.version);
-
- if (ktx->ktx_state == KTX_GETTING) {
- __swab32s(&reply->msg.u.get.len);
- __swab32s(&reply->msg.u.get.cksum);
- }
- }
-
- status = reply->msg.status;
- if (status != 0) {
- CERROR("%s RPC status %08x\n",
- libcfs_nid2str(ktx->ktx_nid), status);
- break;
- }
-
- if (ktx->ktx_state == KTX_GETTING) {
- lnet_set_reply_msg_len(kqswnal_data.kqn_ni,
- (lnet_msg_t *)ktx->ktx_args[2],
- reply->msg.u.get.len);
-#if KQSW_CKSUM
- ktx->ktx_cksum = reply->msg.u.get.cksum;
-#endif
- }
- break;
-
- case KTX_SENDING:
- status = 0;
- break;
-
- default:
- LBUG();
- break;
- }
-
- kqswnal_tx_done(ktx, status);
-}
-
-int
-kqswnal_launch (kqswnal_tx_t *ktx)
-{
- /* Don't block for transmit descriptor if we're in interrupt context */
- int attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
- int dest = kqswnal_nid2elanid (ktx->ktx_nid);
- unsigned long flags;
- int rc;
-
- ktx->ktx_launchtime = cfs_time_current();
-
- if (kqswnal_data.kqn_shuttingdown)
- return (-ESHUTDOWN);
-
- LASSERT (dest >= 0); /* must be a peer */
-
- if (ktx->ktx_nmappedpages != 0)
- attr = EP_SET_PREFRAIL(attr, ktx->ktx_rail);
-
- switch (ktx->ktx_state) {
- case KTX_GETTING:
- case KTX_PUTTING:
- if (the_lnet.ln_testprotocompat != 0 &&
- the_lnet.ln_ptlcompat == 0) {
- kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer;
-
- /* single-shot proto test:
- * Future version queries will use an RPC, so I'll
- * co-opt one of the existing ones */
- LNET_LOCK();
- if ((the_lnet.ln_testprotocompat & 1) != 0) {
- msg->kqm_version++;
- the_lnet.ln_testprotocompat &= ~1;
- }
- if ((the_lnet.ln_testprotocompat & 2) != 0) {
- msg->kqm_magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~2;
- }
- LNET_UNLOCK();
- }
-
- /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t.
- * The other frags are the payload, awaiting RDMA */
- rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
- ktx->ktx_port, attr,
- kqswnal_txhandler, ktx,
- NULL, ktx->ktx_frags, 1);
- break;
-
- case KTX_SENDING:
- rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest,
- ktx->ktx_port, attr,
- kqswnal_txhandler, ktx,
- NULL, ktx->ktx_frags, ktx->ktx_nfrag);
- break;
-
- default:
- LBUG();
- rc = -EINVAL; /* no compiler warning please */
- break;
- }
-
- switch (rc) {
- case EP_SUCCESS: /* success */
- return (0);
-
- case EP_ENOMEM: /* can't allocate ep txd => queue for later */
- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-
- list_add_tail (&ktx->ktx_schedlist, &kqswnal_data.kqn_delayedtxds);
- wake_up (&kqswnal_data.kqn_sched_waitq);
-
- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
- return (0);
-
- default: /* fatal error */
- CDEBUG (D_NETERROR, "Tx to %s failed: %d\n", libcfs_nid2str(ktx->ktx_nid), rc);
- kqswnal_notify_peer_down(ktx);
- return (-EHOSTUNREACH);
- }
-}
-
-#if 0
-static char *
-hdr_type_string (lnet_hdr_t *hdr)
-{
- switch (hdr->type) {
- case LNET_MSG_ACK:
- return ("ACK");
- case LNET_MSG_PUT:
- return ("PUT");
- case LNET_MSG_GET:
- return ("GET");
- case LNET_MSG_REPLY:
- return ("REPLY");
- default:
- return ("<UNKNOWN>");
- }
-}
-
-static void
-kqswnal_cerror_hdr(lnet_hdr_t * hdr)
-{
- char *type_str = hdr_type_string (hdr);
-
- CERROR("P3 Header at %p of type %s length %d\n", hdr, type_str,
- le32_to_cpu(hdr->payload_length));
- CERROR(" From nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->src_nid),
- le32_to_cpu(hdr->src_pid));
- CERROR(" To nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->dest_nid),
- le32_to_cpu(hdr->dest_pid));
-
- switch (le32_to_cpu(hdr->type)) {
- case LNET_MSG_PUT:
- CERROR(" Ptl index %d, ack md "LPX64"."LPX64", "
- "match bits "LPX64"\n",
- le32_to_cpu(hdr->msg.put.ptl_index),
- hdr->msg.put.ack_wmd.wh_interface_cookie,
- hdr->msg.put.ack_wmd.wh_object_cookie,
- le64_to_cpu(hdr->msg.put.match_bits));
- CERROR(" offset %d, hdr data "LPX64"\n",
- le32_to_cpu(hdr->msg.put.offset),
- hdr->msg.put.hdr_data);
- break;
-
- case LNET_MSG_GET:
- CERROR(" Ptl index %d, return md "LPX64"."LPX64", "
- "match bits "LPX64"\n",
- le32_to_cpu(hdr->msg.get.ptl_index),
- hdr->msg.get.return_wmd.wh_interface_cookie,
- hdr->msg.get.return_wmd.wh_object_cookie,
- hdr->msg.get.match_bits);
- CERROR(" Length %d, src offset %d\n",
- le32_to_cpu(hdr->msg.get.sink_length),
- le32_to_cpu(hdr->msg.get.src_offset));
- break;
-
- case LNET_MSG_ACK:
- CERROR(" dst md "LPX64"."LPX64", manipulated length %d\n",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie,
- le32_to_cpu(hdr->msg.ack.mlength));
- break;
-
- case LNET_MSG_REPLY:
- CERROR(" dst md "LPX64"."LPX64"\n",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie);
- }
-
-} /* end of print_hdr() */
-#endif
-
-int
-kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag,
- int nrfrag, EP_NMD *rfrag)
-{
- int i;
-
- if (nlfrag != nrfrag) {
- CERROR("Can't cope with unequal # frags: %d local %d remote\n",
- nlfrag, nrfrag);
- return (-EINVAL);
- }
-
- for (i = 0; i < nlfrag; i++)
- if (lfrag[i].nmd_len != rfrag[i].nmd_len) {
- CERROR("Can't cope with unequal frags %d(%d):"
- " %d local %d remote\n",
- i, nlfrag, lfrag[i].nmd_len, rfrag[i].nmd_len);
- return (-EINVAL);
- }
-
- return (0);
-}
-
-kqswnal_remotemd_t *
-kqswnal_get_portalscompat_rmd (kqswnal_rx_t *krx)
-{
- /* Check that the RMD sent after the "raw" LNET header in a
- * portals-compatible QSWLND message is OK */
- char *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
- kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + sizeof(lnet_hdr_t));
-
- /* Note RDMA addresses are sent in native endian-ness in the "old"
- * portals protocol so no swabbing... */
-
- if (buffer + krx->krx_nob < (char *)(rmd + 1)) {
- /* msg too small to discover rmd size */
- CERROR ("Incoming message [%d] too small for RMD (%d needed)\n",
- krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer));
- return (NULL);
- }
-
- if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) {
- /* rmd doesn't fit in the incoming message */
- CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n",
- krx->krx_nob, rmd->kqrmd_nfrag,
- (int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer));
- return (NULL);
- }
-
- return (rmd);
-}
-
-void
-kqswnal_rdma_store_complete (EP_RXD *rxd)
-{
- int status = ep_rxd_status(rxd);
- kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-
- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
- "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
-
- LASSERT (ktx->ktx_state == KTX_RDMA_STORE);
- LASSERT (krx->krx_rxd == rxd);
- LASSERT (krx->krx_rpc_reply_needed);
-
- krx->krx_rpc_reply_needed = 0;
- kqswnal_rx_decref (krx);
-
- /* free ktx & finalize() its lnet_msg_t */
- kqswnal_tx_done(ktx, (status == EP_SUCCESS) ? 0 : -ECONNABORTED);
-}
-
-void
-kqswnal_rdma_fetch_complete (EP_RXD *rxd)
-{
- /* Completed fetching the PUT/REPLY data */
- int status = ep_rxd_status(rxd);
- kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-
- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
- "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
-
- LASSERT (ktx->ktx_state == KTX_RDMA_FETCH);
- LASSERT (krx->krx_rxd == rxd);
- /* RPC completes with failure by default */
- LASSERT (krx->krx_rpc_reply_needed);
- LASSERT (krx->krx_rpc_reply.msg.status != 0);
-
- if (status == EP_SUCCESS) {
- krx->krx_rpc_reply.msg.status = 0;
- status = 0;
- } else {
- /* Abandon RPC since get failed */
- krx->krx_rpc_reply_needed = 0;
- status = -ECONNABORTED;
- }
-
- /* krx gets decref'd in kqswnal_tx_done_in_thread_context() */
- LASSERT (krx->krx_state == KRX_PARSE);
- krx->krx_state = KRX_COMPLETING;
-
- /* free ktx & finalize() its lnet_msg_t */
- kqswnal_tx_done(ktx, status);
-}
-
-int
-kqswnal_rdma (kqswnal_rx_t *krx, lnet_msg_t *lntmsg,
- int type, kqswnal_remotemd_t *rmd,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int len)
-{
- kqswnal_tx_t *ktx;
- int eprc;
- int rc;
-
- /* Not both mapped and paged payload */
- LASSERT (iov == NULL || kiov == NULL);
- /* RPC completes with failure by default */
- LASSERT (krx->krx_rpc_reply_needed);
- LASSERT (krx->krx_rpc_reply.msg.status != 0);
-
- if (len == 0) {
- /* data got truncated to nothing. */
- lnet_finalize(kqswnal_data.kqn_ni, lntmsg, 0);
- /* Let kqswnal_rx_done() complete the RPC with success */
- krx->krx_rpc_reply.msg.status = 0;
- return (0);
- }
-
- /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not
- actually sending a portals message with it */
- ktx = kqswnal_get_idle_tx();
- if (ktx == NULL) {
- CERROR ("Can't get txd for RDMA with %s\n",
- libcfs_nid2str(kqswnal_rx_nid(krx)));
- return (-ENOMEM);
- }
-
- ktx->ktx_state = type;
- ktx->ktx_nid = kqswnal_rx_nid(krx);
- ktx->ktx_args[0] = krx;
- ktx->ktx_args[1] = lntmsg;
-
- LASSERT (atomic_read(&krx->krx_refcount) > 0);
- /* Take an extra ref for the completion callback */
- atomic_inc(&krx->krx_refcount);
-
- /* Map on the rail the RPC prefers */
- ktx->ktx_rail = ep_rcvr_prefrail(krx->krx_eprx,
- ep_rxd_railmask(krx->krx_rxd));
-
- /* Start mapping at offset 0 (we're not mapping any headers) */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
-
- if (kiov != NULL)
- rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov);
- else
- rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov);
-
- if (rc != 0) {
- CERROR ("Can't map local RDMA data: %d\n", rc);
- goto out;
- }
-
- rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags,
- rmd->kqrmd_nfrag, rmd->kqrmd_frag);
- if (rc != 0) {
- CERROR ("Incompatible RDMA descriptors\n");
- goto out;
- }
-
- switch (type) {
- default:
- LBUG();
-
- case KTX_RDMA_STORE:
- krx->krx_rpc_reply.msg.status = 0;
- krx->krx_rpc_reply.msg.magic = LNET_PROTO_QSW_MAGIC;
- krx->krx_rpc_reply.msg.version = QSWLND_PROTO_VERSION;
- krx->krx_rpc_reply.msg.u.get.len = len;
-#if KQSW_CKSUM
- krx->krx_rpc_reply.msg.u.get.cksum = (kiov != NULL) ?
- kqswnal_csum_kiov(~0, offset, len, niov, kiov) :
- kqswnal_csum_iov(~0, offset, len, niov, iov);
- if (*kqswnal_tunables.kqn_inject_csum_error == 4) {
- krx->krx_rpc_reply.msg.u.get.cksum++;
- *kqswnal_tunables.kqn_inject_csum_error = 0;
- }
-#endif
- eprc = ep_complete_rpc(krx->krx_rxd,
- kqswnal_rdma_store_complete, ktx,
- &krx->krx_rpc_reply.ep_statusblk,
- ktx->ktx_frags, rmd->kqrmd_frag,
- rmd->kqrmd_nfrag);
- if (eprc != EP_SUCCESS) {
- CERROR("can't complete RPC: %d\n", eprc);
- /* don't re-attempt RPC completion */
- krx->krx_rpc_reply_needed = 0;
- rc = -ECONNABORTED;
- }
- break;
-
- case KTX_RDMA_FETCH:
- eprc = ep_rpc_get (krx->krx_rxd,
- kqswnal_rdma_fetch_complete, ktx,
- rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag);
- if (eprc != EP_SUCCESS) {
- CERROR("ep_rpc_get failed: %d\n", eprc);
- /* Don't attempt RPC completion:
- * EKC nuked it when the get failed */
- krx->krx_rpc_reply_needed = 0;
- rc = -ECONNABORTED;
- }
- break;
- }
-
- out:
- if (rc != 0) {
- kqswnal_rx_decref(krx); /* drop callback's ref */
- kqswnal_put_idle_tx (ktx);
- }
-
- atomic_dec(&kqswnal_data.kqn_pending_txs);
- return (rc);
-}
-
-int
-kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- lnet_hdr_t *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- int nob;
- kqswnal_tx_t *ktx;
- int rc;
-
- /* NB 1. hdr is in network byte order */
- /* 2. 'private' depends on the message type */
-
- CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT (payload_nob == 0 || payload_niov > 0);
- LASSERT (payload_niov <= LNET_MAX_IOV);
-
- /* It must be OK to kmap() if required */
- LASSERT (payload_kiov == NULL || !in_interrupt ());
- /* payload is either all vaddrs or all pages */
- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
- if (kqswnal_nid2elanid (target.nid) < 0) {
- CERROR("%s not in my cluster\n", libcfs_nid2str(target.nid));
- return -EIO;
- }
-
- /* I may not block for a transmit descriptor if I might block the
- * router, receiver, or an interrupt handler. */
- ktx = kqswnal_get_idle_tx();
- if (ktx == NULL) {
- CERROR ("Can't get txd for msg type %d for %s\n",
- type, libcfs_nid2str(target.nid));
- return (-ENOMEM);
- }
-
- ktx->ktx_state = KTX_SENDING;
- ktx->ktx_nid = target.nid;
- ktx->ktx_args[0] = private;
- ktx->ktx_args[1] = lntmsg;
- ktx->ktx_args[2] = NULL; /* set when a GET commits to REPLY */
-
- /* The first frag will be the pre-mapped buffer. */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-
- if ((!target_is_router && /* target.nid is final dest */
- !routing && /* I'm the source */
- type == LNET_MSG_GET && /* optimize GET? */
- *kqswnal_tunables.kqn_optimized_gets != 0 &&
- lntmsg->msg_md->md_length >=
- *kqswnal_tunables.kqn_optimized_gets) ||
- ((type == LNET_MSG_PUT || /* optimize PUT? */
- type == LNET_MSG_REPLY) && /* optimize REPLY? */
- *kqswnal_tunables.kqn_optimized_puts != 0 &&
- payload_nob >= *kqswnal_tunables.kqn_optimized_puts)) {
- lnet_libmd_t *md = lntmsg->msg_md;
- kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer;
- lnet_hdr_t *mhdr;
- kqswnal_remotemd_t *rmd;
-
- /* Optimised path: I send over the Elan vaddrs of the local
- * buffers, and my peer DMAs directly to/from them.
- *
- * First I set up ktx as if it was going to send this
- * payload, (it needs to map it anyway). This fills
- * ktx_frags[1] and onward with the network addresses
- * of the buffer frags. */
-
- if (the_lnet.ln_ptlcompat == 2) {
- /* Strong portals compatibility: send "raw" LNET
- * header + rdma descriptor */
- mhdr = (lnet_hdr_t *)ktx->ktx_buffer;
- rmd = (kqswnal_remotemd_t *)(mhdr + 1);
- } else {
- /* Send an RDMA message */
- msg->kqm_magic = LNET_PROTO_QSW_MAGIC;
- msg->kqm_version = QSWLND_PROTO_VERSION;
- msg->kqm_type = QSWLND_MSG_RDMA;
-
- mhdr = &msg->kqm_u.rdma.kqrm_hdr;
- rmd = &msg->kqm_u.rdma.kqrm_rmd;
- }
-
- *mhdr = *hdr;
- nob = (((char *)rmd) - ktx->ktx_buffer);
-
- if (type == LNET_MSG_GET) {
- if ((md->md_options & LNET_MD_KIOV) != 0)
- rc = kqswnal_map_tx_kiov (ktx, 0, md->md_length,
- md->md_niov, md->md_iov.kiov);
- else
- rc = kqswnal_map_tx_iov (ktx, 0, md->md_length,
- md->md_niov, md->md_iov.iov);
- ktx->ktx_state = KTX_GETTING;
- } else {
- if (payload_kiov != NULL)
- rc = kqswnal_map_tx_kiov(ktx, 0, payload_nob,
- payload_niov, payload_kiov);
- else
- rc = kqswnal_map_tx_iov(ktx, 0, payload_nob,
- payload_niov, payload_iov);
- ktx->ktx_state = KTX_PUTTING;
- }
-
- if (rc != 0)
- goto out;
-
- rmd->kqrmd_nfrag = ktx->ktx_nfrag - 1;
- nob += offsetof(kqswnal_remotemd_t,
- kqrmd_frag[rmd->kqrmd_nfrag]);
- LASSERT (nob <= KQSW_TX_BUFFER_SIZE);
-
- memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1],
- rmd->kqrmd_nfrag * sizeof(EP_NMD));
-
- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob);
-#if KQSW_CKSUM
- LASSERT (the_lnet.ln_ptlcompat != 2);
- msg->kqm_nob = nob + payload_nob;
- msg->kqm_cksum = 0;
- msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob);
-#endif
- if (type == LNET_MSG_GET) {
- /* Allocate reply message now while I'm in thread context */
- ktx->ktx_args[2] = lnet_create_reply_msg (
- kqswnal_data.kqn_ni, lntmsg);
- if (ktx->ktx_args[2] == NULL)
- goto out;
-
- /* NB finalizing the REPLY message is my
- * responsibility now, whatever happens. */
-#if KQSW_CKSUM
- if (*kqswnal_tunables.kqn_inject_csum_error == 3) {
- msg->kqm_cksum++;
- *kqswnal_tunables.kqn_inject_csum_error = 0;
- }
-
- } else if (payload_kiov != NULL) {
- /* must checksum payload after header so receiver can
- * compute partial header cksum before swab. Sadly
- * this causes 2 rounds of kmap */
- msg->kqm_cksum =
- kqswnal_csum_kiov(msg->kqm_cksum, 0, payload_nob,
- payload_niov, payload_kiov);
- if (*kqswnal_tunables.kqn_inject_csum_error == 2) {
- msg->kqm_cksum++;
- *kqswnal_tunables.kqn_inject_csum_error = 0;
- }
- } else {
- msg->kqm_cksum =
- kqswnal_csum_iov(msg->kqm_cksum, 0, payload_nob,
- payload_niov, payload_iov);
- if (*kqswnal_tunables.kqn_inject_csum_error == 2) {
- msg->kqm_cksum++;
- *kqswnal_tunables.kqn_inject_csum_error = 0;
- }
-#endif
- }
-
- } else if (payload_nob <= *kqswnal_tunables.kqn_tx_maxcontig) {
- lnet_hdr_t *mhdr;
- char *payload;
- kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer;
-
- /* small message: single frag copied into the pre-mapped buffer */
- if (the_lnet.ln_ptlcompat == 2) {
- /* Strong portals compatibility: send "raw" LNET header
- * + payload */
- mhdr = (lnet_hdr_t *)ktx->ktx_buffer;
- payload = (char *)(mhdr + 1);
- } else {
- /* Send an IMMEDIATE message */
- msg->kqm_magic = LNET_PROTO_QSW_MAGIC;
- msg->kqm_version = QSWLND_PROTO_VERSION;
- msg->kqm_type = QSWLND_MSG_IMMEDIATE;
-
- mhdr = &msg->kqm_u.immediate.kqim_hdr;
- payload = msg->kqm_u.immediate.kqim_payload;
- }
-
- *mhdr = *hdr;
- nob = (payload - ktx->ktx_buffer) + payload_nob;
-
- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob);
-
- if (payload_kiov != NULL)
- lnet_copy_kiov2flat(KQSW_TX_BUFFER_SIZE, payload, 0,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(KQSW_TX_BUFFER_SIZE, payload, 0,
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-#if KQSW_CKSUM
- LASSERT (the_lnet.ln_ptlcompat != 2);
- msg->kqm_nob = nob;
- msg->kqm_cksum = 0;
- msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob);
- if (*kqswnal_tunables.kqn_inject_csum_error == 1) {
- msg->kqm_cksum++;
- *kqswnal_tunables.kqn_inject_csum_error = 0;
- }
-#endif
- } else {
- lnet_hdr_t *mhdr;
- kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer;
-
- /* large message: multiple frags: first is hdr in pre-mapped buffer */
- if (the_lnet.ln_ptlcompat == 2) {
- /* Strong portals compatibility: send "raw" LNET header
- * + payload */
- mhdr = (lnet_hdr_t *)ktx->ktx_buffer;
- nob = sizeof(lnet_hdr_t);
- } else {
- /* Send an IMMEDIATE message */
- msg->kqm_magic = LNET_PROTO_QSW_MAGIC;
- msg->kqm_version = QSWLND_PROTO_VERSION;
- msg->kqm_type = QSWLND_MSG_IMMEDIATE;
-
- mhdr = &msg->kqm_u.immediate.kqim_hdr;
- nob = offsetof(kqswnal_msg_t,
- kqm_u.immediate.kqim_payload);
- }
-
- *mhdr = *hdr;
-
- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob);
-
- if (payload_kiov != NULL)
- rc = kqswnal_map_tx_kiov (ktx, payload_offset, payload_nob,
- payload_niov, payload_kiov);
- else
- rc = kqswnal_map_tx_iov (ktx, payload_offset, payload_nob,
- payload_niov, payload_iov);
- if (rc != 0)
- goto out;
-
-#if KQSW_CKSUM
- msg->kqm_nob = nob + payload_nob;
- msg->kqm_cksum = 0;
- msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob);
-
- msg->kqm_cksum = (payload_kiov != NULL) ?
- kqswnal_csum_kiov(msg->kqm_cksum,
- payload_offset, payload_nob,
- payload_niov, payload_kiov) :
- kqswnal_csum_iov(msg->kqm_cksum,
- payload_offset, payload_nob,
- payload_niov, payload_iov);
-
- if (*kqswnal_tunables.kqn_inject_csum_error == 1) {
- msg->kqm_cksum++;
- *kqswnal_tunables.kqn_inject_csum_error = 0;
- }
-#endif
- nob += payload_nob;
- }
-
- ktx->ktx_port = (nob <= KQSW_SMALLMSG) ?
- EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
-
- rc = kqswnal_launch (ktx);
-
- out:
- CDEBUG(rc == 0 ? D_NET : D_NETERROR, "%s %d bytes to %s%s: rc %d\n",
- routing ? (rc == 0 ? "Routed" : "Failed to route") :
- (rc == 0 ? "Sent" : "Failed to send"),
- nob, libcfs_nid2str(target.nid),
- target_is_router ? "(router)" : "", rc);
-
- if (rc != 0) {
- lnet_msg_t *repmsg = (lnet_msg_t *)ktx->ktx_args[2];
- int state = ktx->ktx_state;
-
- kqswnal_put_idle_tx (ktx);
-
- if (state == KTX_GETTING && repmsg != NULL) {
- /* We committed to reply, but there was a problem
- * launching the GET. We can't avoid delivering a
- * REPLY event since we committed above, so we
- * pretend the GET succeeded but the REPLY
- * failed. */
- rc = 0;
- lnet_finalize (kqswnal_data.kqn_ni, lntmsg, 0);
- lnet_finalize (kqswnal_data.kqn_ni, repmsg, -EIO);
- }
-
- }
-
- atomic_dec(&kqswnal_data.kqn_pending_txs);
- return (rc == 0 ? 0 : -EIO);
-}
-
-void
-kqswnal_requeue_rx (kqswnal_rx_t *krx)
-{
- LASSERT (atomic_read(&krx->krx_refcount) == 0);
- LASSERT (!krx->krx_rpc_reply_needed);
-
- krx->krx_state = KRX_POSTED;
-
- if (kqswnal_data.kqn_shuttingdown) {
- /* free EKC rxd on shutdown */
- ep_complete_receive(krx->krx_rxd);
- } else {
- /* repost receive */
- ep_requeue_receive(krx->krx_rxd,
- kqswnal_rxhandler, krx,
- &krx->krx_elanbuffer, 0);
- }
-}
-
-void
-kqswnal_rpc_complete (EP_RXD *rxd)
-{
- int status = ep_rxd_status(rxd);
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg(rxd);
-
- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
- "rxd %p, krx %p, status %d\n", rxd, krx, status);
-
- LASSERT (krx->krx_rxd == rxd);
- LASSERT (krx->krx_rpc_reply_needed);
-
- krx->krx_rpc_reply_needed = 0;
- kqswnal_requeue_rx (krx);
-}
-
-void
-kqswnal_rx_done (kqswnal_rx_t *krx)
-{
- int rc;
-
- LASSERT (atomic_read(&krx->krx_refcount) == 0);
-
- if (krx->krx_rpc_reply_needed) {
- /* We've not completed the peer's RPC yet... */
- krx->krx_rpc_reply.msg.magic = LNET_PROTO_QSW_MAGIC;
- krx->krx_rpc_reply.msg.version = QSWLND_PROTO_VERSION;
-
- LASSERT (!in_interrupt());
-
- rc = ep_complete_rpc(krx->krx_rxd,
- kqswnal_rpc_complete, krx,
- &krx->krx_rpc_reply.ep_statusblk,
- NULL, NULL, 0);
- if (rc == EP_SUCCESS)
- return;
-
- CERROR("can't complete RPC: %d\n", rc);
- krx->krx_rpc_reply_needed = 0;
- }
-
- kqswnal_requeue_rx(krx);
-}
-
-void
-kqswnal_parse (kqswnal_rx_t *krx)
-{
- lnet_ni_t *ni = kqswnal_data.kqn_ni;
- kqswnal_msg_t *msg = (kqswnal_msg_t *)page_address(krx->krx_kiov[0].kiov_page);
- lnet_nid_t fromnid = kqswnal_rx_nid(krx);
- int swab;
- int n;
- int i;
- int nob;
- int rc;
-
- LASSERT (atomic_read(&krx->krx_refcount) == 1);
-
- /* If ln_ptlcompat is set, peers may send me an "old" unencapsulated
- * lnet hdr */
- LASSERT (offsetof(kqswnal_msg_t, kqm_u) <= sizeof(lnet_hdr_t));
-
- if (krx->krx_nob < offsetof(kqswnal_msg_t, kqm_u)) {
- CERROR("Short message %d received from %s\n",
- krx->krx_nob, libcfs_nid2str(fromnid));
- goto done;
- }
-
- swab = msg->kqm_magic == __swab32(LNET_PROTO_QSW_MAGIC);
-
- if (swab || msg->kqm_magic == LNET_PROTO_QSW_MAGIC) {
-#if KQSW_CKSUM
- __u32 csum0;
- __u32 csum1;
-
- /* csum byte array before swab */
- csum1 = msg->kqm_cksum;
- msg->kqm_cksum = 0;
- csum0 = kqswnal_csum_kiov(~0, 0, krx->krx_nob,
- krx->krx_npages, krx->krx_kiov);
- msg->kqm_cksum = csum1;
-#endif
-
- if (swab) {
- __swab16s(&msg->kqm_version);
- __swab16s(&msg->kqm_type);
-#if KQSW_CKSUM
- __swab32s(&msg->kqm_cksum);
- __swab32s(&msg->kqm_nob);
-#endif
- }
-
- if (msg->kqm_version != QSWLND_PROTO_VERSION) {
- /* Future protocol version compatibility support!
- * The next qswlnd-specific protocol rev will first
- * send an RPC to check version.
- * 1.4.6 and 1.4.7.early reply with a status
- * block containing its current version.
- * Later versions send a failure (-ve) status +
- * magic/version */
-
- if (!krx->krx_rpc_reply_needed) {
- CERROR("Unexpected version %d from %s\n",
- msg->kqm_version, libcfs_nid2str(fromnid));
- goto done;
- }
-
- LASSERT (krx->krx_rpc_reply.msg.status == -EPROTO);
- goto done;
- }
-
- switch (msg->kqm_type) {
- default:
- CERROR("Bad request type %x from %s\n",
- msg->kqm_type, libcfs_nid2str(fromnid));
- goto done;
-
- case QSWLND_MSG_IMMEDIATE:
- if (krx->krx_rpc_reply_needed) {
- /* Should have been a simple message */
- CERROR("IMMEDIATE sent as RPC from %s\n",
- libcfs_nid2str(fromnid));
- goto done;
- }
-
- nob = offsetof(kqswnal_msg_t, kqm_u.immediate.kqim_payload);
- if (krx->krx_nob < nob) {
- CERROR("Short IMMEDIATE %d(%d) from %s\n",
- krx->krx_nob, nob, libcfs_nid2str(fromnid));
- goto done;
- }
-
-#if KQSW_CKSUM
- if (csum0 != msg->kqm_cksum) {
- CERROR("Bad IMMEDIATE checksum %08x(%08x) from %s\n",
- csum0, msg->kqm_cksum, libcfs_nid2str(fromnid));
- CERROR("nob %d (%d)\n", krx->krx_nob, msg->kqm_nob);
- goto done;
- }
-#endif
- rc = lnet_parse(ni, &msg->kqm_u.immediate.kqim_hdr,
- fromnid, krx, 0);
- if (rc < 0)
- goto done;
- return;
-
- case QSWLND_MSG_RDMA:
- if (!krx->krx_rpc_reply_needed) {
- /* Should have been a simple message */
- CERROR("RDMA sent as simple message from %s\n",
- libcfs_nid2str(fromnid));
- goto done;
- }
-
- nob = offsetof(kqswnal_msg_t,
- kqm_u.rdma.kqrm_rmd.kqrmd_frag[0]);
- if (krx->krx_nob < nob) {
- CERROR("Short RDMA message %d(%d) from %s\n",
- krx->krx_nob, nob, libcfs_nid2str(fromnid));
- goto done;
- }
-
- if (swab)
- __swab32s(&msg->kqm_u.rdma.kqrm_rmd.kqrmd_nfrag);
-
- n = msg->kqm_u.rdma.kqrm_rmd.kqrmd_nfrag;
- nob = offsetof(kqswnal_msg_t,
- kqm_u.rdma.kqrm_rmd.kqrmd_frag[n]);
-
- if (krx->krx_nob < nob) {
- CERROR("short RDMA message %d(%d) from %s\n",
- krx->krx_nob, nob, libcfs_nid2str(fromnid));
- goto done;
- }
-
- if (swab) {
- for (i = 0; i < n; i++) {
- EP_NMD *nmd = &msg->kqm_u.rdma.kqrm_rmd.kqrmd_frag[i];
-
- __swab32s(&nmd->nmd_addr);
- __swab32s(&nmd->nmd_len);
- __swab32s(&nmd->nmd_attr);
- }
- }
-
-#if KQSW_CKSUM
- krx->krx_cksum = csum0; /* stash checksum so far */
-#endif
- rc = lnet_parse(ni, &msg->kqm_u.rdma.kqrm_hdr,
- fromnid, krx, 1);
- if (rc < 0)
- goto done;
- return;
- }
- /* Not Reached */
- }
-
- if (msg->kqm_magic == LNET_PROTO_MAGIC ||
- msg->kqm_magic == __swab32(LNET_PROTO_MAGIC)) {
- /* Future protocol version compatibility support!
- * When LNET unifies protocols over all LNDs, the first thing a
- * peer will send will be a version query RPC.
- * 1.4.6 and 1.4.7.early reply with a status block containing
- * LNET_PROTO_QSW_MAGIC..
- * Later versions send a failure (-ve) status +
- * magic/version */
-
- if (!krx->krx_rpc_reply_needed) {
- CERROR("Unexpected magic %08x from %s\n",
- msg->kqm_magic, libcfs_nid2str(fromnid));
- goto done;
- }
-
- LASSERT (krx->krx_rpc_reply.msg.status == -EPROTO);
- goto done;
- }
-
- if (the_lnet.ln_ptlcompat != 0) {
- /* Portals compatibility (strong or weak)
- * This could be an unencapsulated LNET header. If it's big
- * enough, let LNET's parser sort it out */
-
- if (krx->krx_nob < sizeof(lnet_hdr_t)) {
- CERROR("Short portals-compatible message from %s\n",
- libcfs_nid2str(fromnid));
- goto done;
- }
-
- krx->krx_raw_lnet_hdr = 1;
- rc = lnet_parse(ni, (lnet_hdr_t *)msg,
- fromnid, krx, krx->krx_rpc_reply_needed);
- if (rc < 0)
- goto done;
- return;
- }
-
- CERROR("Unrecognised magic %08x from %s\n",
- msg->kqm_magic, libcfs_nid2str(fromnid));
- done:
- kqswnal_rx_decref(krx);
-}
-
-/* Receive Interrupt Handler: posts to schedulers */
-void
-kqswnal_rxhandler(EP_RXD *rxd)
-{
- unsigned long flags;
- int nob = ep_rxd_len (rxd);
- int status = ep_rxd_status (rxd);
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg (rxd);
- CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n",
- rxd, krx, nob, status);
-
- LASSERT (krx != NULL);
- LASSERT (krx->krx_state == KRX_POSTED);
-
- krx->krx_state = KRX_PARSE;
- krx->krx_rxd = rxd;
- krx->krx_nob = nob;
- krx->krx_raw_lnet_hdr = 0;
-
- /* RPC reply iff rpc request received without error */
- krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd) &&
- (status == EP_SUCCESS ||
- status == EP_MSG_TOO_BIG);
-
- /* Default to failure if an RPC reply is requested but not handled */
- krx->krx_rpc_reply.msg.status = -EPROTO;
- atomic_set (&krx->krx_refcount, 1);
-
- if (status != EP_SUCCESS) {
- /* receives complete with failure when receiver is removed */
- if (status == EP_SHUTDOWN)
- LASSERT (kqswnal_data.kqn_shuttingdown);
- else
- CERROR("receive status failed with status %d nob %d\n",
- ep_rxd_status(rxd), nob);
- kqswnal_rx_decref(krx);
- return;
- }
-
- if (!in_interrupt()) {
- kqswnal_parse(krx);
- return;
- }
-
- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-
- list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
- wake_up (&kqswnal_data.kqn_sched_waitq);
-
- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
-}
-
-int
-kqswnal_recv (lnet_ni_t *ni,
- void *private,
- lnet_msg_t *lntmsg,
- int delayed,
- unsigned int niov,
- struct iovec *iov,
- lnet_kiov_t *kiov,
- unsigned int offset,
- unsigned int mlen,
- unsigned int rlen)
-{
- kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
- lnet_nid_t fromnid;
- kqswnal_msg_t *msg;
- lnet_hdr_t *hdr;
- kqswnal_remotemd_t *rmd;
- int msg_offset;
- int rc;
-
- LASSERT (!in_interrupt ()); /* OK to map */
- /* Either all pages or all vaddrs */
- LASSERT (!(kiov != NULL && iov != NULL));
-
- fromnid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ep_rxd_node(krx->krx_rxd));
- msg = (kqswnal_msg_t *)page_address(krx->krx_kiov[0].kiov_page);
-
- if (krx->krx_rpc_reply_needed) {
- /* optimized (rdma) request sent as RPC */
-
- if (krx->krx_raw_lnet_hdr) {
- LASSERT (the_lnet.ln_ptlcompat != 0);
- hdr = (lnet_hdr_t *)msg;
- rmd = kqswnal_get_portalscompat_rmd(krx);
- if (rmd == NULL)
- return (-EPROTO);
- } else {
- LASSERT (msg->kqm_type == QSWLND_MSG_RDMA);
- hdr = &msg->kqm_u.rdma.kqrm_hdr;
- rmd = &msg->kqm_u.rdma.kqrm_rmd;
- }
-
- /* NB header is still in wire byte order */
-
- switch (le32_to_cpu(hdr->type)) {
- case LNET_MSG_PUT:
- case LNET_MSG_REPLY:
- /* This is an optimized PUT/REPLY */
- rc = kqswnal_rdma(krx, lntmsg,
- KTX_RDMA_FETCH, rmd,
- niov, iov, kiov, offset, mlen);
- break;
-
- case LNET_MSG_GET:
-#if KQSW_CKSUM
- if (krx->krx_cksum != msg->kqm_cksum) {
- CERROR("Bad GET checksum %08x(%08x) from %s\n",
- krx->krx_cksum, msg->kqm_cksum,
- libcfs_nid2str(fromnid));
- rc = -EIO;
- break;
- }
-#endif
- if (lntmsg == NULL) {
- /* No buffer match: my decref will
- * complete the RPC with failure */
- rc = 0;
- } else {
- /* Matched something! */
- rc = kqswnal_rdma(krx, lntmsg,
- KTX_RDMA_STORE, rmd,
- lntmsg->msg_niov,
- lntmsg->msg_iov,
- lntmsg->msg_kiov,
- lntmsg->msg_offset,
- lntmsg->msg_len);
- }
- break;
-
- default:
- CERROR("Bad RPC type %d\n",
- le32_to_cpu(hdr->type));
- rc = -EPROTO;
- break;
- }
-
- kqswnal_rx_decref(krx);
- return rc;
- }
-
- if (krx->krx_raw_lnet_hdr) {
- LASSERT (the_lnet.ln_ptlcompat != 0);
- msg_offset = sizeof(lnet_hdr_t);
- } else {
- LASSERT (msg->kqm_type == QSWLND_MSG_IMMEDIATE);
- msg_offset = offsetof(kqswnal_msg_t, kqm_u.immediate.kqim_payload);
- }
-
- if (krx->krx_nob < msg_offset + rlen) {
- CERROR("Bad message size from %s: have %d, need %d + %d\n",
- libcfs_nid2str(fromnid), krx->krx_nob,
- msg_offset, rlen);
- kqswnal_rx_decref(krx);
- return -EPROTO;
- }
-
- if (kiov != NULL)
- lnet_copy_kiov2kiov(niov, kiov, offset,
- krx->krx_npages, krx->krx_kiov,
- msg_offset, mlen);
- else
- lnet_copy_kiov2iov(niov, iov, offset,
- krx->krx_npages, krx->krx_kiov,
- msg_offset, mlen);
-
- lnet_finalize(ni, lntmsg, 0);
- kqswnal_rx_decref(krx);
- return 0;
-}
-
-int
-kqswnal_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid = kernel_thread (fn, arg, 0);
-
- if (pid < 0)
- return ((int)pid);
-
- atomic_inc (&kqswnal_data.kqn_nthreads);
- return (0);
-}
-
-void
-kqswnal_thread_fini (void)
-{
- atomic_dec (&kqswnal_data.kqn_nthreads);
-}
-
-int
-kqswnal_scheduler (void *arg)
-{
- kqswnal_rx_t *krx;
- kqswnal_tx_t *ktx;
- unsigned long flags;
- int rc;
- int counter = 0;
- int did_something;
-
- cfs_daemonize ("kqswnal_sched");
- cfs_block_allsigs ();
-
- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-
- for (;;)
- {
- did_something = 0;
-
- if (!list_empty (&kqswnal_data.kqn_readyrxds))
- {
- krx = list_entry(kqswnal_data.kqn_readyrxds.next,
- kqswnal_rx_t, krx_list);
- list_del (&krx->krx_list);
- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
- flags);
-
- LASSERT (krx->krx_state == KRX_PARSE);
- kqswnal_parse (krx);
-
- did_something = 1;
- spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
- }
-
- if (!list_empty (&kqswnal_data.kqn_donetxds))
- {
- ktx = list_entry(kqswnal_data.kqn_donetxds.next,
- kqswnal_tx_t, ktx_schedlist);
- list_del_init (&ktx->ktx_schedlist);
- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
- flags);
-
- kqswnal_tx_done_in_thread_context(ktx);
-
- did_something = 1;
- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
- }
-
- if (!list_empty (&kqswnal_data.kqn_delayedtxds))
- {
- ktx = list_entry(kqswnal_data.kqn_delayedtxds.next,
- kqswnal_tx_t, ktx_schedlist);
- list_del_init (&ktx->ktx_schedlist);
- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
- flags);
-
- rc = kqswnal_launch (ktx);
- if (rc != 0) {
- CERROR("Failed delayed transmit to %s: %d\n",
- libcfs_nid2str(ktx->ktx_nid), rc);
- kqswnal_tx_done (ktx, rc);
- }
- atomic_dec (&kqswnal_data.kqn_pending_txs);
-
- did_something = 1;
- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
- }
-
- /* nothing to do or hogging CPU */
- if (!did_something || counter++ == KQSW_RESCHED) {
- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
- flags);
-
- counter = 0;
-
- if (!did_something) {
- if (kqswnal_data.kqn_shuttingdown == 2) {
- /* We only exit in stage 2 of shutdown when
- * there's nothing left to do */
- break;
- }
- rc = wait_event_interruptible_exclusive (
- kqswnal_data.kqn_sched_waitq,
- kqswnal_data.kqn_shuttingdown == 2 ||
- !list_empty(&kqswnal_data.kqn_readyrxds) ||
- !list_empty(&kqswnal_data.kqn_donetxds) ||
- !list_empty(&kqswnal_data.kqn_delayedtxds));
- LASSERT (rc == 0);
- } else if (need_resched())
- schedule ();
-
- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
- }
- }
-
- kqswnal_thread_fini ();
- return (0);
-}
+++ /dev/null
-/*
- * Copyright (C) 2002-2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.lustre.org
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "qswlnd.h"
-
-static int tx_maxcontig = (1<<10);
-CFS_MODULE_PARM(tx_maxcontig, "i", int, 0444,
- "maximum payload to de-fragment");
-
-static int ntxmsgs = 512;
-CFS_MODULE_PARM(ntxmsgs, "i", int, 0444,
- "# tx msg buffers");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int peer_credits = 8;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
- "# per-peer concurrent sends");
-
-static int nrxmsgs_large = 64;
-CFS_MODULE_PARM(nrxmsgs_large, "i", int, 0444,
- "# 'large' rx msg buffers");
-
-static int ep_envelopes_large = 256;
-CFS_MODULE_PARM(ep_envelopes_large, "i", int, 0444,
- "# 'large' rx msg envelope buffers");
-
-static int nrxmsgs_small = 256;
-CFS_MODULE_PARM(nrxmsgs_small, "i", int, 0444,
- "# 'small' rx msg buffers");
-
-static int ep_envelopes_small = 2048;
-CFS_MODULE_PARM(ep_envelopes_small, "i", int, 0444,
- "# 'small' rx msg envelope buffers");
-
-static int optimized_puts = (32<<10);
-CFS_MODULE_PARM(optimized_puts, "i", int, 0644,
- "zero-copy puts >= this size");
-
-static int optimized_gets = 2048;
-CFS_MODULE_PARM(optimized_gets, "i", int, 0644,
- "zero-copy gets >= this size");
-
-#if KQSW_CKSUM
-static int inject_csum_error = 0;
-CFS_MODULE_PARM(inject_csum_error, "i", int, 0644,
- "test checksumming");
-#endif
-
-kqswnal_tunables_t kqswnal_tunables = {
- .kqn_tx_maxcontig = &tx_maxcontig,
- .kqn_ntxmsgs = &ntxmsgs,
- .kqn_credits = &credits,
- .kqn_peercredits = &peer_credits,
- .kqn_nrxmsgs_large = &nrxmsgs_large,
- .kqn_ep_envelopes_large = &ep_envelopes_large,
- .kqn_nrxmsgs_small = &nrxmsgs_small,
- .kqn_ep_envelopes_small = &ep_envelopes_small,
- .kqn_optimized_puts = &optimized_puts,
- .kqn_optimized_gets = &optimized_gets,
-#if KQSW_CKSUM
- .kqn_inject_csum_error = &inject_csum_error,
-#endif
-};
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-static cfs_sysctl_table_t kqswnal_ctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "tx_maxcontig",
- .data = &tx_maxcontig,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 2,
- .procname = "ntxmsgs",
- .data = &ntxmsgs,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 3,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "nrxmsgs_large",
- .data = &nrxmsgs_large,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 6,
- .procname = "ep_envelopes_large",
- .data = &ep_envelopes_large,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 7,
- .procname = "nrxmsgs_small",
- .data = &nrxmsgs_small,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 8,
- .procname = "ep_envelopes_small",
- .data = &ep_envelopes_small,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 9,
- .procname = "optimized_puts",
- .data = &optimized_puts,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 10,
- .procname = "optimized_gets",
- .data = &optimized_gets,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
-#if KQSW_CKSUM
- {
- .ctl_name = 11,
- .procname = "inject_csum_error",
- .data = &inject_csum_error,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
-#endif
- {0}
-};
-
-static cfs_sysctl_table_t kqswnal_top_ctl_table[] = {
- {
- .ctl_name = 201,
- .procname = "qswnal",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = kqswnal_ctl_table
- },
- {0}
-};
-
-int
-kqswnal_tunables_init ()
-{
- kqswnal_tunables.kqn_sysctl =
- cfs_register_sysctl_table(kqswnal_top_ctl_table, 0);
-
- if (kqswnal_tunables.kqn_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-
- return 0;
-}
-
-void
-kqswnal_tunables_fini ()
-{
- if (kqswnal_tunables.kqn_sysctl != NULL)
- cfs_unregister_sysctl_table(kqswnal_tunables.kqn_sysctl);
-}
-#else
-int
-kqswnal_tunables_init ()
-{
- return 0;
-}
-
-void
-kqswnal_tunables_fini ()
-{
-}
-#endif
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-MODULES := kralnd
-kralnd-objs := ralnd.o ralnd_cb.o ralnd_modparams.o
-
-EXTRA_POST_CFLAGS := @RACPPFLAGS@
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_RALND
-modulenet_DATA = kralnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kralnd-objs:%.o=%.c) ralnd.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-#include "ralnd.h"
-
-static int kranal_devids[RANAL_MAXDEVS] = {RAPK_MAIN_DEVICE_ID,
- RAPK_EXPANSION_DEVICE_ID};
-
-lnd_t the_kralnd = {
- .lnd_type = RALND,
- .lnd_startup = kranal_startup,
- .lnd_shutdown = kranal_shutdown,
- .lnd_ctl = kranal_ctl,
- .lnd_send = kranal_send,
- .lnd_recv = kranal_recv,
- .lnd_eager_recv = kranal_eager_recv,
- .lnd_accept = kranal_accept,
-};
-
-kra_data_t kranal_data;
-
-void
-kranal_pack_connreq(kra_connreq_t *connreq, kra_conn_t *conn, lnet_nid_t dstnid)
-{
- RAP_RETURN rrc;
-
- memset(connreq, 0, sizeof(*connreq));
-
- connreq->racr_magic = RANAL_MSG_MAGIC;
- connreq->racr_version = RANAL_MSG_VERSION;
-
- if (conn == NULL) /* prepping a "stub" reply */
- return;
-
- connreq->racr_devid = conn->rac_device->rad_id;
- connreq->racr_srcnid = lnet_ptlcompat_srcnid(kranal_data.kra_ni->ni_nid,
- dstnid);
- connreq->racr_dstnid = dstnid;
- connreq->racr_peerstamp = kranal_data.kra_peerstamp;
- connreq->racr_connstamp = conn->rac_my_connstamp;
- connreq->racr_timeout = conn->rac_timeout;
-
- rrc = RapkGetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
- LASSERT(rrc == RAP_SUCCESS);
-}
-
-int
-kranal_recv_connreq(struct socket *sock, kra_connreq_t *connreq, int active)
-{
- int timeout = active ? *kranal_tunables.kra_timeout :
- lnet_acceptor_timeout();
- int swab;
- int rc;
-
- /* return 0 on success, -ve on error, +ve to tell the peer I'm "old" */
-
- rc = libcfs_sock_read(sock, &connreq->racr_magic,
- sizeof(connreq->racr_magic), timeout);
- if (rc != 0) {
- CERROR("Read(magic) failed(1): %d\n", rc);
- return -EIO;
- }
-
- if (connreq->racr_magic != RANAL_MSG_MAGIC &&
- connreq->racr_magic != __swab32(RANAL_MSG_MAGIC)) {
- /* Unexpected magic! */
- if (!active &&
- the_lnet.ln_ptlcompat == 0 &&
- (connreq->racr_magic == LNET_PROTO_MAGIC ||
- connreq->racr_magic == __swab32(LNET_PROTO_MAGIC))) {
- /* future protocol version compatibility!
- * When LNET unifies protocols over all LNDs, the first
- * thing sent will be a version query. +ve rc means I
- * reply with my current magic/version */
- return EPROTO;
- }
-
- if (active ||
- the_lnet.ln_ptlcompat == 0) {
- CERROR("Unexpected magic %08x (1)\n",
- connreq->racr_magic);
- return -EPROTO;
- }
-
- /* When portals compatibility is set, I may be passed a new
- * connection "blindly" by the acceptor, and I have to
- * determine if my peer has sent an acceptor connection request
- * or not. This isn't a connreq, so I'll get the acceptor to
- * look at it... */
- rc = lnet_accept(kranal_data.kra_ni, sock, connreq->racr_magic);
- if (rc != 0)
- return -EPROTO;
-
- /* ...and if it's OK I'm back to looking for a connreq... */
- rc = libcfs_sock_read(sock, &connreq->racr_magic,
- sizeof(connreq->racr_magic), timeout);
- if (rc != 0) {
- CERROR("Read(magic) failed(2): %d\n", rc);
- return -EIO;
- }
-
- if (connreq->racr_magic != RANAL_MSG_MAGIC &&
- connreq->racr_magic != __swab32(RANAL_MSG_MAGIC)) {
- CERROR("Unexpected magic %08x(2)\n",
- connreq->racr_magic);
- return -EPROTO;
- }
- }
-
- swab = (connreq->racr_magic == __swab32(RANAL_MSG_MAGIC));
-
- rc = libcfs_sock_read(sock, &connreq->racr_version,
- sizeof(connreq->racr_version), timeout);
- if (rc != 0) {
- CERROR("Read(version) failed: %d\n", rc);
- return -EIO;
- }
-
- if (swab)
- __swab16s(&connreq->racr_version);
-
- if (connreq->racr_version != RANAL_MSG_VERSION) {
- if (active) {
- CERROR("Unexpected version %d\n", connreq->racr_version);
- return -EPROTO;
- }
- /* If this is a future version of the ralnd protocol, and I'm
- * passive (accepted the connection), tell my peer I'm "old"
- * (+ve rc) */
- return EPROTO;
- }
-
- rc = libcfs_sock_read(sock, &connreq->racr_devid,
- sizeof(connreq->racr_version) -
- offsetof(kra_connreq_t, racr_devid),
- timeout);
- if (rc != 0) {
- CERROR("Read(body) failed: %d\n", rc);
- return -EIO;
- }
-
- if (swab) {
- __swab32s(&connreq->racr_magic);
- __swab16s(&connreq->racr_version);
- __swab16s(&connreq->racr_devid);
- __swab64s(&connreq->racr_srcnid);
- __swab64s(&connreq->racr_dstnid);
- __swab64s(&connreq->racr_peerstamp);
- __swab64s(&connreq->racr_connstamp);
- __swab32s(&connreq->racr_timeout);
-
- __swab32s(&connreq->racr_riparams.HostId);
- __swab32s(&connreq->racr_riparams.FmaDomainHndl);
- __swab32s(&connreq->racr_riparams.PTag);
- __swab32s(&connreq->racr_riparams.CompletionCookie);
- }
-
- if (connreq->racr_srcnid == LNET_NID_ANY ||
- connreq->racr_dstnid == LNET_NID_ANY) {
- CERROR("Received LNET_NID_ANY\n");
- return -EPROTO;
- }
-
- if (connreq->racr_timeout < RANAL_MIN_TIMEOUT) {
- CERROR("Received timeout %d < MIN %d\n",
- connreq->racr_timeout, RANAL_MIN_TIMEOUT);
- return -EPROTO;
- }
-
- return 0;
-}
-
-int
-kranal_close_stale_conns_locked (kra_peer_t *peer, kra_conn_t *newconn)
-{
- kra_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int loopback;
- int count = 0;
-
- loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid;
-
- list_for_each_safe (ctmp, cnxt, &peer->rap_conns) {
- conn = list_entry(ctmp, kra_conn_t, rac_list);
-
- if (conn == newconn)
- continue;
-
- if (conn->rac_peerstamp != newconn->rac_peerstamp) {
- CDEBUG(D_NET, "Closing stale conn nid: %s "
- " peerstamp:"LPX64"("LPX64")\n",
- libcfs_nid2str(peer->rap_nid),
- conn->rac_peerstamp, newconn->rac_peerstamp);
- LASSERT (conn->rac_peerstamp < newconn->rac_peerstamp);
- count++;
- kranal_close_conn_locked(conn, -ESTALE);
- continue;
- }
-
- if (conn->rac_device != newconn->rac_device)
- continue;
-
- if (loopback &&
- newconn->rac_my_connstamp == conn->rac_peer_connstamp &&
- newconn->rac_peer_connstamp == conn->rac_my_connstamp)
- continue;
-
- LASSERT (conn->rac_peer_connstamp < newconn->rac_peer_connstamp);
-
- CDEBUG(D_NET, "Closing stale conn nid: %s"
- " connstamp:"LPX64"("LPX64")\n",
- libcfs_nid2str(peer->rap_nid),
- conn->rac_peer_connstamp, newconn->rac_peer_connstamp);
-
- count++;
- kranal_close_conn_locked(conn, -ESTALE);
- }
-
- return count;
-}
-
-int
-kranal_conn_isdup_locked(kra_peer_t *peer, kra_conn_t *newconn)
-{
- kra_conn_t *conn;
- struct list_head *tmp;
- int loopback;
-
- loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid;
-
- list_for_each(tmp, &peer->rap_conns) {
- conn = list_entry(tmp, kra_conn_t, rac_list);
-
- /* 'newconn' is from an earlier version of 'peer'!!! */
- if (newconn->rac_peerstamp < conn->rac_peerstamp)
- return 1;
-
- /* 'conn' is from an earlier version of 'peer': it will be
- * removed when we cull stale conns later on... */
- if (newconn->rac_peerstamp > conn->rac_peerstamp)
- continue;
-
- /* Different devices are OK */
- if (conn->rac_device != newconn->rac_device)
- continue;
-
- /* It's me connecting to myself */
- if (loopback &&
- newconn->rac_my_connstamp == conn->rac_peer_connstamp &&
- newconn->rac_peer_connstamp == conn->rac_my_connstamp)
- continue;
-
- /* 'newconn' is an earlier connection from 'peer'!!! */
- if (newconn->rac_peer_connstamp < conn->rac_peer_connstamp)
- return 2;
-
- /* 'conn' is an earlier connection from 'peer': it will be
- * removed when we cull stale conns later on... */
- if (newconn->rac_peer_connstamp > conn->rac_peer_connstamp)
- continue;
-
- /* 'newconn' has the SAME connection stamp; 'peer' isn't
- * playing the game... */
- return 3;
- }
-
- return 0;
-}
-
-void
-kranal_set_conn_uniqueness (kra_conn_t *conn)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- conn->rac_my_connstamp = kranal_data.kra_connstamp++;
-
- do { /* allocate a unique cqid */
- conn->rac_cqid = kranal_data.kra_next_cqid++;
- } while (kranal_cqid2conn_locked(conn->rac_cqid) != NULL);
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-}
-
-int
-kranal_create_conn(kra_conn_t **connp, kra_device_t *dev)
-{
- kra_conn_t *conn;
- RAP_RETURN rrc;
-
- LASSERT (!in_interrupt());
- LIBCFS_ALLOC(conn, sizeof(*conn));
-
- if (conn == NULL)
- return -ENOMEM;
-
- memset(conn, 0, sizeof(*conn));
- atomic_set(&conn->rac_refcount, 1);
- INIT_LIST_HEAD(&conn->rac_list);
- INIT_LIST_HEAD(&conn->rac_hashlist);
- INIT_LIST_HEAD(&conn->rac_schedlist);
- INIT_LIST_HEAD(&conn->rac_fmaq);
- INIT_LIST_HEAD(&conn->rac_rdmaq);
- INIT_LIST_HEAD(&conn->rac_replyq);
- spin_lock_init(&conn->rac_lock);
-
- kranal_set_conn_uniqueness(conn);
-
- conn->rac_device = dev;
- conn->rac_timeout = MAX(*kranal_tunables.kra_timeout, RANAL_MIN_TIMEOUT);
- kranal_update_reaper_timeout(conn->rac_timeout);
-
- rrc = RapkCreateRi(dev->rad_handle, conn->rac_cqid,
- &conn->rac_rihandle);
- if (rrc != RAP_SUCCESS) {
- CERROR("RapkCreateRi failed: %d\n", rrc);
- LIBCFS_FREE(conn, sizeof(*conn));
- return -ENETDOWN;
- }
-
- atomic_inc(&kranal_data.kra_nconns);
- *connp = conn;
- return 0;
-}
-
-void
-kranal_destroy_conn(kra_conn_t *conn)
-{
- RAP_RETURN rrc;
-
- LASSERT (!in_interrupt());
- LASSERT (!conn->rac_scheduled);
- LASSERT (list_empty(&conn->rac_list));
- LASSERT (list_empty(&conn->rac_hashlist));
- LASSERT (list_empty(&conn->rac_schedlist));
- LASSERT (atomic_read(&conn->rac_refcount) == 0);
- LASSERT (list_empty(&conn->rac_fmaq));
- LASSERT (list_empty(&conn->rac_rdmaq));
- LASSERT (list_empty(&conn->rac_replyq));
-
- rrc = RapkDestroyRi(conn->rac_device->rad_handle,
- conn->rac_rihandle);
- LASSERT (rrc == RAP_SUCCESS);
-
- if (conn->rac_peer != NULL)
- kranal_peer_decref(conn->rac_peer);
-
- LIBCFS_FREE(conn, sizeof(*conn));
- atomic_dec(&kranal_data.kra_nconns);
-}
-
-void
-kranal_terminate_conn_locked (kra_conn_t *conn)
-{
- LASSERT (!in_interrupt());
- LASSERT (conn->rac_state == RANAL_CONN_CLOSING);
- LASSERT (!list_empty(&conn->rac_hashlist));
- LASSERT (list_empty(&conn->rac_list));
-
- /* Remove from conn hash table: no new callbacks */
- list_del_init(&conn->rac_hashlist);
- kranal_conn_decref(conn);
-
- conn->rac_state = RANAL_CONN_CLOSED;
-
- /* schedule to clear out all uncompleted comms in context of dev's
- * scheduler */
- kranal_schedule_conn(conn);
-}
-
-void
-kranal_close_conn_locked (kra_conn_t *conn, int error)
-{
- kra_peer_t *peer = conn->rac_peer;
-
- CDEBUG(error == 0 ? D_NET : D_NETERROR,
- "closing conn to %s: error %d\n",
- libcfs_nid2str(peer->rap_nid), error);
-
- LASSERT (!in_interrupt());
- LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED);
- LASSERT (!list_empty(&conn->rac_hashlist));
- LASSERT (!list_empty(&conn->rac_list));
-
- list_del_init(&conn->rac_list);
-
- if (list_empty(&peer->rap_conns) &&
- peer->rap_persistence == 0) {
- /* Non-persistent peer with no more conns... */
- kranal_unlink_peer_locked(peer);
- }
-
- /* Reset RX timeout to ensure we wait for an incoming CLOSE for the
- * full timeout. If we get a CLOSE we know the peer has stopped all
- * RDMA. Otherwise if we wait for the full timeout we can also be sure
- * all RDMA has stopped. */
- conn->rac_last_rx = jiffies;
- mb();
-
- conn->rac_state = RANAL_CONN_CLOSING;
- kranal_schedule_conn(conn); /* schedule sending CLOSE */
-
- kranal_conn_decref(conn); /* lose peer's ref */
-}
-
-void
-kranal_close_conn (kra_conn_t *conn, int error)
-{
- unsigned long flags;
-
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- if (conn->rac_state == RANAL_CONN_ESTABLISHED)
- kranal_close_conn_locked(conn, error);
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-}
-
-int
-kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq,
- __u32 peer_ip, int peer_port)
-{
- kra_device_t *dev = conn->rac_device;
- unsigned long flags;
- RAP_RETURN rrc;
-
- /* CAVEAT EMPTOR: we're really overloading rac_last_tx + rac_keepalive
- * to do RapkCompleteSync() timekeeping (see kibnal_scheduler). */
- conn->rac_last_tx = jiffies;
- conn->rac_keepalive = 0;
-
- rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
- if (rrc != RAP_SUCCESS) {
- CERROR("Error setting riparams from %u.%u.%u.%u/%d: %d\n",
- HIPQUAD(peer_ip), peer_port, rrc);
- return -ECONNABORTED;
- }
-
- /* Schedule conn on rad_new_conns */
- kranal_conn_addref(conn);
- spin_lock_irqsave(&dev->rad_lock, flags);
- list_add_tail(&conn->rac_schedlist, &dev->rad_new_conns);
- wake_up(&dev->rad_waitq);
- spin_unlock_irqrestore(&dev->rad_lock, flags);
-
- rrc = RapkWaitToConnect(conn->rac_rihandle);
- if (rrc != RAP_SUCCESS) {
- CERROR("Error waiting to connect to %u.%u.%u.%u/%d: %d\n",
- HIPQUAD(peer_ip), peer_port, rrc);
- return -ECONNABORTED;
- }
-
- /* Scheduler doesn't touch conn apart from to deschedule and decref it
- * after RapkCompleteSync() return success, so conn is all mine */
-
- conn->rac_peerstamp = connreq->racr_peerstamp;
- conn->rac_peer_connstamp = connreq->racr_connstamp;
- conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout);
- kranal_update_reaper_timeout(conn->rac_keepalive);
- return 0;
-}
-
-int
-kranal_passive_conn_handshake (struct socket *sock, lnet_nid_t *src_nidp,
- lnet_nid_t *dst_nidp, kra_conn_t **connp)
-{
- __u32 peer_ip;
- unsigned int peer_port;
- kra_connreq_t rx_connreq;
- kra_connreq_t tx_connreq;
- kra_conn_t *conn;
- kra_device_t *dev;
- int rc;
- int i;
-
- rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- if (rc != 0) {
- CERROR("Can't get peer's IP: %d\n", rc);
- return rc;
- }
-
- rc = kranal_recv_connreq(sock, &rx_connreq, 0);
-
- if (rc < 0) {
- CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n",
- HIPQUAD(peer_ip), peer_port, rc);
- return rc;
- }
-
- if (rc > 0) {
- /* Request from "new" peer: send reply with my MAGIC/VERSION to
- * tell her I'm old... */
- kranal_pack_connreq(&tx_connreq, NULL, LNET_NID_ANY);
-
- rc = libcfs_sock_write(sock, &tx_connreq, sizeof(tx_connreq),
- lnet_acceptor_timeout());
- if (rc != 0)
- CERROR("Can't tx stub connreq to %u.%u.%u.%u/%d: %d\n",
- HIPQUAD(peer_ip), peer_port, rc);
-
- return -EPROTO;
- }
-
- for (i = 0;;i++) {
- if (i == kranal_data.kra_ndevs) {
- CERROR("Can't match dev %d from %u.%u.%u.%u/%d\n",
- rx_connreq.racr_devid, HIPQUAD(peer_ip), peer_port);
- return -ENODEV;
- }
- dev = &kranal_data.kra_devices[i];
- if (dev->rad_id == rx_connreq.racr_devid)
- break;
- }
-
- rc = kranal_create_conn(&conn, dev);
- if (rc != 0)
- return rc;
-
- kranal_pack_connreq(&tx_connreq, conn, rx_connreq.racr_srcnid);
-
- rc = libcfs_sock_write(sock, &tx_connreq, sizeof(tx_connreq),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n",
- HIPQUAD(peer_ip), peer_port, rc);
- kranal_conn_decref(conn);
- return rc;
- }
-
- rc = kranal_set_conn_params(conn, &rx_connreq, peer_ip, peer_port);
- if (rc != 0) {
- kranal_conn_decref(conn);
- return rc;
- }
-
- *connp = conn;
- *src_nidp = rx_connreq.racr_srcnid;
- *dst_nidp = rx_connreq.racr_dstnid;
- return 0;
-}
-
-int
-kranal_active_conn_handshake(kra_peer_t *peer,
- lnet_nid_t *dst_nidp, kra_conn_t **connp)
-{
- kra_connreq_t connreq;
- kra_conn_t *conn;
- kra_device_t *dev;
- struct socket *sock;
- int rc;
- unsigned int idx;
-
- /* spread connections over all devices using both peer NIDs to ensure
- * all nids use all devices */
- idx = peer->rap_nid + kranal_data.kra_ni->ni_nid;
- dev = &kranal_data.kra_devices[idx % kranal_data.kra_ndevs];
-
- rc = kranal_create_conn(&conn, dev);
- if (rc != 0)
- return rc;
-
- kranal_pack_connreq(&connreq, conn, peer->rap_nid);
-
- if (the_lnet.ln_testprotocompat != 0) {
- /* single-shot proto test */
- LNET_LOCK();
- if ((the_lnet.ln_testprotocompat & 1) != 0) {
- connreq.racr_version++;
- the_lnet.ln_testprotocompat &= ~1;
- }
- if ((the_lnet.ln_testprotocompat & 2) != 0) {
- connreq.racr_magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~2;
- }
- LNET_UNLOCK();
- }
-
- rc = lnet_connect(&sock, peer->rap_nid,
- 0, peer->rap_ip, peer->rap_port);
- if (rc != 0)
- goto failed_0;
-
- /* CAVEAT EMPTOR: the passive side receives with a SHORT rx timeout
- * immediately after accepting a connection, so we connect and then
- * send immediately. */
-
- rc = libcfs_sock_write(sock, &connreq, sizeof(connreq),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n",
- HIPQUAD(peer->rap_ip), peer->rap_port, rc);
- goto failed_2;
- }
-
- rc = kranal_recv_connreq(sock, &connreq, 1);
- if (rc != 0) {
- CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n",
- HIPQUAD(peer->rap_ip), peer->rap_port, rc);
- goto failed_2;
- }
-
- libcfs_sock_release(sock);
- rc = -EPROTO;
-
- if (connreq.racr_srcnid != peer->rap_nid) {
- CERROR("Unexpected srcnid from %u.%u.%u.%u/%d: "
- "received %s expected %s\n",
- HIPQUAD(peer->rap_ip), peer->rap_port,
- libcfs_nid2str(connreq.racr_srcnid),
- libcfs_nid2str(peer->rap_nid));
- goto failed_1;
- }
-
- if (connreq.racr_devid != dev->rad_id) {
- CERROR("Unexpected device id from %u.%u.%u.%u/%d: "
- "received %d expected %d\n",
- HIPQUAD(peer->rap_ip), peer->rap_port,
- connreq.racr_devid, dev->rad_id);
- goto failed_1;
- }
-
- rc = kranal_set_conn_params(conn, &connreq,
- peer->rap_ip, peer->rap_port);
- if (rc != 0)
- goto failed_1;
-
- *connp = conn;
- *dst_nidp = connreq.racr_dstnid;
- return 0;
-
- failed_2:
- libcfs_sock_release(sock);
- failed_1:
- lnet_connect_console_error(rc, peer->rap_nid,
- peer->rap_ip, peer->rap_port);
- failed_0:
- kranal_conn_decref(conn);
- return rc;
-}
-
-int
-kranal_conn_handshake (struct socket *sock, kra_peer_t *peer)
-{
- kra_peer_t *peer2;
- kra_tx_t *tx;
- lnet_nid_t peer_nid;
- lnet_nid_t dst_nid;
- unsigned long flags;
- kra_conn_t *conn;
- int rc;
- int nstale;
- int new_peer = 0;
-
- if (sock == NULL) {
- /* active: connd wants to connect to 'peer' */
- LASSERT (peer != NULL);
- LASSERT (peer->rap_connecting);
-
- rc = kranal_active_conn_handshake(peer, &dst_nid, &conn);
- if (rc != 0)
- return rc;
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- if (!kranal_peer_active(peer)) {
- /* raced with peer getting unlinked */
- write_unlock_irqrestore(&kranal_data.kra_global_lock,
- flags);
- kranal_conn_decref(conn);
- return -ESTALE;
- }
-
- peer_nid = peer->rap_nid;
- } else {
- /* passive: listener accepted 'sock' */
- LASSERT (peer == NULL);
-
- rc = kranal_passive_conn_handshake(sock, &peer_nid,
- &dst_nid, &conn);
- if (rc != 0)
- return rc;
-
- /* assume this is a new peer */
- rc = kranal_create_peer(&peer, peer_nid);
- if (rc != 0) {
- CERROR("Can't create conn for %s\n",
- libcfs_nid2str(peer_nid));
- kranal_conn_decref(conn);
- return -ENOMEM;
- }
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- peer2 = kranal_find_peer_locked(peer_nid);
- if (peer2 == NULL) {
- new_peer = 1;
- } else {
- /* peer_nid already in the peer table */
- kranal_peer_decref(peer);
- peer = peer2;
- }
- }
-
- LASSERT ((!new_peer) != (!kranal_peer_active(peer)));
-
- /* Refuse connection if peer thinks we are a different NID. We check
- * this while holding the global lock, to synch with connection
- * destruction on NID change. */
- if (!lnet_ptlcompat_matchnid(kranal_data.kra_ni->ni_nid, dst_nid)) {
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- CERROR("Stale/bad connection with %s: dst_nid %s, expected %s\n",
- libcfs_nid2str(peer_nid), libcfs_nid2str(dst_nid),
- libcfs_nid2str(kranal_data.kra_ni->ni_nid));
- rc = -ESTALE;
- goto failed;
- }
-
- /* Refuse to duplicate an existing connection (both sides might try to
- * connect at once). NB we return success! We _are_ connected so we
- * _don't_ have any blocked txs to complete with failure. */
- rc = kranal_conn_isdup_locked(peer, conn);
- if (rc != 0) {
- LASSERT (!list_empty(&peer->rap_conns));
- LASSERT (list_empty(&peer->rap_tx_queue));
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
- CWARN("Not creating duplicate connection to %s: %d\n",
- libcfs_nid2str(peer_nid), rc);
- rc = 0;
- goto failed;
- }
-
- if (new_peer) {
- /* peer table takes my ref on the new peer */
- list_add_tail(&peer->rap_list,
- kranal_nid2peerlist(peer_nid));
- }
-
- /* initialise timestamps before reaper looks at them */
- conn->rac_last_tx = conn->rac_last_rx = jiffies;
-
- kranal_peer_addref(peer); /* +1 ref for conn */
- conn->rac_peer = peer;
- list_add_tail(&conn->rac_list, &peer->rap_conns);
-
- kranal_conn_addref(conn); /* +1 ref for conn table */
- list_add_tail(&conn->rac_hashlist,
- kranal_cqid2connlist(conn->rac_cqid));
-
- /* Schedule all packets blocking for a connection */
- while (!list_empty(&peer->rap_tx_queue)) {
- tx = list_entry(peer->rap_tx_queue.next,
- kra_tx_t, tx_list);
-
- list_del(&tx->tx_list);
- kranal_post_fma(conn, tx);
- }
-
- nstale = kranal_close_stale_conns_locked(peer, conn);
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- /* CAVEAT EMPTOR: passive peer can disappear NOW */
-
- if (nstale != 0)
- CWARN("Closed %d stale conns to %s\n", nstale,
- libcfs_nid2str(peer_nid));
-
- CWARN("New connection to %s on devid[%d] = %d\n",
- libcfs_nid2str(peer_nid),
- conn->rac_device->rad_idx, conn->rac_device->rad_id);
-
- /* Ensure conn gets checked. Transmits may have been queued and an
- * FMA event may have happened before it got in the cq hash table */
- kranal_schedule_conn(conn);
- return 0;
-
- failed:
- if (new_peer)
- kranal_peer_decref(peer);
- kranal_conn_decref(conn);
- return rc;
-}
-
-void
-kranal_connect (kra_peer_t *peer)
-{
- kra_tx_t *tx;
- unsigned long flags;
- struct list_head zombies;
- int rc;
-
- LASSERT (peer->rap_connecting);
-
- CDEBUG(D_NET, "About to handshake %s\n",
- libcfs_nid2str(peer->rap_nid));
-
- rc = kranal_conn_handshake(NULL, peer);
-
- CDEBUG(D_NET, "Done handshake %s:%d \n",
- libcfs_nid2str(peer->rap_nid), rc);
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- LASSERT (peer->rap_connecting);
- peer->rap_connecting = 0;
-
- if (rc == 0) {
- /* kranal_conn_handshake() queues blocked txs immediately on
- * success to avoid messages jumping the queue */
- LASSERT (list_empty(&peer->rap_tx_queue));
-
- peer->rap_reconnect_interval = 0; /* OK to reconnect at any time */
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
- return;
- }
-
- peer->rap_reconnect_interval *= 2;
- peer->rap_reconnect_interval =
- MAX(peer->rap_reconnect_interval,
- *kranal_tunables.kra_min_reconnect_interval);
- peer->rap_reconnect_interval =
- MIN(peer->rap_reconnect_interval,
- *kranal_tunables.kra_max_reconnect_interval);
-
- peer->rap_reconnect_time = jiffies + peer->rap_reconnect_interval * HZ;
-
- /* Grab all blocked packets while we have the global lock */
- list_add(&zombies, &peer->rap_tx_queue);
- list_del_init(&peer->rap_tx_queue);
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- if (list_empty(&zombies))
- return;
-
- CDEBUG(D_NETERROR, "Dropping packets for %s: connection failed\n",
- libcfs_nid2str(peer->rap_nid));
-
- do {
- tx = list_entry(zombies.next, kra_tx_t, tx_list);
-
- list_del(&tx->tx_list);
- kranal_tx_done(tx, -EHOSTUNREACH);
-
- } while (!list_empty(&zombies));
-}
-
-void
-kranal_free_acceptsock (kra_acceptsock_t *ras)
-{
- libcfs_sock_release(ras->ras_sock);
- LIBCFS_FREE(ras, sizeof(*ras));
-}
-
-int
-kranal_accept (lnet_ni_t *ni, struct socket *sock)
-{
- kra_acceptsock_t *ras;
- int rc;
- __u32 peer_ip;
- int peer_port;
- unsigned long flags;
-
- rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- LASSERT (rc == 0); /* we succeeded before */
-
- LIBCFS_ALLOC(ras, sizeof(*ras));
- if (ras == NULL) {
- CERROR("ENOMEM allocating connection request from "
- "%u.%u.%u.%u\n", HIPQUAD(peer_ip));
- return -ENOMEM;
- }
-
- ras->ras_sock = sock;
-
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-
- list_add_tail(&ras->ras_list, &kranal_data.kra_connd_acceptq);
- wake_up(&kranal_data.kra_connd_waitq);
-
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
- return 0;
-}
-
-int
-kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid)
-{
- kra_peer_t *peer;
- unsigned long flags;
-
- LASSERT (nid != LNET_NID_ANY);
-
- LIBCFS_ALLOC(peer, sizeof(*peer));
- if (peer == NULL)
- return -ENOMEM;
-
- memset(peer, 0, sizeof(*peer)); /* zero flags etc */
-
- peer->rap_nid = nid;
- atomic_set(&peer->rap_refcount, 1); /* 1 ref for caller */
-
- INIT_LIST_HEAD(&peer->rap_list);
- INIT_LIST_HEAD(&peer->rap_connd_list);
- INIT_LIST_HEAD(&peer->rap_conns);
- INIT_LIST_HEAD(&peer->rap_tx_queue);
-
- peer->rap_reconnect_interval = 0; /* OK to connect at any time */
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- if (kranal_data.kra_nonewpeers) {
- /* shutdown has started already */
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- LIBCFS_FREE(peer, sizeof(*peer));
- CERROR("Can't create peer: network shutdown\n");
- return -ESHUTDOWN;
- }
-
- atomic_inc(&kranal_data.kra_npeers);
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- *peerp = peer;
- return 0;
-}
-
-void
-kranal_destroy_peer (kra_peer_t *peer)
-{
- CDEBUG(D_NET, "peer %s %p deleted\n",
- libcfs_nid2str(peer->rap_nid), peer);
-
- LASSERT (atomic_read(&peer->rap_refcount) == 0);
- LASSERT (peer->rap_persistence == 0);
- LASSERT (!kranal_peer_active(peer));
- LASSERT (!peer->rap_connecting);
- LASSERT (list_empty(&peer->rap_conns));
- LASSERT (list_empty(&peer->rap_tx_queue));
- LASSERT (list_empty(&peer->rap_connd_list));
-
- LIBCFS_FREE(peer, sizeof(*peer));
-
- /* NB a peer's connections keep a reference on their peer until
- * they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
- * zero. */
- atomic_dec(&kranal_data.kra_npeers);
-}
-
-kra_peer_t *
-kranal_find_peer_locked (lnet_nid_t nid)
-{
- struct list_head *peer_list = kranal_nid2peerlist(nid);
- struct list_head *tmp;
- kra_peer_t *peer;
-
- list_for_each (tmp, peer_list) {
-
- peer = list_entry(tmp, kra_peer_t, rap_list);
-
- LASSERT (peer->rap_persistence > 0 || /* persistent peer */
- !list_empty(&peer->rap_conns)); /* active conn */
-
- if (peer->rap_nid != nid)
- continue;
-
- CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
- peer, libcfs_nid2str(nid),
- atomic_read(&peer->rap_refcount));
- return peer;
- }
- return NULL;
-}
-
-kra_peer_t *
-kranal_find_peer (lnet_nid_t nid)
-{
- kra_peer_t *peer;
-
- read_lock(&kranal_data.kra_global_lock);
- peer = kranal_find_peer_locked(nid);
- if (peer != NULL) /* +1 ref for caller? */
- kranal_peer_addref(peer);
- read_unlock(&kranal_data.kra_global_lock);
-
- return peer;
-}
-
-void
-kranal_unlink_peer_locked (kra_peer_t *peer)
-{
- LASSERT (peer->rap_persistence == 0);
- LASSERT (list_empty(&peer->rap_conns));
-
- LASSERT (kranal_peer_active(peer));
- list_del_init(&peer->rap_list);
-
- /* lose peerlist's ref */
- kranal_peer_decref(peer);
-}
-
-int
-kranal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp,
- int *persistencep)
-{
- kra_peer_t *peer;
- struct list_head *ptmp;
- int i;
-
- read_lock(&kranal_data.kra_global_lock);
-
- for (i = 0; i < kranal_data.kra_peer_hash_size; i++) {
-
- list_for_each(ptmp, &kranal_data.kra_peers[i]) {
-
- peer = list_entry(ptmp, kra_peer_t, rap_list);
- LASSERT (peer->rap_persistence > 0 ||
- !list_empty(&peer->rap_conns));
-
- if (index-- > 0)
- continue;
-
- *nidp = peer->rap_nid;
- *ipp = peer->rap_ip;
- *portp = peer->rap_port;
- *persistencep = peer->rap_persistence;
-
- read_unlock(&kranal_data.kra_global_lock);
- return 0;
- }
- }
-
- read_unlock(&kranal_data.kra_global_lock);
- return -ENOENT;
-}
-
-int
-kranal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port)
-{
- unsigned long flags;
- kra_peer_t *peer;
- kra_peer_t *peer2;
- int rc;
-
- if (nid == LNET_NID_ANY)
- return -EINVAL;
-
- rc = kranal_create_peer(&peer, nid);
- if (rc != 0)
- return rc;
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- peer2 = kranal_find_peer_locked(nid);
- if (peer2 != NULL) {
- kranal_peer_decref(peer);
- peer = peer2;
- } else {
- /* peer table takes existing ref on peer */
- list_add_tail(&peer->rap_list,
- kranal_nid2peerlist(nid));
- }
-
- peer->rap_ip = ip;
- peer->rap_port = port;
- peer->rap_persistence++;
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
- return 0;
-}
-
-void
-kranal_del_peer_locked (kra_peer_t *peer)
-{
- struct list_head *ctmp;
- struct list_head *cnxt;
- kra_conn_t *conn;
-
- peer->rap_persistence = 0;
-
- if (list_empty(&peer->rap_conns)) {
- kranal_unlink_peer_locked(peer);
- } else {
- list_for_each_safe(ctmp, cnxt, &peer->rap_conns) {
- conn = list_entry(ctmp, kra_conn_t, rac_list);
-
- kranal_close_conn_locked(conn, 0);
- }
- /* peer unlinks itself when last conn is closed */
- }
-}
-
-int
-kranal_del_peer (lnet_nid_t nid)
-{
- unsigned long flags;
- struct list_head *ptmp;
- struct list_head *pnxt;
- kra_peer_t *peer;
- int lo;
- int hi;
- int i;
- int rc = -ENOENT;
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kranal_nid2peerlist(nid) - kranal_data.kra_peers;
- else {
- lo = 0;
- hi = kranal_data.kra_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kranal_data.kra_peers[i]) {
- peer = list_entry(ptmp, kra_peer_t, rap_list);
- LASSERT (peer->rap_persistence > 0 ||
- !list_empty(&peer->rap_conns));
-
- if (!(nid == LNET_NID_ANY || peer->rap_nid == nid))
- continue;
-
- kranal_del_peer_locked(peer);
- rc = 0; /* matched something */
- }
- }
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- return rc;
-}
-
-kra_conn_t *
-kranal_get_conn_by_idx (int index)
-{
- kra_peer_t *peer;
- struct list_head *ptmp;
- kra_conn_t *conn;
- struct list_head *ctmp;
- int i;
-
- read_lock (&kranal_data.kra_global_lock);
-
- for (i = 0; i < kranal_data.kra_peer_hash_size; i++) {
- list_for_each (ptmp, &kranal_data.kra_peers[i]) {
-
- peer = list_entry(ptmp, kra_peer_t, rap_list);
- LASSERT (peer->rap_persistence > 0 ||
- !list_empty(&peer->rap_conns));
-
- list_for_each (ctmp, &peer->rap_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry(ctmp, kra_conn_t, rac_list);
- CDEBUG(D_NET, "++conn[%p] -> %s (%d)\n", conn,
- libcfs_nid2str(conn->rac_peer->rap_nid),
- atomic_read(&conn->rac_refcount));
- atomic_inc(&conn->rac_refcount);
- read_unlock(&kranal_data.kra_global_lock);
- return conn;
- }
- }
- }
-
- read_unlock(&kranal_data.kra_global_lock);
- return NULL;
-}
-
-int
-kranal_close_peer_conns_locked (kra_peer_t *peer, int why)
-{
- kra_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->rap_conns) {
- conn = list_entry(ctmp, kra_conn_t, rac_list);
-
- count++;
- kranal_close_conn_locked(conn, why);
- }
-
- return count;
-}
-
-int
-kranal_close_matching_conns (lnet_nid_t nid)
-{
- unsigned long flags;
- kra_peer_t *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- int count = 0;
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kranal_nid2peerlist(nid) - kranal_data.kra_peers;
- else {
- lo = 0;
- hi = kranal_data.kra_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kranal_data.kra_peers[i]) {
-
- peer = list_entry(ptmp, kra_peer_t, rap_list);
- LASSERT (peer->rap_persistence > 0 ||
- !list_empty(&peer->rap_conns));
-
- if (!(nid == LNET_NID_ANY || nid == peer->rap_nid))
- continue;
-
- count += kranal_close_peer_conns_locked(peer, 0);
- }
- }
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- /* wildcards always succeed */
- if (nid == LNET_NID_ANY)
- return 0;
-
- return (count == 0) ? -ENOENT : 0;
-}
-
-int
-kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
-
- LASSERT (ni == kranal_data.kra_ni);
-
- switch(cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- __u32 ip = 0;
- int port = 0;
- int share_count = 0;
-
- rc = kranal_get_peer_info(data->ioc_count,
- &nid, &ip, &port, &share_count);
- data->ioc_nid = nid;
- data->ioc_count = share_count;
- data->ioc_u32[0] = ip;
- data->ioc_u32[1] = port;
- break;
- }
- case IOC_LIBCFS_ADD_PEER: {
- rc = kranal_add_persistent_peer(data->ioc_nid,
- data->ioc_u32[0], /* IP */
- data->ioc_u32[1]); /* port */
- break;
- }
- case IOC_LIBCFS_DEL_PEER: {
- rc = kranal_del_peer(data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- kra_conn_t *conn = kranal_get_conn_by_idx(data->ioc_count);
-
- if (conn == NULL)
- rc = -ENOENT;
- else {
- rc = 0;
- data->ioc_nid = conn->rac_peer->rap_nid;
- data->ioc_u32[0] = conn->rac_device->rad_id;
- kranal_conn_decref(conn);
- }
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- rc = kranal_close_matching_conns(data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_REGISTER_MYNID: {
- /* Ignore if this is a noop */
- if (data->ioc_nid == ni->ni_nid) {
- rc = 0;
- } else {
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- rc = -EINVAL;
- }
- break;
- }
- }
-
- return rc;
-}
-
-void
-kranal_free_txdescs(struct list_head *freelist)
-{
- kra_tx_t *tx;
-
- while (!list_empty(freelist)) {
- tx = list_entry(freelist->next, kra_tx_t, tx_list);
-
- list_del(&tx->tx_list);
- LIBCFS_FREE(tx->tx_phys, LNET_MAX_IOV * sizeof(*tx->tx_phys));
- LIBCFS_FREE(tx, sizeof(*tx));
- }
-}
-
-int
-kranal_alloc_txdescs(struct list_head *freelist, int n)
-{
- int i;
- kra_tx_t *tx;
-
- LASSERT (freelist == &kranal_data.kra_idle_txs);
- LASSERT (list_empty(freelist));
-
- for (i = 0; i < n; i++) {
-
- LIBCFS_ALLOC(tx, sizeof(*tx));
- if (tx == NULL) {
- CERROR("Can't allocate tx[%d]\n", i);
- kranal_free_txdescs(freelist);
- return -ENOMEM;
- }
-
- LIBCFS_ALLOC(tx->tx_phys,
- LNET_MAX_IOV * sizeof(*tx->tx_phys));
- if (tx->tx_phys == NULL) {
- CERROR("Can't allocate tx[%d]->tx_phys\n", i);
-
- LIBCFS_FREE(tx, sizeof(*tx));
- kranal_free_txdescs(freelist);
- return -ENOMEM;
- }
-
- tx->tx_buftype = RANAL_BUF_NONE;
- tx->tx_msg.ram_type = RANAL_MSG_NONE;
-
- list_add(&tx->tx_list, freelist);
- }
-
- return 0;
-}
-
-int
-kranal_device_init(int id, kra_device_t *dev)
-{
- int total_ntx = *kranal_tunables.kra_ntx;
- RAP_RETURN rrc;
-
- dev->rad_id = id;
- rrc = RapkGetDeviceByIndex(id, kranal_device_callback,
- &dev->rad_handle);
- if (rrc != RAP_SUCCESS) {
- CERROR("Can't get Rapidarray Device %d: %d\n", id, rrc);
- goto failed_0;
- }
-
- rrc = RapkReserveRdma(dev->rad_handle, total_ntx);
- if (rrc != RAP_SUCCESS) {
- CERROR("Can't reserve %d RDMA descriptors"
- " for device %d: %d\n", total_ntx, id, rrc);
- goto failed_1;
- }
-
- rrc = RapkCreateCQ(dev->rad_handle, total_ntx, RAP_CQTYPE_SEND,
- &dev->rad_rdma_cqh);
- if (rrc != RAP_SUCCESS) {
- CERROR("Can't create rdma cq size %d for device %d: %d\n",
- total_ntx, id, rrc);
- goto failed_1;
- }
-
- rrc = RapkCreateCQ(dev->rad_handle,
- *kranal_tunables.kra_fma_cq_size,
- RAP_CQTYPE_RECV, &dev->rad_fma_cqh);
- if (rrc != RAP_SUCCESS) {
- CERROR("Can't create fma cq size %d for device %d: %d\n",
- *kranal_tunables.kra_fma_cq_size, id, rrc);
- goto failed_2;
- }
-
- return 0;
-
- failed_2:
- RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh);
- failed_1:
- RapkReleaseDevice(dev->rad_handle);
- failed_0:
- return -ENODEV;
-}
-
-void
-kranal_device_fini(kra_device_t *dev)
-{
- LASSERT (list_empty(&dev->rad_ready_conns));
- LASSERT (list_empty(&dev->rad_new_conns));
- LASSERT (dev->rad_nphysmap == 0);
- LASSERT (dev->rad_nppphysmap == 0);
- LASSERT (dev->rad_nvirtmap == 0);
- LASSERT (dev->rad_nobvirtmap == 0);
-
- LASSERT(dev->rad_scheduler == NULL);
- RapkDestroyCQ(dev->rad_handle, dev->rad_fma_cqh);
- RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh);
- RapkReleaseDevice(dev->rad_handle);
-}
-
-void
-kranal_shutdown (lnet_ni_t *ni)
-{
- int i;
- unsigned long flags;
-
- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
- atomic_read(&libcfs_kmemory));
-
- LASSERT (ni == kranal_data.kra_ni);
- LASSERT (ni->ni_data == &kranal_data);
-
- switch (kranal_data.kra_init) {
- default:
- CERROR("Unexpected state %d\n", kranal_data.kra_init);
- LBUG();
-
- case RANAL_INIT_ALL:
- /* Prevent new peers from being created */
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
- kranal_data.kra_nonewpeers = 1;
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- /* Remove all existing peers from the peer table */
- kranal_del_peer(LNET_NID_ANY);
-
- /* Wait for pending conn reqs to be handled */
- i = 2;
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
- while (!list_empty(&kranal_data.kra_connd_acceptq)) {
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
- flags);
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
- "waiting for conn reqs to clean up\n");
- cfs_pause(cfs_time_seconds(1));
-
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
- }
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
- /* Wait for all peers to be freed */
- i = 2;
- while (atomic_read(&kranal_data.kra_npeers) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
- "waiting for %d peers to close down\n",
- atomic_read(&kranal_data.kra_npeers));
- cfs_pause(cfs_time_seconds(1));
- }
- /* fall through */
-
- case RANAL_INIT_DATA:
- break;
- }
-
- /* Peer state all cleaned up BEFORE setting shutdown, so threads don't
- * have to worry about shutdown races. NB connections may be created
- * while there are still active connds, but these will be temporary
- * since peer creation always fails after the listener has started to
- * shut down. */
- LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
-
- /* Flag threads to terminate */
- kranal_data.kra_shutdown = 1;
-
- for (i = 0; i < kranal_data.kra_ndevs; i++) {
- kra_device_t *dev = &kranal_data.kra_devices[i];
-
- spin_lock_irqsave(&dev->rad_lock, flags);
- wake_up(&dev->rad_waitq);
- spin_unlock_irqrestore(&dev->rad_lock, flags);
- }
-
- spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
- wake_up_all(&kranal_data.kra_reaper_waitq);
- spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-
- LASSERT (list_empty(&kranal_data.kra_connd_peers));
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
- wake_up_all(&kranal_data.kra_connd_waitq);
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
- /* Wait for threads to exit */
- i = 2;
- while (atomic_read(&kranal_data.kra_nthreads) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d threads to terminate\n",
- atomic_read(&kranal_data.kra_nthreads));
- cfs_pause(cfs_time_seconds(1));
- }
-
- LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
- if (kranal_data.kra_peers != NULL) {
- for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
- LASSERT (list_empty(&kranal_data.kra_peers[i]));
-
- LIBCFS_FREE(kranal_data.kra_peers,
- sizeof (struct list_head) *
- kranal_data.kra_peer_hash_size);
- }
-
- LASSERT (atomic_read(&kranal_data.kra_nconns) == 0);
- if (kranal_data.kra_conns != NULL) {
- for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
- LASSERT (list_empty(&kranal_data.kra_conns[i]));
-
- LIBCFS_FREE(kranal_data.kra_conns,
- sizeof (struct list_head) *
- kranal_data.kra_conn_hash_size);
- }
-
- for (i = 0; i < kranal_data.kra_ndevs; i++)
- kranal_device_fini(&kranal_data.kra_devices[i]);
-
- kranal_free_txdescs(&kranal_data.kra_idle_txs);
-
- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
- atomic_read(&libcfs_kmemory));
-
- kranal_data.kra_init = RANAL_INIT_NOTHING;
- PORTAL_MODULE_UNUSE;
-}
-
-int
-kranal_startup (lnet_ni_t *ni)
-{
- struct timeval tv;
- int pkmem = atomic_read(&libcfs_kmemory);
- int rc;
- int i;
- kra_device_t *dev;
-
- LASSERT (ni->ni_lnd == &the_kralnd);
-
- /* Only 1 instance supported */
- if (kranal_data.kra_init != RANAL_INIT_NOTHING) {
- CERROR ("Only 1 instance supported\n");
- return -EPERM;
- }
-
- if (lnet_set_ip_niaddr(ni) != 0) {
- CERROR ("Can't determine my NID\n");
- return -EPERM;
- }
-
- if (*kranal_tunables.kra_credits > *kranal_tunables.kra_ntx) {
- CERROR ("Can't set credits(%d) > ntx(%d)\n",
- *kranal_tunables.kra_credits,
- *kranal_tunables.kra_ntx);
- return -EINVAL;
- }
-
- memset(&kranal_data, 0, sizeof(kranal_data)); /* zero pointers, flags etc */
-
- ni->ni_maxtxcredits = *kranal_tunables.kra_credits;
- ni->ni_peertxcredits = *kranal_tunables.kra_peercredits;
-
- ni->ni_data = &kranal_data;
- kranal_data.kra_ni = ni;
-
- /* CAVEAT EMPTOR: Every 'Fma' message includes the sender's NID and
- * a unique (for all time) connstamp so we can uniquely identify
- * the sender. The connstamp is an incrementing counter
- * initialised with seconds + microseconds at startup time. So we
- * rely on NOT creating connections more frequently on average than
- * 1MHz to ensure we don't use old connstamps when we reboot. */
- do_gettimeofday(&tv);
- kranal_data.kra_connstamp =
- kranal_data.kra_peerstamp = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
- rwlock_init(&kranal_data.kra_global_lock);
-
- for (i = 0; i < RANAL_MAXDEVS; i++ ) {
- kra_device_t *dev = &kranal_data.kra_devices[i];
-
- dev->rad_idx = i;
- INIT_LIST_HEAD(&dev->rad_ready_conns);
- INIT_LIST_HEAD(&dev->rad_new_conns);
- init_waitqueue_head(&dev->rad_waitq);
- spin_lock_init(&dev->rad_lock);
- }
-
- kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT;
- init_waitqueue_head(&kranal_data.kra_reaper_waitq);
- spin_lock_init(&kranal_data.kra_reaper_lock);
-
- INIT_LIST_HEAD(&kranal_data.kra_connd_acceptq);
- INIT_LIST_HEAD(&kranal_data.kra_connd_peers);
- init_waitqueue_head(&kranal_data.kra_connd_waitq);
- spin_lock_init(&kranal_data.kra_connd_lock);
-
- INIT_LIST_HEAD(&kranal_data.kra_idle_txs);
- spin_lock_init(&kranal_data.kra_tx_lock);
-
- /* OK to call kranal_api_shutdown() to cleanup now */
- kranal_data.kra_init = RANAL_INIT_DATA;
- PORTAL_MODULE_USE;
-
- kranal_data.kra_peer_hash_size = RANAL_PEER_HASH_SIZE;
- LIBCFS_ALLOC(kranal_data.kra_peers,
- sizeof(struct list_head) * kranal_data.kra_peer_hash_size);
- if (kranal_data.kra_peers == NULL)
- goto failed;
-
- for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
- INIT_LIST_HEAD(&kranal_data.kra_peers[i]);
-
- kranal_data.kra_conn_hash_size = RANAL_PEER_HASH_SIZE;
- LIBCFS_ALLOC(kranal_data.kra_conns,
- sizeof(struct list_head) * kranal_data.kra_conn_hash_size);
- if (kranal_data.kra_conns == NULL)
- goto failed;
-
- for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
- INIT_LIST_HEAD(&kranal_data.kra_conns[i]);
-
- rc = kranal_alloc_txdescs(&kranal_data.kra_idle_txs,
- *kranal_tunables.kra_ntx);
- if (rc != 0)
- goto failed;
-
- rc = kranal_thread_start(kranal_reaper, NULL);
- if (rc != 0) {
- CERROR("Can't spawn ranal reaper: %d\n", rc);
- goto failed;
- }
-
- for (i = 0; i < *kranal_tunables.kra_n_connd; i++) {
- rc = kranal_thread_start(kranal_connd, (void *)(unsigned long)i);
- if (rc != 0) {
- CERROR("Can't spawn ranal connd[%d]: %d\n",
- i, rc);
- goto failed;
- }
- }
-
- LASSERT (kranal_data.kra_ndevs == 0);
-
- /* Use all available RapidArray devices */
- for (i = 0; i < RANAL_MAXDEVS; i++) {
- dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];
-
- rc = kranal_device_init(kranal_devids[i], dev);
- if (rc == 0)
- kranal_data.kra_ndevs++;
- }
-
- if (kranal_data.kra_ndevs == 0) {
- CERROR("Can't initialise any RapidArray devices\n");
- goto failed;
- }
-
- for (i = 0; i < kranal_data.kra_ndevs; i++) {
- dev = &kranal_data.kra_devices[i];
- rc = kranal_thread_start(kranal_scheduler, dev);
- if (rc != 0) {
- CERROR("Can't spawn ranal scheduler[%d]: %d\n",
- i, rc);
- goto failed;
- }
- }
-
- /* flag everything initialised */
- kranal_data.kra_init = RANAL_INIT_ALL;
- /*****************************************************/
-
- CDEBUG(D_MALLOC, "initial kmem %d\n", pkmem);
- return 0;
-
- failed:
- kranal_shutdown(ni);
- return -ENETDOWN;
-}
-
-void __exit
-kranal_module_fini (void)
-{
- lnet_unregister_lnd(&the_kralnd);
- kranal_tunables_fini();
-}
-
-int __init
-kranal_module_init (void)
-{
- int rc;
-
- rc = kranal_tunables_init();
- if (rc != 0)
- return rc;
-
- lnet_register_lnd(&the_kralnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel RapidArray LND v0.01");
-MODULE_LICENSE("GPL");
-
-module_init(kranal_module_init);
-module_exit(kranal_module_fini);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-
-#include <rapl.h>
-
-/* tunables determined at compile time */
-#define RANAL_RESCHED 100 /* # scheduler loops before reschedule */
-
-#define RANAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define RANAL_CONN_HASH_SIZE 101 /* # conn lists */
-
-#define RANAL_MIN_TIMEOUT 5 /* minimum timeout interval (seconds) */
-#define RANAL_TIMEOUT2KEEPALIVE(t) (((t)+1)/2) /* timeout -> keepalive interval */
-
-/* fixed constants */
-#define RANAL_MAXDEVS 2 /* max # devices RapidArray supports */
-#define RANAL_FMA_MAX_PREFIX 232 /* max bytes in FMA "Prefix" we can use */
-#define RANAL_FMA_MAX_DATA ((7<<10)-256) /* Max FMA MSG is 7K including prefix */
-
-
-typedef struct
-{
- int *kra_n_connd; /* # connection daemons */
- int *kra_min_reconnect_interval; /* first failed connection retry... */
- int *kra_max_reconnect_interval; /* ...exponentially increasing to this */
- int *kra_ntx; /* # tx descs */
- int *kra_credits; /* # concurrent sends */
- int *kra_peercredits; /* # concurrent sends to 1 peer */
- int *kra_fma_cq_size; /* # entries in receive CQ */
- int *kra_timeout; /* comms timeout (seconds) */
- int *kra_max_immediate; /* immediate payload breakpoint */
-
-#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kra_sysctl; /* sysctl interface */
-#endif
-} kra_tunables_t;
-
-typedef struct
-{
- RAP_PVOID rad_handle; /* device handle */
- RAP_PVOID rad_fma_cqh; /* FMA completion queue handle */
- RAP_PVOID rad_rdma_cqh; /* rdma completion queue handle */
- int rad_id; /* device id */
- int rad_idx; /* index in kra_devices */
- int rad_ready; /* set by device callback */
- struct list_head rad_ready_conns;/* connections ready to tx/rx */
- struct list_head rad_new_conns; /* new connections to complete */
- wait_queue_head_t rad_waitq; /* scheduler waits here */
- spinlock_t rad_lock; /* serialise */
- void *rad_scheduler; /* scheduling thread */
- unsigned int rad_nphysmap; /* # phys mappings */
- unsigned int rad_nppphysmap; /* # phys pages mapped */
- unsigned int rad_nvirtmap; /* # virt mappings */
- unsigned long rad_nobvirtmap; /* # virt bytes mapped */
-} kra_device_t;
-
-typedef struct
-{
- int kra_init; /* initialisation state */
- int kra_shutdown; /* shut down? */
- atomic_t kra_nthreads; /* # live threads */
- lnet_ni_t *kra_ni; /* _the_ nal instance */
-
- kra_device_t kra_devices[RANAL_MAXDEVS]; /* device/ptag/cq etc */
- int kra_ndevs; /* # devices */
-
- rwlock_t kra_global_lock; /* stabilize peer/conn ops */
-
- struct list_head *kra_peers; /* hash table of all my known peers */
- int kra_peer_hash_size; /* size of kra_peers */
- atomic_t kra_npeers; /* # peers extant */
- int kra_nonewpeers; /* prevent new peers */
-
- struct list_head *kra_conns; /* conns hashed by cqid */
- int kra_conn_hash_size; /* size of kra_conns */
- __u64 kra_peerstamp; /* when I started up */
- __u64 kra_connstamp; /* conn stamp generator */
- int kra_next_cqid; /* cqid generator */
- atomic_t kra_nconns; /* # connections extant */
-
- long kra_new_min_timeout; /* minimum timeout on any new conn */
- wait_queue_head_t kra_reaper_waitq; /* reaper sleeps here */
- spinlock_t kra_reaper_lock; /* serialise */
-
- struct list_head kra_connd_peers; /* peers waiting for a connection */
- struct list_head kra_connd_acceptq; /* accepted sockets to handshake */
- wait_queue_head_t kra_connd_waitq; /* connection daemons sleep here */
- spinlock_t kra_connd_lock; /* serialise */
-
- struct list_head kra_idle_txs; /* idle tx descriptors */
- __u64 kra_next_tx_cookie; /* RDMA completion cookie */
- spinlock_t kra_tx_lock; /* serialise */
-} kra_data_t;
-
-#define RANAL_INIT_NOTHING 0
-#define RANAL_INIT_DATA 1
-#define RANAL_INIT_ALL 2
-
-typedef struct kra_acceptsock /* accepted socket queued for connd */
-{
- struct list_head ras_list; /* queue for attention */
- struct socket *ras_sock; /* the accepted socket */
-} kra_acceptsock_t;
-
-/************************************************************************
- * Wire message structs. These are sent in sender's byte order
- * (i.e. receiver checks magic and flips if required).
- */
-
-typedef struct kra_connreq /* connection request/response */
-{ /* (sent via socket) */
- __u32 racr_magic; /* I'm an ranal connreq */
- __u16 racr_version; /* this is my version number */
- __u16 racr_devid; /* sender's device ID */
- __u64 racr_srcnid; /* sender's NID */
- __u64 racr_dstnid; /* who sender expects to listen */
- __u64 racr_peerstamp; /* sender's instance stamp */
- __u64 racr_connstamp; /* sender's connection stamp */
- __u32 racr_timeout; /* sender's timeout */
- RAP_RI_PARAMETERS racr_riparams; /* sender's endpoint info */
-} kra_connreq_t;
-
-typedef struct
-{
- RAP_MEM_KEY rard_key;
- RAP_PVOID64 rard_addr;
- RAP_UINT32 rard_nob;
-} kra_rdma_desc_t;
-
-typedef struct
-{
- lnet_hdr_t raim_hdr; /* portals header */
- /* Portals payload is in FMA "Message Data" */
-} kra_immediate_msg_t;
-
-typedef struct
-{
- lnet_hdr_t raprm_hdr; /* portals header */
- __u64 raprm_cookie; /* opaque completion cookie */
-} kra_putreq_msg_t;
-
-typedef struct
-{
- __u64 rapam_src_cookie; /* reflected completion cookie */
- __u64 rapam_dst_cookie; /* opaque completion cookie */
- kra_rdma_desc_t rapam_desc; /* sender's sink buffer */
-} kra_putack_msg_t;
-
-typedef struct
-{
- lnet_hdr_t ragm_hdr; /* portals header */
- __u64 ragm_cookie; /* opaque completion cookie */
- kra_rdma_desc_t ragm_desc; /* sender's sink buffer */
-} kra_get_msg_t;
-
-typedef struct
-{
- __u64 racm_cookie; /* reflected completion cookie */
-} kra_completion_msg_t;
-
-typedef struct /* NB must fit in FMA "Prefix" */
-{
- __u32 ram_magic; /* I'm an ranal message */
- __u16 ram_version; /* this is my version number */
- __u16 ram_type; /* msg type */
- __u64 ram_srcnid; /* sender's NID */
- __u64 ram_connstamp; /* sender's connection stamp */
- union {
- kra_immediate_msg_t immediate;
- kra_putreq_msg_t putreq;
- kra_putack_msg_t putack;
- kra_get_msg_t get;
- kra_completion_msg_t completion;
- } ram_u;
- __u32 ram_seq; /* incrementing sequence number */
-} kra_msg_t;
-
-#define RANAL_MSG_MAGIC LNET_PROTO_RA_MAGIC /* unique magic */
-#define RANAL_MSG_VERSION 1 /* current protocol version */
-
-#define RANAL_MSG_FENCE 0x80 /* fence RDMA */
-
-#define RANAL_MSG_NONE 0x00 /* illegal message */
-#define RANAL_MSG_NOOP 0x01 /* empty ram_u (keepalive) */
-#define RANAL_MSG_IMMEDIATE 0x02 /* ram_u.immediate */
-#define RANAL_MSG_PUT_REQ 0x03 /* ram_u.putreq (src->sink) */
-#define RANAL_MSG_PUT_NAK 0x04 /* ram_u.completion (no PUT match: sink->src) */
-#define RANAL_MSG_PUT_ACK 0x05 /* ram_u.putack (PUT matched: sink->src) */
-#define RANAL_MSG_PUT_DONE 0x86 /* ram_u.completion (src->sink) */
-#define RANAL_MSG_GET_REQ 0x07 /* ram_u.get (sink->src) */
-#define RANAL_MSG_GET_NAK 0x08 /* ram_u.completion (no GET match: src->sink) */
-#define RANAL_MSG_GET_DONE 0x89 /* ram_u.completion (src->sink) */
-#define RANAL_MSG_CLOSE 0x8a /* empty ram_u */
-
-/***********************************************************************/
-
-typedef struct kra_tx /* message descriptor */
-{
- struct list_head tx_list; /* queue on idle_txs/rac_sendq/rac_waitq */
- struct kra_conn *tx_conn; /* owning conn */
- lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */
- unsigned long tx_qtime; /* when tx started to wait for something (jiffies) */
- int tx_nob; /* # bytes of payload */
- int tx_buftype; /* payload buffer type */
- void *tx_buffer; /* source/sink buffer */
- int tx_phys_offset; /* first page offset (if phys) */
- int tx_phys_npages; /* # physical pages */
- RAP_PHYS_REGION *tx_phys; /* page descriptors */
- RAP_MEM_KEY tx_map_key; /* mapping key */
- RAP_RDMA_DESCRIPTOR tx_rdma_desc; /* rdma descriptor */
- __u64 tx_cookie; /* identify this tx to peer */
- kra_msg_t tx_msg; /* FMA message buffer */
-} kra_tx_t;
-
-#define RANAL_BUF_NONE 0 /* buffer type not set */
-#define RANAL_BUF_IMMEDIATE 1 /* immediate data */
-#define RANAL_BUF_PHYS_UNMAPPED 2 /* physical: not mapped yet */
-#define RANAL_BUF_PHYS_MAPPED 3 /* physical: mapped already */
-#define RANAL_BUF_VIRT_UNMAPPED 4 /* virtual: not mapped yet */
-#define RANAL_BUF_VIRT_MAPPED 5 /* virtual: mapped already */
-
-typedef struct kra_conn
-{
- struct kra_peer *rac_peer; /* owning peer */
- struct list_head rac_list; /* stash on peer's conn list */
- struct list_head rac_hashlist; /* stash in connection hash table */
- struct list_head rac_schedlist; /* schedule (on rad_???_conns) for attention */
- struct list_head rac_fmaq; /* txs queued for FMA */
- struct list_head rac_rdmaq; /* txs awaiting RDMA completion */
- struct list_head rac_replyq; /* txs awaiting replies */
- __u64 rac_peerstamp; /* peer's unique stamp */
- __u64 rac_peer_connstamp; /* peer's unique connection stamp */
- __u64 rac_my_connstamp; /* my unique connection stamp */
- unsigned long rac_last_tx; /* when I last sent an FMA message (jiffies) */
- unsigned long rac_last_rx; /* when I last received an FMA messages (jiffies) */
- long rac_keepalive; /* keepalive interval (seconds) */
- long rac_timeout; /* infer peer death if no rx for this many seconds */
- __u32 rac_cqid; /* my completion callback id (non-unique) */
- __u32 rac_tx_seq; /* tx msg sequence number */
- __u32 rac_rx_seq; /* rx msg sequence number */
- atomic_t rac_refcount; /* # users */
- unsigned int rac_close_sent; /* I've sent CLOSE */
- unsigned int rac_close_recvd; /* I've received CLOSE */
- unsigned int rac_state; /* connection state */
- unsigned int rac_scheduled; /* being attented to */
- spinlock_t rac_lock; /* serialise */
- kra_device_t *rac_device; /* which device */
- RAP_PVOID rac_rihandle; /* RA endpoint */
- kra_msg_t *rac_rxmsg; /* incoming message (FMA prefix) */
- kra_msg_t rac_msg; /* keepalive/CLOSE message buffer */
-} kra_conn_t;
-
-#define RANAL_CONN_ESTABLISHED 0
-#define RANAL_CONN_CLOSING 1
-#define RANAL_CONN_CLOSED 2
-
-typedef struct kra_peer
-{
- struct list_head rap_list; /* stash on global peer list */
- struct list_head rap_connd_list; /* schedule on kra_connd_peers */
- struct list_head rap_conns; /* all active connections */
- struct list_head rap_tx_queue; /* msgs waiting for a conn */
- lnet_nid_t rap_nid; /* who's on the other end(s) */
- __u32 rap_ip; /* IP address of peer */
- int rap_port; /* port on which peer listens */
- atomic_t rap_refcount; /* # users */
- int rap_persistence; /* "known" peer refs */
- int rap_connecting; /* connection forming */
- unsigned long rap_reconnect_time; /* CURRENT_SECONDS when reconnect OK */
- unsigned long rap_reconnect_interval; /* exponential backoff */
-} kra_peer_t;
-
-extern kra_data_t kranal_data;
-extern kra_tunables_t kranal_tunables;
-
-extern void kranal_destroy_peer(kra_peer_t *peer);
-extern void kranal_destroy_conn(kra_conn_t *conn);
-
-static inline void
-kranal_peer_addref(kra_peer_t *peer)
-{
- CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid));
- LASSERT(atomic_read(&peer->rap_refcount) > 0);
- atomic_inc(&peer->rap_refcount);
-}
-
-static inline void
-kranal_peer_decref(kra_peer_t *peer)
-{
- CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid));
- LASSERT(atomic_read(&peer->rap_refcount) > 0);
- if (atomic_dec_and_test(&peer->rap_refcount))
- kranal_destroy_peer(peer);
-}
-
-static inline struct list_head *
-kranal_nid2peerlist (lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % kranal_data.kra_peer_hash_size;
-
- return (&kranal_data.kra_peers[hash]);
-}
-
-static inline int
-kranal_peer_active(kra_peer_t *peer)
-{
- /* Am I in the peer hash table? */
- return (!list_empty(&peer->rap_list));
-}
-
-static inline void
-kranal_conn_addref(kra_conn_t *conn)
-{
- CDEBUG(D_NET, "%p->%s\n", conn,
- libcfs_nid2str(conn->rac_peer->rap_nid));
- LASSERT(atomic_read(&conn->rac_refcount) > 0);
- atomic_inc(&conn->rac_refcount);
-}
-
-static inline void
-kranal_conn_decref(kra_conn_t *conn)
-{
- CDEBUG(D_NET, "%p->%s\n", conn,
- libcfs_nid2str(conn->rac_peer->rap_nid));
- LASSERT(atomic_read(&conn->rac_refcount) > 0);
- if (atomic_dec_and_test(&conn->rac_refcount))
- kranal_destroy_conn(conn);
-}
-
-static inline struct list_head *
-kranal_cqid2connlist (__u32 cqid)
-{
- unsigned int hash = cqid % kranal_data.kra_conn_hash_size;
-
- return (&kranal_data.kra_conns [hash]);
-}
-
-static inline kra_conn_t *
-kranal_cqid2conn_locked (__u32 cqid)
-{
- struct list_head *conns = kranal_cqid2connlist(cqid);
- struct list_head *tmp;
- kra_conn_t *conn;
-
- list_for_each(tmp, conns) {
- conn = list_entry(tmp, kra_conn_t, rac_hashlist);
-
- if (conn->rac_cqid == cqid)
- return conn;
- }
-
- return NULL;
-}
-
-static inline int
-kranal_tx_mapped (kra_tx_t *tx)
-{
- return (tx->tx_buftype == RANAL_BUF_VIRT_MAPPED ||
- tx->tx_buftype == RANAL_BUF_PHYS_MAPPED);
-}
-
-int kranal_startup (lnet_ni_t *ni);
-void kranal_shutdown (lnet_ni_t *ni);
-int kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int kranal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kranal_eager_recv(lnet_ni_t *ni, void *private,
- lnet_msg_t *lntmsg, void **new_private);
-int kranal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-int kranal_accept(lnet_ni_t *ni, struct socket *sock);
-
-extern void kranal_free_acceptsock (kra_acceptsock_t *ras);
-extern int kranal_listener_procint (cfs_sysctl_table_t *table,
- int write, struct file *filp,
- void *buffer, size_t *lenp);
-extern void kranal_update_reaper_timeout (long timeout);
-extern void kranal_tx_done (kra_tx_t *tx, int completion);
-extern void kranal_unlink_peer_locked (kra_peer_t *peer);
-extern void kranal_schedule_conn (kra_conn_t *conn);
-extern int kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid);
-extern int kranal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port);
-extern kra_peer_t *kranal_find_peer_locked (lnet_nid_t nid);
-extern void kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx);
-extern int kranal_del_peer (lnet_nid_t nid);
-extern void kranal_device_callback (RAP_INT32 devid, RAP_PVOID arg);
-extern int kranal_thread_start (int(*fn)(void *arg), void *arg);
-extern int kranal_connd (void *arg);
-extern int kranal_reaper (void *arg);
-extern int kranal_scheduler (void *arg);
-extern void kranal_close_conn_locked (kra_conn_t *conn, int error);
-extern void kranal_close_conn (kra_conn_t *conn, int error);
-extern void kranal_terminate_conn_locked (kra_conn_t *conn);
-extern void kranal_connect (kra_peer_t *peer);
-extern int kranal_conn_handshake (struct socket *sock, kra_peer_t *peer);
-extern int kranal_tunables_init(void);
-extern void kranal_tunables_fini(void);
-extern void kranal_init_msg(kra_msg_t *msg, int type);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "ralnd.h"
-
-void
-kranal_device_callback(RAP_INT32 devid, RAP_PVOID arg)
-{
- kra_device_t *dev;
- int i;
- unsigned long flags;
-
- CDEBUG(D_NET, "callback for device %d\n", devid);
-
- for (i = 0; i < kranal_data.kra_ndevs; i++) {
-
- dev = &kranal_data.kra_devices[i];
- if (dev->rad_id != devid)
- continue;
-
- spin_lock_irqsave(&dev->rad_lock, flags);
-
- if (!dev->rad_ready) {
- dev->rad_ready = 1;
- wake_up(&dev->rad_waitq);
- }
-
- spin_unlock_irqrestore(&dev->rad_lock, flags);
- return;
- }
-
- CWARN("callback for unknown device %d\n", devid);
-}
-
-void
-kranal_schedule_conn(kra_conn_t *conn)
-{
- kra_device_t *dev = conn->rac_device;
- unsigned long flags;
-
- spin_lock_irqsave(&dev->rad_lock, flags);
-
- if (!conn->rac_scheduled) {
- kranal_conn_addref(conn); /* +1 ref for scheduler */
- conn->rac_scheduled = 1;
- list_add_tail(&conn->rac_schedlist, &dev->rad_ready_conns);
- wake_up(&dev->rad_waitq);
- }
-
- spin_unlock_irqrestore(&dev->rad_lock, flags);
-}
-
-kra_tx_t *
-kranal_get_idle_tx (void)
-{
- unsigned long flags;
- kra_tx_t *tx;
-
- spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
-
- if (list_empty(&kranal_data.kra_idle_txs)) {
- spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
- return NULL;
- }
-
- tx = list_entry(kranal_data.kra_idle_txs.next, kra_tx_t, tx_list);
- list_del(&tx->tx_list);
-
- /* Allocate a new completion cookie. It might not be needed, but we've
- * got a lock right now... */
- tx->tx_cookie = kranal_data.kra_next_tx_cookie++;
-
- spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
-
- LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
- LASSERT (tx->tx_msg.ram_type == RANAL_MSG_NONE);
- LASSERT (tx->tx_conn == NULL);
- LASSERT (tx->tx_lntmsg[0] == NULL);
- LASSERT (tx->tx_lntmsg[1] == NULL);
-
- return tx;
-}
-
-void
-kranal_init_msg(kra_msg_t *msg, int type)
-{
- msg->ram_magic = RANAL_MSG_MAGIC;
- msg->ram_version = RANAL_MSG_VERSION;
- msg->ram_type = type;
- msg->ram_srcnid = kranal_data.kra_ni->ni_nid;
- /* ram_connstamp gets set when FMA is sent */
-}
-
-kra_tx_t *
-kranal_new_tx_msg (int type)
-{
- kra_tx_t *tx = kranal_get_idle_tx();
-
- if (tx != NULL)
- kranal_init_msg(&tx->tx_msg, type);
-
- return tx;
-}
-
-int
-kranal_setup_immediate_buffer (kra_tx_t *tx,
- unsigned int niov, struct iovec *iov,
- int offset, int nob)
-
-{
- /* For now this is almost identical to kranal_setup_virt_buffer, but we
- * could "flatten" the payload into a single contiguous buffer ready
- * for sending direct over an FMA if we ever needed to. */
-
- LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
- LASSERT (nob >= 0);
-
- if (nob == 0) {
- tx->tx_buffer = NULL;
- } else {
- LASSERT (niov > 0);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT (niov > 0);
- }
-
- if (nob > iov->iov_len - offset) {
- CERROR("Can't handle multiple vaddr fragments\n");
- return -EMSGSIZE;
- }
-
- tx->tx_buffer = (void *)(((unsigned long)iov->iov_base) + offset);
- }
-
- tx->tx_buftype = RANAL_BUF_IMMEDIATE;
- tx->tx_nob = nob;
- return 0;
-}
-
-int
-kranal_setup_virt_buffer (kra_tx_t *tx,
- unsigned int niov, struct iovec *iov,
- int offset, int nob)
-
-{
- LASSERT (nob > 0);
- LASSERT (niov > 0);
- LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT (niov > 0);
- }
-
- if (nob > iov->iov_len - offset) {
- CERROR("Can't handle multiple vaddr fragments\n");
- return -EMSGSIZE;
- }
-
- tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED;
- tx->tx_nob = nob;
- tx->tx_buffer = (void *)(((unsigned long)iov->iov_base) + offset);
- return 0;
-}
-
-int
-kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, lnet_kiov_t *kiov,
- int offset, int nob)
-{
- RAP_PHYS_REGION *phys = tx->tx_phys;
- int resid;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT (nob > 0);
- LASSERT (nkiov > 0);
- LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
-
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- nkiov--;
- kiov++;
- LASSERT (nkiov > 0);
- }
-
- tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED;
- tx->tx_nob = nob;
- tx->tx_buffer = (void *)((unsigned long)(kiov->kiov_offset + offset));
-
- phys->Address = lnet_page2phys(kiov->kiov_page);
- phys++;
-
- resid = nob - (kiov->kiov_len - offset);
- while (resid > 0) {
- kiov++;
- nkiov--;
- LASSERT (nkiov > 0);
-
- if (kiov->kiov_offset != 0 ||
- ((resid > PAGE_SIZE) &&
- kiov->kiov_len < PAGE_SIZE)) {
- /* Can't have gaps */
- CERROR("Can't make payload contiguous in I/O VM:"
- "page %d, offset %d, len %d \n",
- (int)(phys - tx->tx_phys),
- kiov->kiov_offset, kiov->kiov_len);
- return -EINVAL;
- }
-
- if ((phys - tx->tx_phys) == LNET_MAX_IOV) {
- CERROR ("payload too big (%d)\n", (int)(phys - tx->tx_phys));
- return -EMSGSIZE;
- }
-
- phys->Address = lnet_page2phys(kiov->kiov_page);
- phys++;
-
- resid -= PAGE_SIZE;
- }
-
- tx->tx_phys_npages = phys - tx->tx_phys;
- return 0;
-}
-
-static inline int
-kranal_setup_rdma_buffer (kra_tx_t *tx, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- int offset, int nob)
-{
- LASSERT ((iov == NULL) != (kiov == NULL));
-
- if (kiov != NULL)
- return kranal_setup_phys_buffer(tx, niov, kiov, offset, nob);
-
- return kranal_setup_virt_buffer(tx, niov, iov, offset, nob);
-}
-
-int
-kranal_map_buffer (kra_tx_t *tx)
-{
- kra_conn_t *conn = tx->tx_conn;
- kra_device_t *dev = conn->rac_device;
- RAP_RETURN rrc;
-
- LASSERT (current == dev->rad_scheduler);
-
- switch (tx->tx_buftype) {
- default:
- LBUG();
-
- case RANAL_BUF_NONE:
- case RANAL_BUF_IMMEDIATE:
- case RANAL_BUF_PHYS_MAPPED:
- case RANAL_BUF_VIRT_MAPPED:
- return 0;
-
- case RANAL_BUF_PHYS_UNMAPPED:
- rrc = RapkRegisterPhys(dev->rad_handle,
- tx->tx_phys, tx->tx_phys_npages,
- &tx->tx_map_key);
- if (rrc != RAP_SUCCESS) {
- CERROR ("Can't map %d pages: dev %d "
- "phys %u pp %u, virt %u nob %lu\n",
- tx->tx_phys_npages, dev->rad_id,
- dev->rad_nphysmap, dev->rad_nppphysmap,
- dev->rad_nvirtmap, dev->rad_nobvirtmap);
- return -ENOMEM; /* assume insufficient resources */
- }
-
- dev->rad_nphysmap++;
- dev->rad_nppphysmap += tx->tx_phys_npages;
-
- tx->tx_buftype = RANAL_BUF_PHYS_MAPPED;
- return 0;
-
- case RANAL_BUF_VIRT_UNMAPPED:
- rrc = RapkRegisterMemory(dev->rad_handle,
- tx->tx_buffer, tx->tx_nob,
- &tx->tx_map_key);
- if (rrc != RAP_SUCCESS) {
- CERROR ("Can't map %d bytes: dev %d "
- "phys %u pp %u, virt %u nob %lu\n",
- tx->tx_nob, dev->rad_id,
- dev->rad_nphysmap, dev->rad_nppphysmap,
- dev->rad_nvirtmap, dev->rad_nobvirtmap);
- return -ENOMEM; /* assume insufficient resources */
- }
-
- dev->rad_nvirtmap++;
- dev->rad_nobvirtmap += tx->tx_nob;
-
- tx->tx_buftype = RANAL_BUF_VIRT_MAPPED;
- return 0;
- }
-}
-
-void
-kranal_unmap_buffer (kra_tx_t *tx)
-{
- kra_device_t *dev;
- RAP_RETURN rrc;
-
- switch (tx->tx_buftype) {
- default:
- LBUG();
-
- case RANAL_BUF_NONE:
- case RANAL_BUF_IMMEDIATE:
- case RANAL_BUF_PHYS_UNMAPPED:
- case RANAL_BUF_VIRT_UNMAPPED:
- break;
-
- case RANAL_BUF_PHYS_MAPPED:
- LASSERT (tx->tx_conn != NULL);
- dev = tx->tx_conn->rac_device;
- LASSERT (current == dev->rad_scheduler);
- rrc = RapkDeregisterMemory(dev->rad_handle, NULL,
- &tx->tx_map_key);
- LASSERT (rrc == RAP_SUCCESS);
-
- dev->rad_nphysmap--;
- dev->rad_nppphysmap -= tx->tx_phys_npages;
-
- tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED;
- break;
-
- case RANAL_BUF_VIRT_MAPPED:
- LASSERT (tx->tx_conn != NULL);
- dev = tx->tx_conn->rac_device;
- LASSERT (current == dev->rad_scheduler);
- rrc = RapkDeregisterMemory(dev->rad_handle, tx->tx_buffer,
- &tx->tx_map_key);
- LASSERT (rrc == RAP_SUCCESS);
-
- dev->rad_nvirtmap--;
- dev->rad_nobvirtmap -= tx->tx_nob;
-
- tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED;
- break;
- }
-}
-
-void
-kranal_tx_done (kra_tx_t *tx, int completion)
-{
- lnet_msg_t *lnetmsg[2];
- unsigned long flags;
- int i;
-
- LASSERT (!in_interrupt());
-
- kranal_unmap_buffer(tx);
-
- lnetmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
- lnetmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
-
- tx->tx_buftype = RANAL_BUF_NONE;
- tx->tx_msg.ram_type = RANAL_MSG_NONE;
- tx->tx_conn = NULL;
-
- spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
-
- list_add_tail(&tx->tx_list, &kranal_data.kra_idle_txs);
-
- spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
-
- /* finalize AFTER freeing lnet msgs */
- for (i = 0; i < 2; i++) {
- if (lnetmsg[i] == NULL)
- continue;
-
- lnet_finalize(kranal_data.kra_ni, lnetmsg[i], completion);
- }
-}
-
-kra_conn_t *
-kranal_find_conn_locked (kra_peer_t *peer)
-{
- struct list_head *tmp;
-
- /* just return the first connection */
- list_for_each (tmp, &peer->rap_conns) {
- return list_entry(tmp, kra_conn_t, rac_list);
- }
-
- return NULL;
-}
-
-void
-kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx)
-{
- unsigned long flags;
-
- tx->tx_conn = conn;
-
- spin_lock_irqsave(&conn->rac_lock, flags);
- list_add_tail(&tx->tx_list, &conn->rac_fmaq);
- tx->tx_qtime = jiffies;
- spin_unlock_irqrestore(&conn->rac_lock, flags);
-
- kranal_schedule_conn(conn);
-}
-
-void
-kranal_launch_tx (kra_tx_t *tx, lnet_nid_t nid)
-{
- unsigned long flags;
- kra_peer_t *peer;
- kra_conn_t *conn;
- int rc;
- int retry;
- rwlock_t *g_lock = &kranal_data.kra_global_lock;
-
- /* If I get here, I've committed to send, so I complete the tx with
- * failure on any problems */
-
- LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */
-
- for (retry = 0; ; retry = 1) {
-
- read_lock(g_lock);
-
- peer = kranal_find_peer_locked(nid);
- if (peer != NULL) {
- conn = kranal_find_conn_locked(peer);
- if (conn != NULL) {
- kranal_post_fma(conn, tx);
- read_unlock(g_lock);
- return;
- }
- }
-
- /* Making connections; I'll need a write lock... */
- read_unlock(g_lock);
- write_lock_irqsave(g_lock, flags);
-
- peer = kranal_find_peer_locked(nid);
- if (peer != NULL)
- break;
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (retry) {
- CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
- kranal_tx_done(tx, -EHOSTUNREACH);
- return;
- }
-
- rc = kranal_add_persistent_peer(nid, LNET_NIDADDR(nid),
- lnet_acceptor_port());
- if (rc != 0) {
- CERROR("Can't add peer %s: %d\n",
- libcfs_nid2str(nid), rc);
- kranal_tx_done(tx, rc);
- return;
- }
- }
-
- conn = kranal_find_conn_locked(peer);
- if (conn != NULL) {
- /* Connection exists; queue message on it */
- kranal_post_fma(conn, tx);
- write_unlock_irqrestore(g_lock, flags);
- return;
- }
-
- LASSERT (peer->rap_persistence > 0);
-
- if (!peer->rap_connecting) {
- LASSERT (list_empty(&peer->rap_tx_queue));
-
- if (!(peer->rap_reconnect_interval == 0 || /* first attempt */
- time_after_eq(jiffies, peer->rap_reconnect_time))) {
- write_unlock_irqrestore(g_lock, flags);
- kranal_tx_done(tx, -EHOSTUNREACH);
- return;
- }
-
- peer->rap_connecting = 1;
- kranal_peer_addref(peer); /* extra ref for connd */
-
- spin_lock(&kranal_data.kra_connd_lock);
-
- list_add_tail(&peer->rap_connd_list,
- &kranal_data.kra_connd_peers);
- wake_up(&kranal_data.kra_connd_waitq);
-
- spin_unlock(&kranal_data.kra_connd_lock);
- }
-
- /* A connection is being established; queue the message... */
- list_add_tail(&tx->tx_list, &peer->rap_tx_queue);
-
- write_unlock_irqrestore(g_lock, flags);
-}
-
-void
-kranal_rdma(kra_tx_t *tx, int type,
- kra_rdma_desc_t *sink, int nob, __u64 cookie)
-{
- kra_conn_t *conn = tx->tx_conn;
- RAP_RETURN rrc;
- unsigned long flags;
-
- LASSERT (kranal_tx_mapped(tx));
- LASSERT (nob <= sink->rard_nob);
- LASSERT (nob <= tx->tx_nob);
-
- /* No actual race with scheduler sending CLOSE (I'm she!) */
- LASSERT (current == conn->rac_device->rad_scheduler);
-
- memset(&tx->tx_rdma_desc, 0, sizeof(tx->tx_rdma_desc));
- tx->tx_rdma_desc.SrcPtr.AddressBits = (__u64)((unsigned long)tx->tx_buffer);
- tx->tx_rdma_desc.SrcKey = tx->tx_map_key;
- tx->tx_rdma_desc.DstPtr = sink->rard_addr;
- tx->tx_rdma_desc.DstKey = sink->rard_key;
- tx->tx_rdma_desc.Length = nob;
- tx->tx_rdma_desc.AppPtr = tx;
-
- /* prep final completion message */
- kranal_init_msg(&tx->tx_msg, type);
- tx->tx_msg.ram_u.completion.racm_cookie = cookie;
-
- if (nob == 0) { /* Immediate completion */
- kranal_post_fma(conn, tx);
- return;
- }
-
- LASSERT (!conn->rac_close_sent); /* Don't lie (CLOSE == RDMA idle) */
-
- rrc = RapkPostRdma(conn->rac_rihandle, &tx->tx_rdma_desc);
- LASSERT (rrc == RAP_SUCCESS);
-
- spin_lock_irqsave(&conn->rac_lock, flags);
- list_add_tail(&tx->tx_list, &conn->rac_rdmaq);
- tx->tx_qtime = jiffies;
- spin_unlock_irqrestore(&conn->rac_lock, flags);
-}
-
-int
-kranal_consume_rxmsg (kra_conn_t *conn, void *buffer, int nob)
-{
- __u32 nob_received = nob;
- RAP_RETURN rrc;
-
- LASSERT (conn->rac_rxmsg != NULL);
- CDEBUG(D_NET, "Consuming %p\n", conn);
-
- rrc = RapkFmaCopyOut(conn->rac_rihandle, buffer,
- &nob_received, sizeof(kra_msg_t));
- LASSERT (rrc == RAP_SUCCESS);
-
- conn->rac_rxmsg = NULL;
-
- if (nob_received < nob) {
- CWARN("Incomplete immediate msg from %s: expected %d, got %d\n",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- nob, nob_received);
- return -EPROTO;
- }
-
- return 0;
-}
-
-int
-kranal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- lnet_hdr_t *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- kra_tx_t *tx;
- int rc;
-
- /* NB 'private' is different depending on what we're sending.... */
-
- CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
- nob, niov, libcfs_id2str(target));
-
- LASSERT (nob == 0 || niov > 0);
- LASSERT (niov <= LNET_MAX_IOV);
-
- LASSERT (!in_interrupt());
- /* payload is either all vaddrs or all pages */
- LASSERT (!(kiov != NULL && iov != NULL));
-
- if (routing) {
- CERROR ("Can't route\n");
- return -EIO;
- }
-
- switch(type) {
- default:
- LBUG();
-
- case LNET_MSG_ACK:
- LASSERT (nob == 0);
- break;
-
- case LNET_MSG_GET:
- LASSERT (niov == 0);
- LASSERT (nob == 0);
- /* We have to consider the eventual sink buffer rather than any
- * payload passed here (there isn't any, and strictly, looking
- * inside lntmsg is a layering violation). We send a simple
- * IMMEDIATE GET if the sink buffer is mapped already and small
- * enough for FMA */
-
- if (routing || target_is_router)
- break; /* send IMMEDIATE */
-
- if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0 &&
- lntmsg->msg_md->md_length <= RANAL_FMA_MAX_DATA &&
- lntmsg->msg_md->md_length <= *kranal_tunables.kra_max_immediate)
- break; /* send IMMEDIATE */
-
- tx = kranal_new_tx_msg(RANAL_MSG_GET_REQ);
- if (tx == NULL)
- return -ENOMEM;
-
- if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
- rc = kranal_setup_virt_buffer(tx, lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.iov,
- 0, lntmsg->msg_md->md_length);
- else
- rc = kranal_setup_phys_buffer(tx, lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.kiov,
- 0, lntmsg->msg_md->md_length);
- if (rc != 0) {
- kranal_tx_done(tx, rc);
- return -EIO;
- }
-
- tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg);
- if (tx->tx_lntmsg[1] == NULL) {
- CERROR("Can't create reply for GET to %s\n",
- libcfs_nid2str(target.nid));
- kranal_tx_done(tx, rc);
- return -EIO;
- }
-
- tx->tx_lntmsg[0] = lntmsg;
- tx->tx_msg.ram_u.get.ragm_hdr = *hdr;
- /* rest of tx_msg is setup just before it is sent */
- kranal_launch_tx(tx, target.nid);
- return 0;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- if (kiov == NULL && /* not paged */
- nob <= RANAL_FMA_MAX_DATA && /* small enough */
- nob <= *kranal_tunables.kra_max_immediate)
- break; /* send IMMEDIATE */
-
- tx = kranal_new_tx_msg(RANAL_MSG_PUT_REQ);
- if (tx == NULL)
- return -ENOMEM;
-
- rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, nob);
- if (rc != 0) {
- kranal_tx_done(tx, rc);
- return -EIO;
- }
-
- tx->tx_lntmsg[0] = lntmsg;
- tx->tx_msg.ram_u.putreq.raprm_hdr = *hdr;
- /* rest of tx_msg is setup just before it is sent */
- kranal_launch_tx(tx, target.nid);
- return 0;
- }
-
- /* send IMMEDIATE */
-
- LASSERT (kiov == NULL);
- LASSERT (nob <= RANAL_FMA_MAX_DATA);
-
- tx = kranal_new_tx_msg(RANAL_MSG_IMMEDIATE);
- if (tx == NULL)
- return -ENOMEM;
-
- rc = kranal_setup_immediate_buffer(tx, niov, iov, offset, nob);
- if (rc != 0) {
- kranal_tx_done(tx, rc);
- return -EIO;
- }
-
- tx->tx_msg.ram_u.immediate.raim_hdr = *hdr;
- tx->tx_lntmsg[0] = lntmsg;
- kranal_launch_tx(tx, target.nid);
- return 0;
-}
-
-void
-kranal_reply(lnet_ni_t *ni, kra_conn_t *conn, lnet_msg_t *lntmsg)
-{
- kra_msg_t *rxmsg = conn->rac_rxmsg;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- kra_tx_t *tx;
- int rc;
-
- tx = kranal_get_idle_tx();
- if (tx == NULL)
- goto failed_0;
-
- rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, nob);
- if (rc != 0)
- goto failed_1;
-
- tx->tx_conn = conn;
-
- rc = kranal_map_buffer(tx);
- if (rc != 0)
- goto failed_1;
-
- tx->tx_lntmsg[0] = lntmsg;
-
- kranal_rdma(tx, RANAL_MSG_GET_DONE,
- &rxmsg->ram_u.get.ragm_desc, nob,
- rxmsg->ram_u.get.ragm_cookie);
- return;
-
- failed_1:
- kranal_tx_done(tx, -EIO);
- failed_0:
- lnet_finalize(ni, lntmsg, -EIO);
-}
-
-int
-kranal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- void **new_private)
-{
- kra_conn_t *conn = (kra_conn_t *)private;
-
- LCONSOLE_ERROR_MSG(0x12b, "Dropping message from %s: no buffers free.\n",
- libcfs_nid2str(conn->rac_peer->rap_nid));
-
- return -EDEADLK;
-}
-
-int
-kranal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- kra_conn_t *conn = private;
- kra_msg_t *rxmsg = conn->rac_rxmsg;
- kra_tx_t *tx;
- void *buffer;
- int rc;
-
- LASSERT (mlen <= rlen);
- LASSERT (!in_interrupt());
- /* Either all pages or all vaddrs */
- LASSERT (!(kiov != NULL && iov != NULL));
-
- CDEBUG(D_NET, "conn %p, rxmsg %p, lntmsg %p\n", conn, rxmsg, lntmsg);
-
- switch(rxmsg->ram_type) {
- default:
- LBUG();
-
- case RANAL_MSG_IMMEDIATE:
- if (mlen == 0) {
- buffer = NULL;
- } else if (kiov != NULL) {
- CERROR("Can't recv immediate into paged buffer\n");
- return -EIO;
- } else {
- LASSERT (niov > 0);
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- iov++;
- niov--;
- LASSERT (niov > 0);
- }
- if (mlen > iov->iov_len - offset) {
- CERROR("Can't handle immediate frags\n");
- return -EIO;
- }
- buffer = ((char *)iov->iov_base) + offset;
- }
- rc = kranal_consume_rxmsg(conn, buffer, mlen);
- lnet_finalize(ni, lntmsg, (rc == 0) ? 0 : -EIO);
- return 0;
-
- case RANAL_MSG_PUT_REQ:
- tx = kranal_new_tx_msg(RANAL_MSG_PUT_ACK);
- if (tx == NULL) {
- kranal_consume_rxmsg(conn, NULL, 0);
- return -ENOMEM;
- }
-
- rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, mlen);
- if (rc != 0) {
- kranal_tx_done(tx, rc);
- kranal_consume_rxmsg(conn, NULL, 0);
- return -EIO;
- }
-
- tx->tx_conn = conn;
- rc = kranal_map_buffer(tx);
- if (rc != 0) {
- kranal_tx_done(tx, rc);
- kranal_consume_rxmsg(conn, NULL, 0);
- return -EIO;
- }
-
- tx->tx_msg.ram_u.putack.rapam_src_cookie =
- conn->rac_rxmsg->ram_u.putreq.raprm_cookie;
- tx->tx_msg.ram_u.putack.rapam_dst_cookie = tx->tx_cookie;
- tx->tx_msg.ram_u.putack.rapam_desc.rard_key = tx->tx_map_key;
- tx->tx_msg.ram_u.putack.rapam_desc.rard_addr.AddressBits =
- (__u64)((unsigned long)tx->tx_buffer);
- tx->tx_msg.ram_u.putack.rapam_desc.rard_nob = mlen;
-
- tx->tx_lntmsg[0] = lntmsg; /* finalize this on RDMA_DONE */
-
- kranal_post_fma(conn, tx);
- kranal_consume_rxmsg(conn, NULL, 0);
- return 0;
-
- case RANAL_MSG_GET_REQ:
- if (lntmsg != NULL) {
- /* Matched! */
- kranal_reply(ni, conn, lntmsg);
- } else {
- /* No match */
- tx = kranal_new_tx_msg(RANAL_MSG_GET_NAK);
- if (tx != NULL) {
- tx->tx_msg.ram_u.completion.racm_cookie =
- rxmsg->ram_u.get.ragm_cookie;
- kranal_post_fma(conn, tx);
- }
- }
- kranal_consume_rxmsg(conn, NULL, 0);
- return 0;
- }
-}
-
-int
-kranal_thread_start (int(*fn)(void *arg), void *arg)
-{
- long pid = kernel_thread(fn, arg, 0);
-
- if (pid < 0)
- return(int)pid;
-
- atomic_inc(&kranal_data.kra_nthreads);
- return 0;
-}
-
-void
-kranal_thread_fini (void)
-{
- atomic_dec(&kranal_data.kra_nthreads);
-}
-
-int
-kranal_check_conn_timeouts (kra_conn_t *conn)
-{
- kra_tx_t *tx;
- struct list_head *ttmp;
- unsigned long flags;
- long timeout;
- unsigned long now = jiffies;
-
- LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED ||
- conn->rac_state == RANAL_CONN_CLOSING);
-
- if (!conn->rac_close_sent &&
- time_after_eq(now, conn->rac_last_tx + conn->rac_keepalive * HZ)) {
- /* not sent in a while; schedule conn so scheduler sends a keepalive */
- CDEBUG(D_NET, "Scheduling keepalive %p->%s\n",
- conn, libcfs_nid2str(conn->rac_peer->rap_nid));
- kranal_schedule_conn(conn);
- }
-
- timeout = conn->rac_timeout * HZ;
-
- if (!conn->rac_close_recvd &&
- time_after_eq(now, conn->rac_last_rx + timeout)) {
- CERROR("%s received from %s within %lu seconds\n",
- (conn->rac_state == RANAL_CONN_ESTABLISHED) ?
- "Nothing" : "CLOSE not",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - conn->rac_last_rx)/HZ);
- return -ETIMEDOUT;
- }
-
- if (conn->rac_state != RANAL_CONN_ESTABLISHED)
- return 0;
-
- /* Check the conn's queues are moving. These are "belt+braces" checks,
- * in case of hardware/software errors that make this conn seem
- * responsive even though it isn't progressing its message queues. */
-
- spin_lock_irqsave(&conn->rac_lock, flags);
-
- list_for_each (ttmp, &conn->rac_fmaq) {
- tx = list_entry(ttmp, kra_tx_t, tx_list);
-
- if (time_after_eq(now, tx->tx_qtime + timeout)) {
- spin_unlock_irqrestore(&conn->rac_lock, flags);
- CERROR("tx on fmaq for %s blocked %lu seconds\n",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - tx->tx_qtime)/HZ);
- return -ETIMEDOUT;
- }
- }
-
- list_for_each (ttmp, &conn->rac_rdmaq) {
- tx = list_entry(ttmp, kra_tx_t, tx_list);
-
- if (time_after_eq(now, tx->tx_qtime + timeout)) {
- spin_unlock_irqrestore(&conn->rac_lock, flags);
- CERROR("tx on rdmaq for %s blocked %lu seconds\n",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - tx->tx_qtime)/HZ);
- return -ETIMEDOUT;
- }
- }
-
- list_for_each (ttmp, &conn->rac_replyq) {
- tx = list_entry(ttmp, kra_tx_t, tx_list);
-
- if (time_after_eq(now, tx->tx_qtime + timeout)) {
- spin_unlock_irqrestore(&conn->rac_lock, flags);
- CERROR("tx on replyq for %s blocked %lu seconds\n",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - tx->tx_qtime)/HZ);
- return -ETIMEDOUT;
- }
- }
-
- spin_unlock_irqrestore(&conn->rac_lock, flags);
- return 0;
-}
-
-void
-kranal_reaper_check (int idx, unsigned long *min_timeoutp)
-{
- struct list_head *conns = &kranal_data.kra_conns[idx];
- struct list_head *ctmp;
- kra_conn_t *conn;
- unsigned long flags;
- int rc;
-
- again:
- /* NB. We expect to check all the conns and not find any problems, so
- * we just use a shared lock while we take a look... */
- read_lock(&kranal_data.kra_global_lock);
-
- list_for_each (ctmp, conns) {
- conn = list_entry(ctmp, kra_conn_t, rac_hashlist);
-
- if (conn->rac_timeout < *min_timeoutp )
- *min_timeoutp = conn->rac_timeout;
- if (conn->rac_keepalive < *min_timeoutp )
- *min_timeoutp = conn->rac_keepalive;
-
- rc = kranal_check_conn_timeouts(conn);
- if (rc == 0)
- continue;
-
- kranal_conn_addref(conn);
- read_unlock(&kranal_data.kra_global_lock);
-
- CERROR("Conn to %s, cqid %d timed out\n",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- conn->rac_cqid);
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- switch (conn->rac_state) {
- default:
- LBUG();
-
- case RANAL_CONN_ESTABLISHED:
- kranal_close_conn_locked(conn, -ETIMEDOUT);
- break;
-
- case RANAL_CONN_CLOSING:
- kranal_terminate_conn_locked(conn);
- break;
- }
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- kranal_conn_decref(conn);
-
- /* start again now I've dropped the lock */
- goto again;
- }
-
- read_unlock(&kranal_data.kra_global_lock);
-}
-
-int
-kranal_connd (void *arg)
-{
- long id = (long)arg;
- char name[16];
- wait_queue_t wait;
- unsigned long flags;
- kra_peer_t *peer;
- kra_acceptsock_t *ras;
- int did_something;
-
- snprintf(name, sizeof(name), "kranal_connd_%02ld", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- init_waitqueue_entry(&wait, current);
-
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-
- while (!kranal_data.kra_shutdown) {
- did_something = 0;
-
- if (!list_empty(&kranal_data.kra_connd_acceptq)) {
- ras = list_entry(kranal_data.kra_connd_acceptq.next,
- kra_acceptsock_t, ras_list);
- list_del(&ras->ras_list);
-
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
- CDEBUG(D_NET,"About to handshake someone\n");
-
- kranal_conn_handshake(ras->ras_sock, NULL);
- kranal_free_acceptsock(ras);
-
- CDEBUG(D_NET,"Finished handshaking someone\n");
-
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
- did_something = 1;
- }
-
- if (!list_empty(&kranal_data.kra_connd_peers)) {
- peer = list_entry(kranal_data.kra_connd_peers.next,
- kra_peer_t, rap_connd_list);
-
- list_del_init(&peer->rap_connd_list);
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
- kranal_connect(peer);
- kranal_peer_decref(peer);
-
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
- did_something = 1;
- }
-
- if (did_something)
- continue;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kranal_data.kra_connd_waitq, &wait);
-
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
- schedule ();
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&kranal_data.kra_connd_waitq, &wait);
-
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
- }
-
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
- kranal_thread_fini();
- return 0;
-}
-
-void
-kranal_update_reaper_timeout(long timeout)
-{
- unsigned long flags;
-
- LASSERT (timeout > 0);
-
- spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
- if (timeout < kranal_data.kra_new_min_timeout)
- kranal_data.kra_new_min_timeout = timeout;
-
- spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-}
-
-int
-kranal_reaper (void *arg)
-{
- wait_queue_t wait;
- unsigned long flags;
- long timeout;
- int i;
- int conn_entries = kranal_data.kra_conn_hash_size;
- int conn_index = 0;
- int base_index = conn_entries - 1;
- unsigned long next_check_time = jiffies;
- long next_min_timeout = MAX_SCHEDULE_TIMEOUT;
- long current_min_timeout = 1;
-
- cfs_daemonize("kranal_reaper");
- cfs_block_allsigs();
-
- init_waitqueue_entry(&wait, current);
-
- spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
- while (!kranal_data.kra_shutdown) {
- /* I wake up every 'p' seconds to check for timeouts on some
- * more peers. I try to check every connection 'n' times
- * within the global minimum of all keepalive and timeout
- * intervals, to ensure I attend to every connection within
- * (n+1)/n times its timeout intervals. */
- const int p = 1;
- const int n = 3;
- unsigned long min_timeout;
- int chunk;
-
- /* careful with the jiffy wrap... */
- timeout = (long)(next_check_time - jiffies);
- if (timeout > 0) {
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kranal_data.kra_reaper_waitq, &wait);
-
- spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-
- schedule_timeout(timeout);
-
- spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&kranal_data.kra_reaper_waitq, &wait);
- continue;
- }
-
- if (kranal_data.kra_new_min_timeout != MAX_SCHEDULE_TIMEOUT) {
- /* new min timeout set: restart min timeout scan */
- next_min_timeout = MAX_SCHEDULE_TIMEOUT;
- base_index = conn_index - 1;
- if (base_index < 0)
- base_index = conn_entries - 1;
-
- if (kranal_data.kra_new_min_timeout < current_min_timeout) {
- current_min_timeout = kranal_data.kra_new_min_timeout;
- CDEBUG(D_NET, "Set new min timeout %ld\n",
- current_min_timeout);
- }
-
- kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT;
- }
- min_timeout = current_min_timeout;
-
- spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-
- LASSERT (min_timeout > 0);
-
- /* Compute how many table entries to check now so I get round
- * the whole table fast enough given that I do this at fixed
- * intervals of 'p' seconds) */
- chunk = conn_entries;
- if (min_timeout > n * p)
- chunk = (chunk * n * p) / min_timeout;
- if (chunk == 0)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kranal_reaper_check(conn_index,
- &next_min_timeout);
- conn_index = (conn_index + 1) % conn_entries;
- }
-
- next_check_time += p * HZ;
-
- spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
- if (((conn_index - chunk <= base_index &&
- base_index < conn_index) ||
- (conn_index - conn_entries - chunk <= base_index &&
- base_index < conn_index - conn_entries))) {
-
- /* Scanned all conns: set current_min_timeout... */
- if (current_min_timeout != next_min_timeout) {
- current_min_timeout = next_min_timeout;
- CDEBUG(D_NET, "Set new min timeout %ld\n",
- current_min_timeout);
- }
-
- /* ...and restart min timeout scan */
- next_min_timeout = MAX_SCHEDULE_TIMEOUT;
- base_index = conn_index - 1;
- if (base_index < 0)
- base_index = conn_entries - 1;
- }
- }
-
- kranal_thread_fini();
- return 0;
-}
-
-void
-kranal_check_rdma_cq (kra_device_t *dev)
-{
- kra_conn_t *conn;
- kra_tx_t *tx;
- RAP_RETURN rrc;
- unsigned long flags;
- RAP_RDMA_DESCRIPTOR *desc;
- __u32 cqid;
- __u32 event_type;
-
- for (;;) {
- rrc = RapkCQDone(dev->rad_rdma_cqh, &cqid, &event_type);
- if (rrc == RAP_NOT_DONE) {
- CDEBUG(D_NET, "RDMA CQ %d empty\n", dev->rad_id);
- return;
- }
-
- LASSERT (rrc == RAP_SUCCESS);
- LASSERT ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0);
-
- read_lock(&kranal_data.kra_global_lock);
-
- conn = kranal_cqid2conn_locked(cqid);
- if (conn == NULL) {
- /* Conn was destroyed? */
- CDEBUG(D_NET, "RDMA CQID lookup %d failed\n", cqid);
- read_unlock(&kranal_data.kra_global_lock);
- continue;
- }
-
- rrc = RapkRdmaDone(conn->rac_rihandle, &desc);
- LASSERT (rrc == RAP_SUCCESS);
-
- CDEBUG(D_NET, "Completed %p\n",
- list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list));
-
- spin_lock_irqsave(&conn->rac_lock, flags);
-
- LASSERT (!list_empty(&conn->rac_rdmaq));
- tx = list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list);
- list_del(&tx->tx_list);
-
- LASSERT(desc->AppPtr == (void *)tx);
- LASSERT(tx->tx_msg.ram_type == RANAL_MSG_PUT_DONE ||
- tx->tx_msg.ram_type == RANAL_MSG_GET_DONE);
-
- list_add_tail(&tx->tx_list, &conn->rac_fmaq);
- tx->tx_qtime = jiffies;
-
- spin_unlock_irqrestore(&conn->rac_lock, flags);
-
- /* Get conn's fmaq processed, now I've just put something
- * there */
- kranal_schedule_conn(conn);
-
- read_unlock(&kranal_data.kra_global_lock);
- }
-}
-
-void
-kranal_check_fma_cq (kra_device_t *dev)
-{
- kra_conn_t *conn;
- RAP_RETURN rrc;
- __u32 cqid;
- __u32 event_type;
- struct list_head *conns;
- struct list_head *tmp;
- int i;
-
- for (;;) {
- rrc = RapkCQDone(dev->rad_fma_cqh, &cqid, &event_type);
- if (rrc == RAP_NOT_DONE) {
- CDEBUG(D_NET, "FMA CQ %d empty\n", dev->rad_id);
- return;
- }
-
- LASSERT (rrc == RAP_SUCCESS);
-
- if ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0) {
-
- read_lock(&kranal_data.kra_global_lock);
-
- conn = kranal_cqid2conn_locked(cqid);
- if (conn == NULL) {
- CDEBUG(D_NET, "FMA CQID lookup %d failed\n",
- cqid);
- } else {
- CDEBUG(D_NET, "FMA completed: %p CQID %d\n",
- conn, cqid);
- kranal_schedule_conn(conn);
- }
-
- read_unlock(&kranal_data.kra_global_lock);
- continue;
- }
-
- /* FMA CQ has overflowed: check ALL conns */
- CWARN("FMA CQ overflow: scheduling ALL conns on device %d\n",
- dev->rad_id);
-
- for (i = 0; i < kranal_data.kra_conn_hash_size; i++) {
-
- read_lock(&kranal_data.kra_global_lock);
-
- conns = &kranal_data.kra_conns[i];
-
- list_for_each (tmp, conns) {
- conn = list_entry(tmp, kra_conn_t,
- rac_hashlist);
-
- if (conn->rac_device == dev)
- kranal_schedule_conn(conn);
- }
-
- /* don't block write lockers for too long... */
- read_unlock(&kranal_data.kra_global_lock);
- }
- }
-}
-
-int
-kranal_sendmsg(kra_conn_t *conn, kra_msg_t *msg,
- void *immediate, int immediatenob)
-{
- int sync = (msg->ram_type & RANAL_MSG_FENCE) != 0;
- RAP_RETURN rrc;
-
- CDEBUG(D_NET,"%p sending msg %p %02x%s [%p for %d]\n",
- conn, msg, msg->ram_type, sync ? "(sync)" : "",
- immediate, immediatenob);
-
- LASSERT (sizeof(*msg) <= RANAL_FMA_MAX_PREFIX);
- LASSERT ((msg->ram_type == RANAL_MSG_IMMEDIATE) ?
- immediatenob <= RANAL_FMA_MAX_DATA :
- immediatenob == 0);
-
- msg->ram_connstamp = conn->rac_my_connstamp;
- msg->ram_seq = conn->rac_tx_seq;
-
- if (sync)
- rrc = RapkFmaSyncSend(conn->rac_rihandle,
- immediate, immediatenob,
- msg, sizeof(*msg));
- else
- rrc = RapkFmaSend(conn->rac_rihandle,
- immediate, immediatenob,
- msg, sizeof(*msg));
-
- switch (rrc) {
- default:
- LBUG();
-
- case RAP_SUCCESS:
- conn->rac_last_tx = jiffies;
- conn->rac_tx_seq++;
- return 0;
-
- case RAP_NOT_DONE:
- if (time_after_eq(jiffies,
- conn->rac_last_tx + conn->rac_keepalive*HZ))
- CWARN("EAGAIN sending %02x (idle %lu secs)\n",
- msg->ram_type, (jiffies - conn->rac_last_tx)/HZ);
- return -EAGAIN;
- }
-}
-
-void
-kranal_process_fmaq (kra_conn_t *conn)
-{
- unsigned long flags;
- int more_to_do;
- kra_tx_t *tx;
- int rc;
- int expect_reply;
-
- /* NB 1. kranal_sendmsg() may fail if I'm out of credits right now.
- * However I will be rescheduled by an FMA completion event
- * when I eventually get some.
- * NB 2. Sampling rac_state here races with setting it elsewhere.
- * But it doesn't matter if I try to send a "real" message just
- * as I start closing because I'll get scheduled to send the
- * close anyway. */
-
- /* Not racing with incoming message processing! */
- LASSERT (current == conn->rac_device->rad_scheduler);
-
- if (conn->rac_state != RANAL_CONN_ESTABLISHED) {
- if (!list_empty(&conn->rac_rdmaq)) {
- /* RDMAs in progress */
- LASSERT (!conn->rac_close_sent);
-
- if (time_after_eq(jiffies,
- conn->rac_last_tx +
- conn->rac_keepalive * HZ)) {
- CDEBUG(D_NET, "sending NOOP (rdma in progress)\n");
- kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
- kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
- }
- return;
- }
-
- if (conn->rac_close_sent)
- return;
-
- CWARN("sending CLOSE to %s\n",
- libcfs_nid2str(conn->rac_peer->rap_nid));
- kranal_init_msg(&conn->rac_msg, RANAL_MSG_CLOSE);
- rc = kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
- if (rc != 0)
- return;
-
- conn->rac_close_sent = 1;
- if (!conn->rac_close_recvd)
- return;
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- if (conn->rac_state == RANAL_CONN_CLOSING)
- kranal_terminate_conn_locked(conn);
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
- return;
- }
-
- spin_lock_irqsave(&conn->rac_lock, flags);
-
- if (list_empty(&conn->rac_fmaq)) {
-
- spin_unlock_irqrestore(&conn->rac_lock, flags);
-
- if (time_after_eq(jiffies,
- conn->rac_last_tx + conn->rac_keepalive * HZ)) {
- CDEBUG(D_NET, "sending NOOP -> %s (%p idle %lu(%ld))\n",
- libcfs_nid2str(conn->rac_peer->rap_nid), conn,
- (jiffies - conn->rac_last_tx)/HZ, conn->rac_keepalive);
- kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
- kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
- }
- return;
- }
-
- tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list);
- list_del(&tx->tx_list);
- more_to_do = !list_empty(&conn->rac_fmaq);
-
- spin_unlock_irqrestore(&conn->rac_lock, flags);
-
- expect_reply = 0;
- CDEBUG(D_NET, "sending regular msg: %p, type %02x, cookie "LPX64"\n",
- tx, tx->tx_msg.ram_type, tx->tx_cookie);
- switch (tx->tx_msg.ram_type) {
- default:
- LBUG();
-
- case RANAL_MSG_IMMEDIATE:
- rc = kranal_sendmsg(conn, &tx->tx_msg,
- tx->tx_buffer, tx->tx_nob);
- break;
-
- case RANAL_MSG_PUT_NAK:
- case RANAL_MSG_PUT_DONE:
- case RANAL_MSG_GET_NAK:
- case RANAL_MSG_GET_DONE:
- rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
- break;
-
- case RANAL_MSG_PUT_REQ:
- rc = kranal_map_buffer(tx);
- LASSERT (rc != -EAGAIN);
- if (rc != 0)
- break;
-
- tx->tx_msg.ram_u.putreq.raprm_cookie = tx->tx_cookie;
- rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
- expect_reply = 1;
- break;
-
- case RANAL_MSG_PUT_ACK:
- rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
- expect_reply = 1;
- break;
-
- case RANAL_MSG_GET_REQ:
- rc = kranal_map_buffer(tx);
- LASSERT (rc != -EAGAIN);
- if (rc != 0)
- break;
-
- tx->tx_msg.ram_u.get.ragm_cookie = tx->tx_cookie;
- tx->tx_msg.ram_u.get.ragm_desc.rard_key = tx->tx_map_key;
- tx->tx_msg.ram_u.get.ragm_desc.rard_addr.AddressBits =
- (__u64)((unsigned long)tx->tx_buffer);
- tx->tx_msg.ram_u.get.ragm_desc.rard_nob = tx->tx_nob;
- rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
- expect_reply = 1;
- break;
- }
-
- if (rc == -EAGAIN) {
- /* I need credits to send this. Replace tx at the head of the
- * fmaq and I'll get rescheduled when credits appear */
- CDEBUG(D_NET, "EAGAIN on %p\n", conn);
- spin_lock_irqsave(&conn->rac_lock, flags);
- list_add(&tx->tx_list, &conn->rac_fmaq);
- spin_unlock_irqrestore(&conn->rac_lock, flags);
- return;
- }
-
- if (!expect_reply || rc != 0) {
- kranal_tx_done(tx, rc);
- } else {
- /* LASSERT(current) above ensures this doesn't race with reply
- * processing */
- spin_lock_irqsave(&conn->rac_lock, flags);
- list_add_tail(&tx->tx_list, &conn->rac_replyq);
- tx->tx_qtime = jiffies;
- spin_unlock_irqrestore(&conn->rac_lock, flags);
- }
-
- if (more_to_do) {
- CDEBUG(D_NET, "Rescheduling %p (more to do)\n", conn);
- kranal_schedule_conn(conn);
- }
-}
-
-static inline void
-kranal_swab_rdma_desc (kra_rdma_desc_t *d)
-{
- __swab64s(&d->rard_key.Key);
- __swab16s(&d->rard_key.Cookie);
- __swab16s(&d->rard_key.MdHandle);
- __swab32s(&d->rard_key.Flags);
- __swab64s(&d->rard_addr.AddressBits);
- __swab32s(&d->rard_nob);
-}
-
-kra_tx_t *
-kranal_match_reply(kra_conn_t *conn, int type, __u64 cookie)
-{
- struct list_head *ttmp;
- kra_tx_t *tx;
- unsigned long flags;
-
- spin_lock_irqsave(&conn->rac_lock, flags);
-
- list_for_each(ttmp, &conn->rac_replyq) {
- tx = list_entry(ttmp, kra_tx_t, tx_list);
-
- CDEBUG(D_NET,"Checking %p %02x/"LPX64"\n",
- tx, tx->tx_msg.ram_type, tx->tx_cookie);
-
- if (tx->tx_cookie != cookie)
- continue;
-
- if (tx->tx_msg.ram_type != type) {
- spin_unlock_irqrestore(&conn->rac_lock, flags);
- CWARN("Unexpected type %x (%x expected) "
- "matched reply from %s\n",
- tx->tx_msg.ram_type, type,
- libcfs_nid2str(conn->rac_peer->rap_nid));
- return NULL;
- }
-
- list_del(&tx->tx_list);
- spin_unlock_irqrestore(&conn->rac_lock, flags);
- return tx;
- }
-
- spin_unlock_irqrestore(&conn->rac_lock, flags);
- CWARN("Unmatched reply %02x/"LPX64" from %s\n",
- type, cookie, libcfs_nid2str(conn->rac_peer->rap_nid));
- return NULL;
-}
-
-void
-kranal_check_fma_rx (kra_conn_t *conn)
-{
- unsigned long flags;
- __u32 seq;
- kra_tx_t *tx;
- kra_msg_t *msg;
- void *prefix;
- RAP_RETURN rrc = RapkFmaGetPrefix(conn->rac_rihandle, &prefix);
- kra_peer_t *peer = conn->rac_peer;
- int rc = 0;
- int repost = 1;
-
- if (rrc == RAP_NOT_DONE)
- return;
-
- CDEBUG(D_NET, "RX on %p\n", conn);
-
- LASSERT (rrc == RAP_SUCCESS);
- conn->rac_last_rx = jiffies;
- seq = conn->rac_rx_seq++;
- msg = (kra_msg_t *)prefix;
-
- /* stash message for portals callbacks they'll NULL
- * rac_rxmsg if they consume it */
- LASSERT (conn->rac_rxmsg == NULL);
- conn->rac_rxmsg = msg;
-
- if (msg->ram_magic != RANAL_MSG_MAGIC) {
- if (__swab32(msg->ram_magic) != RANAL_MSG_MAGIC) {
- CERROR("Unexpected magic %08x from %s\n",
- msg->ram_magic, libcfs_nid2str(peer->rap_nid));
- rc = -EPROTO;
- goto out;
- }
-
- __swab32s(&msg->ram_magic);
- __swab16s(&msg->ram_version);
- __swab16s(&msg->ram_type);
- __swab64s(&msg->ram_srcnid);
- __swab64s(&msg->ram_connstamp);
- __swab32s(&msg->ram_seq);
-
- /* NB message type checked below; NOT here... */
- switch (msg->ram_type) {
- case RANAL_MSG_PUT_ACK:
- kranal_swab_rdma_desc(&msg->ram_u.putack.rapam_desc);
- break;
-
- case RANAL_MSG_GET_REQ:
- kranal_swab_rdma_desc(&msg->ram_u.get.ragm_desc);
- break;
-
- default:
- break;
- }
- }
-
- if (msg->ram_version != RANAL_MSG_VERSION) {
- CERROR("Unexpected protocol version %d from %s\n",
- msg->ram_version, libcfs_nid2str(peer->rap_nid));
- rc = -EPROTO;
- goto out;
- }
-
- if (msg->ram_srcnid != peer->rap_nid) {
- CERROR("Unexpected peer %s from %s\n",
- libcfs_nid2str(msg->ram_srcnid),
- libcfs_nid2str(peer->rap_nid));
- rc = -EPROTO;
- goto out;
- }
-
- if (msg->ram_connstamp != conn->rac_peer_connstamp) {
- CERROR("Unexpected connstamp "LPX64"("LPX64
- " expected) from %s\n",
- msg->ram_connstamp, conn->rac_peer_connstamp,
- libcfs_nid2str(peer->rap_nid));
- rc = -EPROTO;
- goto out;
- }
-
- if (msg->ram_seq != seq) {
- CERROR("Unexpected sequence number %d(%d expected) from %s\n",
- msg->ram_seq, seq, libcfs_nid2str(peer->rap_nid));
- rc = -EPROTO;
- goto out;
- }
-
- if ((msg->ram_type & RANAL_MSG_FENCE) != 0) {
- /* This message signals RDMA completion... */
- rrc = RapkFmaSyncWait(conn->rac_rihandle);
- if (rrc != RAP_SUCCESS) {
- CERROR("RapkFmaSyncWait failed: %d\n", rrc);
- rc = -ENETDOWN;
- goto out;
- }
- }
-
- if (conn->rac_close_recvd) {
- CERROR("Unexpected message %d after CLOSE from %s\n",
- msg->ram_type, libcfs_nid2str(conn->rac_peer->rap_nid));
- rc = -EPROTO;
- goto out;
- }
-
- if (msg->ram_type == RANAL_MSG_CLOSE) {
- CWARN("RX CLOSE from %s\n", libcfs_nid2str(conn->rac_peer->rap_nid));
- conn->rac_close_recvd = 1;
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
- if (conn->rac_state == RANAL_CONN_ESTABLISHED)
- kranal_close_conn_locked(conn, 0);
- else if (conn->rac_state == RANAL_CONN_CLOSING &&
- conn->rac_close_sent)
- kranal_terminate_conn_locked(conn);
-
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
- goto out;
- }
-
- if (conn->rac_state != RANAL_CONN_ESTABLISHED)
- goto out;
-
- switch (msg->ram_type) {
- case RANAL_MSG_NOOP:
- /* Nothing to do; just a keepalive */
- CDEBUG(D_NET, "RX NOOP on %p\n", conn);
- break;
-
- case RANAL_MSG_IMMEDIATE:
- CDEBUG(D_NET, "RX IMMEDIATE on %p\n", conn);
- rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.immediate.raim_hdr,
- msg->ram_srcnid, conn, 0);
- repost = rc < 0;
- break;
-
- case RANAL_MSG_PUT_REQ:
- CDEBUG(D_NET, "RX PUT_REQ on %p\n", conn);
- rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.putreq.raprm_hdr,
- msg->ram_srcnid, conn, 1);
- repost = rc < 0;
- break;
-
- case RANAL_MSG_PUT_NAK:
- CDEBUG(D_NET, "RX PUT_NAK on %p\n", conn);
- tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ,
- msg->ram_u.completion.racm_cookie);
- if (tx == NULL)
- break;
-
- LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
- tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
- kranal_tx_done(tx, -ENOENT); /* no match */
- break;
-
- case RANAL_MSG_PUT_ACK:
- CDEBUG(D_NET, "RX PUT_ACK on %p\n", conn);
- tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ,
- msg->ram_u.putack.rapam_src_cookie);
- if (tx == NULL)
- break;
-
- kranal_rdma(tx, RANAL_MSG_PUT_DONE,
- &msg->ram_u.putack.rapam_desc,
- msg->ram_u.putack.rapam_desc.rard_nob,
- msg->ram_u.putack.rapam_dst_cookie);
- break;
-
- case RANAL_MSG_PUT_DONE:
- CDEBUG(D_NET, "RX PUT_DONE on %p\n", conn);
- tx = kranal_match_reply(conn, RANAL_MSG_PUT_ACK,
- msg->ram_u.completion.racm_cookie);
- if (tx == NULL)
- break;
-
- LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
- tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
- kranal_tx_done(tx, 0);
- break;
-
- case RANAL_MSG_GET_REQ:
- CDEBUG(D_NET, "RX GET_REQ on %p\n", conn);
- rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.get.ragm_hdr,
- msg->ram_srcnid, conn, 1);
- repost = rc < 0;
- break;
-
- case RANAL_MSG_GET_NAK:
- CDEBUG(D_NET, "RX GET_NAK on %p\n", conn);
- tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ,
- msg->ram_u.completion.racm_cookie);
- if (tx == NULL)
- break;
-
- LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
- tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
- kranal_tx_done(tx, -ENOENT); /* no match */
- break;
-
- case RANAL_MSG_GET_DONE:
- CDEBUG(D_NET, "RX GET_DONE on %p\n", conn);
- tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ,
- msg->ram_u.completion.racm_cookie);
- if (tx == NULL)
- break;
-
- LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
- tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-#if 0
- /* completion message should send rdma length if we ever allow
- * GET truncation */
- lnet_set_reply_msg_len(kranal_data.kra_ni, tx->tx_lntmsg[1], ???);
-#endif
- kranal_tx_done(tx, 0);
- break;
- }
-
- out:
- if (rc < 0) /* protocol/comms error */
- kranal_close_conn (conn, rc);
-
- if (repost && conn->rac_rxmsg != NULL)
- kranal_consume_rxmsg(conn, NULL, 0);
-
- /* check again later */
- kranal_schedule_conn(conn);
-}
-
-void
-kranal_complete_closed_conn (kra_conn_t *conn)
-{
- kra_tx_t *tx;
- int nfma;
- int nreplies;
-
- LASSERT (conn->rac_state == RANAL_CONN_CLOSED);
- LASSERT (list_empty(&conn->rac_list));
- LASSERT (list_empty(&conn->rac_hashlist));
-
- for (nfma = 0; !list_empty(&conn->rac_fmaq); nfma++) {
- tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list);
-
- list_del(&tx->tx_list);
- kranal_tx_done(tx, -ECONNABORTED);
- }
-
- LASSERT (list_empty(&conn->rac_rdmaq));
-
- for (nreplies = 0; !list_empty(&conn->rac_replyq); nreplies++) {
- tx = list_entry(conn->rac_replyq.next, kra_tx_t, tx_list);
-
- list_del(&tx->tx_list);
- kranal_tx_done(tx, -ECONNABORTED);
- }
-
- CWARN("Closed conn %p -> %s: nmsg %d nreplies %d\n",
- conn, libcfs_nid2str(conn->rac_peer->rap_nid), nfma, nreplies);
-}
-
-int
-kranal_process_new_conn (kra_conn_t *conn)
-{
- RAP_RETURN rrc;
-
- rrc = RapkCompleteSync(conn->rac_rihandle, 1);
- if (rrc == RAP_SUCCESS)
- return 0;
-
- LASSERT (rrc == RAP_NOT_DONE);
- if (!time_after_eq(jiffies, conn->rac_last_tx +
- conn->rac_timeout * HZ))
- return -EAGAIN;
-
- /* Too late */
- rrc = RapkCompleteSync(conn->rac_rihandle, 0);
- LASSERT (rrc == RAP_SUCCESS);
- return -ETIMEDOUT;
-}
-
-int
-kranal_scheduler (void *arg)
-{
- kra_device_t *dev = (kra_device_t *)arg;
- wait_queue_t wait;
- char name[16];
- kra_conn_t *conn;
- unsigned long flags;
- unsigned long deadline;
- unsigned long soonest;
- int nsoonest;
- long timeout;
- struct list_head *tmp;
- struct list_head *nxt;
- int rc;
- int dropped_lock;
- int busy_loops = 0;
-
- snprintf(name, sizeof(name), "kranal_sd_%02d", dev->rad_idx);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- dev->rad_scheduler = current;
- init_waitqueue_entry(&wait, current);
-
- spin_lock_irqsave(&dev->rad_lock, flags);
-
- while (!kranal_data.kra_shutdown) {
- /* Safe: kra_shutdown only set when quiescent */
-
- if (busy_loops++ >= RANAL_RESCHED) {
- spin_unlock_irqrestore(&dev->rad_lock, flags);
-
- our_cond_resched();
- busy_loops = 0;
-
- spin_lock_irqsave(&dev->rad_lock, flags);
- }
-
- dropped_lock = 0;
-
- if (dev->rad_ready) {
- /* Device callback fired since I last checked it */
- dev->rad_ready = 0;
- spin_unlock_irqrestore(&dev->rad_lock, flags);
- dropped_lock = 1;
-
- kranal_check_rdma_cq(dev);
- kranal_check_fma_cq(dev);
-
- spin_lock_irqsave(&dev->rad_lock, flags);
- }
-
- list_for_each_safe(tmp, nxt, &dev->rad_ready_conns) {
- conn = list_entry(tmp, kra_conn_t, rac_schedlist);
-
- list_del_init(&conn->rac_schedlist);
- LASSERT (conn->rac_scheduled);
- conn->rac_scheduled = 0;
- spin_unlock_irqrestore(&dev->rad_lock, flags);
- dropped_lock = 1;
-
- kranal_check_fma_rx(conn);
- kranal_process_fmaq(conn);
-
- if (conn->rac_state == RANAL_CONN_CLOSED)
- kranal_complete_closed_conn(conn);
-
- kranal_conn_decref(conn);
- spin_lock_irqsave(&dev->rad_lock, flags);
- }
-
- nsoonest = 0;
- soonest = jiffies;
-
- list_for_each_safe(tmp, nxt, &dev->rad_new_conns) {
- conn = list_entry(tmp, kra_conn_t, rac_schedlist);
-
- deadline = conn->rac_last_tx + conn->rac_keepalive;
- if (time_after_eq(jiffies, deadline)) {
- /* Time to process this new conn */
- spin_unlock_irqrestore(&dev->rad_lock, flags);
- dropped_lock = 1;
-
- rc = kranal_process_new_conn(conn);
- if (rc != -EAGAIN) {
- /* All done with this conn */
- spin_lock_irqsave(&dev->rad_lock, flags);
- list_del_init(&conn->rac_schedlist);
- spin_unlock_irqrestore(&dev->rad_lock, flags);
-
- kranal_conn_decref(conn);
- spin_lock_irqsave(&dev->rad_lock, flags);
- continue;
- }
-
- /* retry with exponential backoff until HZ */
- if (conn->rac_keepalive == 0)
- conn->rac_keepalive = 1;
- else if (conn->rac_keepalive <= HZ)
- conn->rac_keepalive *= 2;
- else
- conn->rac_keepalive += HZ;
-
- deadline = conn->rac_last_tx + conn->rac_keepalive;
- spin_lock_irqsave(&dev->rad_lock, flags);
- }
-
- /* Does this conn need attention soonest? */
- if (nsoonest++ == 0 ||
- !time_after_eq(deadline, soonest))
- soonest = deadline;
- }
-
- if (dropped_lock) /* may sleep iff I didn't drop the lock */
- continue;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&dev->rad_waitq, &wait);
- spin_unlock_irqrestore(&dev->rad_lock, flags);
-
- if (nsoonest == 0) {
- busy_loops = 0;
- schedule();
- } else {
- timeout = (long)(soonest - jiffies);
- if (timeout > 0) {
- busy_loops = 0;
- schedule_timeout(timeout);
- }
- }
-
- remove_wait_queue(&dev->rad_waitq, &wait);
- set_current_state(TASK_RUNNING);
- spin_lock_irqsave(&dev->rad_lock, flags);
- }
-
- spin_unlock_irqrestore(&dev->rad_lock, flags);
-
- dev->rad_scheduler = NULL;
- kranal_thread_fini();
- return 0;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "ralnd.h"
-
-static int n_connd = 4;
-CFS_MODULE_PARM(n_connd, "i", int, 0444,
- "# of connection daemons");
-
-static int min_reconnect_interval = 1;
-CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644,
- "minimum connection retry interval (seconds)");
-
-static int max_reconnect_interval = 60;
-CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644,
- "maximum connection retry interval (seconds)");
-
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# of transmit descriptors");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int peer_credits = 32;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
- "# concurrent sends to 1 peer");
-
-static int fma_cq_size = 8192;
-CFS_MODULE_PARM(fma_cq_size, "i", int, 0444,
- "size of the completion queue");
-
-static int timeout = 30;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
- "communications timeout (seconds)");
-
-static int max_immediate = (2<<10);
-CFS_MODULE_PARM(max_immediate, "i", int, 0644,
- "immediate/RDMA breakpoint");
-
-kra_tunables_t kranal_tunables = {
- .kra_n_connd = &n_connd,
- .kra_min_reconnect_interval = &min_reconnect_interval,
- .kra_max_reconnect_interval = &max_reconnect_interval,
- .kra_ntx = &ntx,
- .kra_credits = &credits,
- .kra_peercredits = &peer_credits,
- .kra_fma_cq_size = &fma_cq_size,
- .kra_timeout = &timeout,
- .kra_max_immediate = &max_immediate,
-};
-
-#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
-static cfs_sysctl_table_t kranal_ctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "n_connd",
- .data = &n_connd,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 2,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 3,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 6,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 7,
- .procname = "fma_cq_size",
- .data = &fma_cq_size,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 8,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 9,
- .procname = "max_immediate",
- .data = &max_immediate,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {0}
-};
-
-static cfs_sysctl_table_t kranal_top_ctl_table[] = {
- {
- .ctl_name = 202,
- .procname = "ranal",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = kranal_ctl_table
- },
- {0}
-};
-
-int
-kranal_tunables_init ()
-{
- kranal_tunables.kra_sysctl =
- cfs_register_sysctl_table(kranal_top_ctl_table, 0);
-
- if (kranal_tunables.kra_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-
- return 0;
-}
-
-void
-kranal_tunables_fini ()
-{
- if (kranal_tunables.kra_sysctl != NULL)
- cfs_unregister_sysctl_table(kranal_tunables.kra_sysctl);
-}
-
-#else
-
-int
-kranal_tunables_init ()
-{
- return 0;
-}
-
-void
-kranal_tunables_fini ()
-{
-}
-
-#endif
-
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>CFBundleDevelopmentRegion</key>
- <string>English</string>
- <key>CFBundleExecutable</key>
- <string>ksocklnd</string>
- <key>CFBundleIconFile</key>
- <string></string>
- <key>CFBundleIdentifier</key>
- <string>com.clusterfs.lustre.ksocklnd</string>
- <key>CFBundleInfoDictionaryVersion</key>
- <string>6.0</string>
- <key>CFBundlePackageType</key>
- <string>KEXT</string>
- <key>CFBundleSignature</key>
- <string>????</string>
- <key>CFBundleVersion</key>
- <string>1.0.1</string>
- <key>OSBundleCompatibleVersion</key>
- <string>1.0.0</string>
- <key>OSBundleLibraries</key>
- <dict>
- <key>com.apple.kpi.bsd</key>
- <string>8.0.0b1</string>
- <key>com.apple.kpi.libkern</key>
- <string>8.0.0b1</string>
- <key>com.apple.kpi.mach</key>
- <string>8.0.0b1</string>
- <key>com.apple.kpi.unsupported</key>
- <string>8.0.0b1</string>
- <key>com.clusterfs.lustre.libcfs</key>
- <string>1.0.0</string>
- <key>com.clusterfs.lustre.lnet</key>
- <string>1.0.0</string>
- </dict>
-</dict>
-</plist>
+++ /dev/null
-MODULES := ksocklnd
-
-ksocklnd-objs := socklnd.o socklnd_cb.o socklnd_modparams.o socklnd_lib-linux.o
-
-@INCLUDE_RULES@
+++ /dev/null
-if MODULES
-if LINUX
-
- modulenet_DATA := ksocklnd$(KMODEXT)
-
-endif # LINUX
-endif # MODULES
-
-DIST_SOURCES := $(ksocklnd-objs:%.o=%.c) socklnd_lib-linux.h socklnd.h
-
-if DARWIN
-
- macos_PROGRAMS := ksocklnd
-
- nodist_ksocklnd_SOURCES := socklnd.c socklnd_cb.c socklnd_modparams.c socklnd_lib-darwin.c
- DIST_SOURCES += socklnd_lib-darwin.c socklnd_lib-darwin.h
-
- ksocklnd_CFLAGS := $(EXTRA_KCFLAGS)
- ksocklnd_LDFLAGS := $(EXTRA_KLDFLAGS)
- ksocklnd_LDADD := $(EXTRA_KLIBS)
-
- plist_DATA := Info.plist
- install_data_hook := fix-kext-ownership
-
-endif # DARWIN
-
-EXTRA_DIST := $(plist_DATA)
-install-data-hook: $(install_data_hook)
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ socklnd_lib.c
+++ /dev/null
-// !$*UTF8*$!
-{
- archiveVersion = 1;
- classes = {
- };
- objectVersion = 39;
- objects = {
- 06AA1262FFB20DD611CA28AA = {
- buildRules = (
- );
- buildSettings = {
- COPY_PHASE_STRIP = NO;
- GCC_DYNAMIC_NO_PIC = NO;
- GCC_ENABLE_FIX_AND_CONTINUE = YES;
- GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
- GCC_OPTIMIZATION_LEVEL = 0;
- OPTIMIZATION_CFLAGS = "-O0";
- ZERO_LINK = YES;
- };
- isa = PBXBuildStyle;
- name = Development;
- };
- 06AA1263FFB20DD611CA28AA = {
- buildRules = (
- );
- buildSettings = {
- COPY_PHASE_STRIP = YES;
- GCC_ENABLE_FIX_AND_CONTINUE = NO;
- ZERO_LINK = NO;
- };
- isa = PBXBuildStyle;
- name = Deployment;
- };
-//060
-//061
-//062
-//063
-//064
-//080
-//081
-//082
-//083
-//084
- 089C1669FE841209C02AAC07 = {
- buildSettings = {
- };
- buildStyles = (
- 06AA1262FFB20DD611CA28AA,
- 06AA1263FFB20DD611CA28AA,
- );
- hasScannedForEncodings = 1;
- isa = PBXProject;
- mainGroup = 089C166AFE841209C02AAC07;
- projectDirPath = "";
- targets = (
- 32A4FEB80562C75700D090E7,
- );
- };
- 089C166AFE841209C02AAC07 = {
- children = (
- 247142CAFF3F8F9811CA285C,
- 089C167CFE841241C02AAC07,
- 19C28FB6FE9D52B211CA2CBB,
- );
- isa = PBXGroup;
- name = ksocknal;
- refType = 4;
- sourceTree = "<group>";
- };
- 089C167CFE841241C02AAC07 = {
- children = (
- 32A4FEC30562C75700D090E7,
- );
- isa = PBXGroup;
- name = Resources;
- refType = 4;
- sourceTree = "<group>";
- };
-//080
-//081
-//082
-//083
-//084
-//190
-//191
-//192
-//193
-//194
- 1957C5680737C71F00425049 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = socknal.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 1957C5690737C71F00425049 = {
- fileRef = 1957C5680737C71F00425049;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 1957C56A0737C72F00425049 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = socknal_cb.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 1957C56B0737C72F00425049 = {
- fileRef = 1957C56A0737C72F00425049;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 1957C5B20737C78E00425049 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = socknal_lib.c;
- path = arch/xnu/socknal_lib.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 1957C5B30737C78E00425049 = {
- fileRef = 1957C5B20737C78E00425049;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19C28FB6FE9D52B211CA2CBB = {
- children = (
- 32A4FEC40562C75800D090E7,
- );
- isa = PBXGroup;
- name = Products;
- refType = 4;
- sourceTree = "<group>";
- };
-//190
-//191
-//192
-//193
-//194
-//240
-//241
-//242
-//243
-//244
- 247142CAFF3F8F9811CA285C = {
- children = (
- 1957C5B20737C78E00425049,
- 1957C56A0737C72F00425049,
- 1957C5680737C71F00425049,
- );
- isa = PBXGroup;
- name = Source;
- path = "";
- refType = 4;
- sourceTree = "<group>";
- };
-//240
-//241
-//242
-//243
-//244
-//320
-//321
-//322
-//323
-//324
- 32A4FEB80562C75700D090E7 = {
- buildPhases = (
- 32A4FEB90562C75700D090E7,
- 32A4FEBA0562C75700D090E7,
- 32A4FEBB0562C75700D090E7,
- 32A4FEBD0562C75700D090E7,
- 32A4FEBF0562C75700D090E7,
- 32A4FEC00562C75700D090E7,
- 32A4FEC10562C75700D090E7,
- );
- buildRules = (
- );
- buildSettings = {
- FRAMEWORK_SEARCH_PATHS = "";
- GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
- GCC_WARN_UNKNOWN_PRAGMAS = NO;
- HEADER_SEARCH_PATHS = "../../include ./arch/xnu";
- INFOPLIST_FILE = Info.plist;
- INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
- LIBRARY_SEARCH_PATHS = "";
- MODULE_NAME = com.clusterfs.lustre.portals.knals.ksocknal;
- MODULE_START = ksocknal_start;
- MODULE_STOP = ksocknal_stop;
- MODULE_VERSION = 1.0.1;
- OTHER_CFLAGS = "-D__KERNEL__";
- OTHER_LDFLAGS = "";
- OTHER_REZFLAGS = "";
- PRODUCT_NAME = ksocknal;
- SECTORDER_FLAGS = "";
- WARNING_CFLAGS = "-Wmost";
- WRAPPER_EXTENSION = kext;
- };
- dependencies = (
- );
- isa = PBXNativeTarget;
- name = ksocknal;
- productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
- productName = ksocknal;
- productReference = 32A4FEC40562C75800D090E7;
- productType = "com.apple.product-type.kernel-extension";
- };
- 32A4FEB90562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXShellScriptBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- shellPath = /bin/sh;
- shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
- };
- 32A4FEBA0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXHeadersBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBB0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXResourcesBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBD0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- 1957C5690737C71F00425049,
- 1957C56B0737C72F00425049,
- 1957C5B30737C78E00425049,
- );
- isa = PBXSourcesBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBF0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXFrameworksBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEC00562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXRezBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEC10562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXShellScriptBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- shellPath = /bin/sh;
- shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
- };
- 32A4FEC30562C75700D090E7 = {
- isa = PBXFileReference;
- lastKnownFileType = text.plist.xml;
- path = Info.plist;
- refType = 4;
- sourceTree = "<group>";
- };
- 32A4FEC40562C75800D090E7 = {
- explicitFileType = wrapper.cfbundle;
- includeInIndex = 0;
- isa = PBXFileReference;
- path = ksocknal.kext;
- refType = 3;
- sourceTree = BUILT_PRODUCTS_DIR;
- };
- };
- rootObject = 089C1669FE841209C02AAC07;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "socklnd.h"
-
-lnd_t the_ksocklnd = {
- .lnd_type = SOCKLND,
- .lnd_startup = ksocknal_startup,
- .lnd_shutdown = ksocknal_shutdown,
- .lnd_ctl = ksocknal_ctl,
- .lnd_send = ksocknal_send,
- .lnd_recv = ksocknal_recv,
- .lnd_notify = ksocknal_notify,
- .lnd_accept = ksocknal_accept,
-};
-
-ksock_nal_data_t ksocknal_data;
-
-ksock_interface_t *
-ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip)
-{
- ksock_net_t *net = ni->ni_data;
- int i;
- ksock_interface_t *iface;
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- LASSERT(i < LNET_MAX_INTERFACES);
- iface = &net->ksnn_interfaces[i];
-
- if (iface->ksni_ipaddr == ip)
- return (iface);
- }
-
- return (NULL);
-}
-
-ksock_route_t *
-ksocknal_create_route (__u32 ipaddr, int port)
-{
- ksock_route_t *route;
-
- LIBCFS_ALLOC (route, sizeof (*route));
- if (route == NULL)
- return (NULL);
-
- atomic_set (&route->ksnr_refcount, 1);
- route->ksnr_peer = NULL;
- route->ksnr_retry_interval = 0; /* OK to connect at any time */
- route->ksnr_ipaddr = ipaddr;
- route->ksnr_port = port;
- route->ksnr_scheduled = 0;
- route->ksnr_connecting = 0;
- route->ksnr_connected = 0;
- route->ksnr_deleted = 0;
- route->ksnr_conn_count = 0;
- route->ksnr_share_count = 0;
-
- return (route);
-}
-
-void
-ksocknal_destroy_route (ksock_route_t *route)
-{
- LASSERT (atomic_read(&route->ksnr_refcount) == 0);
-
- if (route->ksnr_peer != NULL)
- ksocknal_peer_decref(route->ksnr_peer);
-
- LIBCFS_FREE (route, sizeof (*route));
-}
-
-int
-ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
-{
- ksock_net_t *net = ni->ni_data;
- ksock_peer_t *peer;
-
- LASSERT (id.nid != LNET_NID_ANY);
- LASSERT (id.pid != LNET_PID_ANY);
- LASSERT (!in_interrupt());
-
- LIBCFS_ALLOC (peer, sizeof (*peer));
- if (peer == NULL)
- return -ENOMEM;
-
- memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */
-
- peer->ksnp_ni = ni;
- peer->ksnp_id = id;
- atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */
- peer->ksnp_closing = 0;
- peer->ksnp_accepting = 0;
- peer->ksnp_zc_next_cookie = 1;
- peer->ksnp_proto = NULL;
- CFS_INIT_LIST_HEAD (&peer->ksnp_conns);
- CFS_INIT_LIST_HEAD (&peer->ksnp_routes);
- CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue);
- CFS_INIT_LIST_HEAD (&peer->ksnp_zc_req_list);
- spin_lock_init(&peer->ksnp_lock);
-
- spin_lock_bh (&net->ksnn_lock);
-
- if (net->ksnn_shutdown) {
- spin_unlock_bh (&net->ksnn_lock);
-
- LIBCFS_FREE(peer, sizeof(*peer));
- CERROR("Can't create peer: network shutdown\n");
- return -ESHUTDOWN;
- }
-
- net->ksnn_npeers++;
-
- spin_unlock_bh (&net->ksnn_lock);
-
- *peerp = peer;
- return 0;
-}
-
-void
-ksocknal_destroy_peer (ksock_peer_t *peer)
-{
- ksock_net_t *net = peer->ksnp_ni->ni_data;
-
- CDEBUG (D_NET, "peer %s %p deleted\n",
- libcfs_id2str(peer->ksnp_id), peer);
-
- LASSERT (atomic_read (&peer->ksnp_refcount) == 0);
- LASSERT (peer->ksnp_accepting == 0);
- LASSERT (list_empty (&peer->ksnp_conns));
- LASSERT (list_empty (&peer->ksnp_routes));
- LASSERT (list_empty (&peer->ksnp_tx_queue));
- LASSERT (list_empty (&peer->ksnp_zc_req_list));
-
- LIBCFS_FREE (peer, sizeof (*peer));
-
- /* NB a peer's connections and routes keep a reference on their peer
- * until they are destroyed, so we can be assured that _all_ state to
- * do with this peer has been cleaned up when its refcount drops to
- * zero. */
- spin_lock_bh (&net->ksnn_lock);
- net->ksnn_npeers--;
- spin_unlock_bh (&net->ksnn_lock);
-}
-
-ksock_peer_t *
-ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id)
-{
- struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
- struct list_head *tmp;
- ksock_peer_t *peer;
-
- list_for_each (tmp, peer_list) {
-
- peer = list_entry (tmp, ksock_peer_t, ksnp_list);
-
- LASSERT (!peer->ksnp_closing);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- if (peer->ksnp_id.nid != id.nid ||
- peer->ksnp_id.pid != id.pid)
- continue;
-
- CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
- peer, libcfs_id2str(id),
- atomic_read(&peer->ksnp_refcount));
- return (peer);
- }
- return (NULL);
-}
-
-ksock_peer_t *
-ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id)
-{
- ksock_peer_t *peer;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
- peer = ksocknal_find_peer_locked (ni, id);
- if (peer != NULL) /* +1 ref for caller? */
- ksocknal_peer_addref(peer);
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- return (peer);
-}
-
-void
-ksocknal_unlink_peer_locked (ksock_peer_t *peer)
-{
- int i;
- __u32 ip;
- ksock_interface_t *iface;
-
- for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
- LASSERT (i < LNET_MAX_INTERFACES);
- ip = peer->ksnp_passive_ips[i];
-
- iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
- /* All IPs in peer->ksnp_passive_ips[] come from the
- * interface list, therefore the call must succeed. */
- LASSERT (iface != NULL);
-
- CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
- peer, iface, iface->ksni_nroutes);
- iface->ksni_npeers--;
- }
-
- LASSERT (list_empty(&peer->ksnp_conns));
- LASSERT (list_empty(&peer->ksnp_routes));
- LASSERT (!peer->ksnp_closing);
- peer->ksnp_closing = 1;
- list_del (&peer->ksnp_list);
- /* lose peerlist's ref */
- ksocknal_peer_decref(peer);
-}
-
-int
-ksocknal_get_peer_info (lnet_ni_t *ni, int index,
- lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port,
- int *conn_count, int *share_count)
-{
- ksock_peer_t *peer;
- struct list_head *ptmp;
- ksock_route_t *route;
- struct list_head *rtmp;
- int i;
- int j;
- int rc = -ENOENT;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-
- list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- if (peer->ksnp_n_passive_ips == 0 &&
- list_empty(&peer->ksnp_routes)) {
- if (index-- > 0)
- continue;
-
- *id = peer->ksnp_id;
- *myip = 0;
- *peer_ip = 0;
- *port = 0;
- *conn_count = 0;
- *share_count = 0;
- rc = 0;
- goto out;
- }
-
- for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
- if (index-- > 0)
- continue;
-
- *id = peer->ksnp_id;
- *myip = peer->ksnp_passive_ips[j];
- *peer_ip = 0;
- *port = 0;
- *conn_count = 0;
- *share_count = 0;
- rc = 0;
- goto out;
- }
-
- list_for_each (rtmp, &peer->ksnp_routes) {
- if (index-- > 0)
- continue;
-
- route = list_entry(rtmp, ksock_route_t,
- ksnr_list);
-
- *id = peer->ksnp_id;
- *myip = route->ksnr_myipaddr;
- *peer_ip = route->ksnr_ipaddr;
- *port = route->ksnr_port;
- *conn_count = route->ksnr_conn_count;
- *share_count = route->ksnr_share_count;
- rc = 0;
- goto out;
- }
- }
- }
- out:
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return (rc);
-}
-
-void
-ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
-{
- ksock_peer_t *peer = route->ksnr_peer;
- int type = conn->ksnc_type;
- ksock_interface_t *iface;
-
- conn->ksnc_route = route;
- ksocknal_route_addref(route);
-
- if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
- if (route->ksnr_myipaddr == 0) {
- /* route wasn't bound locally yet (the initial route) */
- CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n",
- libcfs_id2str(peer->ksnp_id),
- HIPQUAD(route->ksnr_ipaddr),
- HIPQUAD(conn->ksnc_myipaddr));
- } else {
- CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from "
- "%u.%u.%u.%u to %u.%u.%u.%u\n",
- libcfs_id2str(peer->ksnp_id),
- HIPQUAD(route->ksnr_ipaddr),
- HIPQUAD(route->ksnr_myipaddr),
- HIPQUAD(conn->ksnc_myipaddr));
-
- iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
- route->ksnr_myipaddr);
- if (iface != NULL)
- iface->ksni_nroutes--;
- }
- route->ksnr_myipaddr = conn->ksnc_myipaddr;
- iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
- route->ksnr_myipaddr);
- if (iface != NULL)
- iface->ksni_nroutes++;
- }
-
- route->ksnr_connected |= (1<<type);
- route->ksnr_conn_count++;
-
- /* Successful connection => further attempts can
- * proceed immediately */
- route->ksnr_retry_interval = 0;
-}
-
-void
-ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
-{
- struct list_head *tmp;
- ksock_conn_t *conn;
- ksock_route_t *route2;
-
- LASSERT (!peer->ksnp_closing);
- LASSERT (route->ksnr_peer == NULL);
- LASSERT (!route->ksnr_scheduled);
- LASSERT (!route->ksnr_connecting);
- LASSERT (route->ksnr_connected == 0);
-
- /* LASSERT(unique) */
- list_for_each(tmp, &peer->ksnp_routes) {
- route2 = list_entry(tmp, ksock_route_t, ksnr_list);
-
- if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
- CERROR ("Duplicate route %s %u.%u.%u.%u\n",
- libcfs_id2str(peer->ksnp_id),
- HIPQUAD(route->ksnr_ipaddr));
- LBUG();
- }
- }
-
- route->ksnr_peer = peer;
- ksocknal_peer_addref(peer);
- /* peer's routelist takes over my ref on 'route' */
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-
- list_for_each(tmp, &peer->ksnp_conns) {
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
- continue;
-
- ksocknal_associate_route_conn_locked(route, conn);
- /* keep going (typed routes) */
- }
-}
-
-void
-ksocknal_del_route_locked (ksock_route_t *route)
-{
- ksock_peer_t *peer = route->ksnr_peer;
- ksock_interface_t *iface;
- ksock_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
-
- LASSERT (!route->ksnr_deleted);
-
- /* Close associated conns */
- list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
- conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_route != route)
- continue;
-
- ksocknal_close_conn_locked (conn, 0);
- }
-
- if (route->ksnr_myipaddr != 0) {
- iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
- route->ksnr_myipaddr);
- if (iface != NULL)
- iface->ksni_nroutes--;
- }
-
- route->ksnr_deleted = 1;
- list_del (&route->ksnr_list);
- ksocknal_route_decref(route); /* drop peer's ref */
-
- if (list_empty (&peer->ksnp_routes) &&
- list_empty (&peer->ksnp_conns)) {
- /* I've just removed the last route to a peer with no active
- * connections */
- ksocknal_unlink_peer_locked (peer);
- }
-}
-
-int
-ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
-{
- struct list_head *tmp;
- ksock_peer_t *peer;
- ksock_peer_t *peer2;
- ksock_route_t *route;
- ksock_route_t *route2;
- int rc;
-
- if (id.nid == LNET_NID_ANY ||
- id.pid == LNET_PID_ANY)
- return (-EINVAL);
-
- /* Have a brand new peer ready... */
- rc = ksocknal_create_peer(&peer, ni, id);
- if (rc != 0)
- return rc;
-
- route = ksocknal_create_route (ipaddr, port);
- if (route == NULL) {
- ksocknal_peer_decref(peer);
- return (-ENOMEM);
- }
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- /* always called with a ref on ni, so shutdown can't have started */
- LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
-
- peer2 = ksocknal_find_peer_locked (ni, id);
- if (peer2 != NULL) {
- ksocknal_peer_decref(peer);
- peer = peer2;
- } else {
- /* peer table takes my ref on peer */
- list_add_tail (&peer->ksnp_list,
- ksocknal_nid2peerlist (id.nid));
- }
-
- route2 = NULL;
- list_for_each (tmp, &peer->ksnp_routes) {
- route2 = list_entry(tmp, ksock_route_t, ksnr_list);
-
- if (route2->ksnr_ipaddr == ipaddr)
- break;
-
- route2 = NULL;
- }
- if (route2 == NULL) {
- ksocknal_add_route_locked(peer, route);
- route->ksnr_share_count++;
- } else {
- ksocknal_route_decref(route);
- route2->ksnr_share_count++;
- }
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- return (0);
-}
-
-void
-ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip)
-{
- ksock_conn_t *conn;
- ksock_route_t *route;
- struct list_head *tmp;
- struct list_head *nxt;
- int nshared;
-
- LASSERT (!peer->ksnp_closing);
-
- /* Extra ref prevents peer disappearing until I'm done with it */
- ksocknal_peer_addref(peer);
-
- list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t, ksnr_list);
-
- /* no match */
- if (!(ip == 0 || route->ksnr_ipaddr == ip))
- continue;
-
- route->ksnr_share_count = 0;
- /* This deletes associated conns too */
- ksocknal_del_route_locked (route);
- }
-
- nshared = 0;
- list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t, ksnr_list);
- nshared += route->ksnr_share_count;
- }
-
- if (nshared == 0) {
- /* remove everything else if there are no explicit entries
- * left */
-
- list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t, ksnr_list);
-
- /* we should only be removing auto-entries */
- LASSERT(route->ksnr_share_count == 0);
- ksocknal_del_route_locked (route);
- }
-
- list_for_each_safe (tmp, nxt, &peer->ksnp_conns) {
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- ksocknal_close_conn_locked(conn, 0);
- }
- }
-
- ksocknal_peer_decref(peer);
- /* NB peer unlinks itself when last conn/route is removed */
-}
-
-int
-ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
-{
- CFS_LIST_HEAD (zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- ksock_peer_t *peer;
- int lo;
- int hi;
- int i;
- int rc = -ENOENT;
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- if (id.nid != LNET_NID_ANY)
- lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
- else {
- lo = 0;
- hi = ksocknal_data.ksnd_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
- (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
- continue;
-
- ksocknal_peer_addref(peer); /* a ref for me... */
-
- ksocknal_del_peer_locked (peer, ip);
-
- if (peer->ksnp_closing && !list_empty(&peer->ksnp_tx_queue)) {
- LASSERT (list_empty(&peer->ksnp_conns));
- LASSERT (list_empty(&peer->ksnp_routes));
-
- list_splice_init(&peer->ksnp_tx_queue, &zombies);
- }
-
- ksocknal_peer_decref(peer); /* ...till here */
-
- rc = 0; /* matched! */
- }
- }
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- ksocknal_txlist_done(ni, &zombies, 1);
-
- return (rc);
-}
-
-ksock_conn_t *
-ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
-{
- ksock_peer_t *peer;
- struct list_head *ptmp;
- ksock_conn_t *conn;
- struct list_head *ctmp;
- int i;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
-
- LASSERT (!peer->ksnp_closing);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- list_for_each (ctmp, &peer->ksnp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
- ksocknal_conn_addref(conn);
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return (conn);
- }
- }
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return (NULL);
-}
-
-ksock_sched_t *
-ksocknal_choose_scheduler_locked (unsigned int irq)
-{
- ksock_sched_t *sched;
- ksock_irqinfo_t *info;
- int i;
-
- LASSERT (irq < NR_IRQS);
- info = &ksocknal_data.ksnd_irqinfo[irq];
-
- if (irq != 0 && /* hardware NIC */
- info->ksni_valid) { /* already set up */
- return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]);
- }
-
- /* software NIC (irq == 0) || not associated with a scheduler yet.
- * Choose the CPU with the fewest connections... */
- sched = &ksocknal_data.ksnd_schedulers[0];
- for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++)
- if (sched->kss_nconns >
- ksocknal_data.ksnd_schedulers[i].kss_nconns)
- sched = &ksocknal_data.ksnd_schedulers[i];
-
- if (irq != 0) { /* Hardware NIC */
- info->ksni_valid = 1;
- info->ksni_sched = sched - ksocknal_data.ksnd_schedulers;
-
- /* no overflow... */
- LASSERT (info->ksni_sched == sched - ksocknal_data.ksnd_schedulers);
- }
-
- return (sched);
-}
-
-int
-ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
-{
- ksock_net_t *net = ni->ni_data;
- int i;
- int nip;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- nip = net->ksnn_ninterfaces;
- LASSERT (nip <= LNET_MAX_INTERFACES);
-
- /* Only offer interfaces for additional connections if I have
- * more than one. */
- if (nip < 2) {
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return 0;
- }
-
- for (i = 0; i < nip; i++) {
- ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
- LASSERT (ipaddrs[i] != 0);
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return (nip);
-}
-
-int
-ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
-{
- int best_netmatch = 0;
- int best_xor = 0;
- int best = -1;
- int this_xor;
- int this_netmatch;
- int i;
-
- for (i = 0; i < nips; i++) {
- if (ips[i] == 0)
- continue;
-
- this_xor = (ips[i] ^ iface->ksni_ipaddr);
- this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
-
- if (!(best < 0 ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_xor > this_xor)))
- continue;
-
- best = i;
- best_netmatch = this_netmatch;
- best_xor = this_xor;
- }
-
- LASSERT (best >= 0);
- return (best);
-}
-
-int
-ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
-{
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- ksock_net_t *net = peer->ksnp_ni->ni_data;
- ksock_interface_t *iface;
- ksock_interface_t *best_iface;
- int n_ips;
- int i;
- int j;
- int k;
- __u32 ip;
- __u32 xor;
- int this_netmatch;
- int best_netmatch;
- int best_npeers;
-
- /* CAVEAT EMPTOR: We do all our interface matching with an
- * exclusive hold of global lock at IRQ priority. We're only
- * expecting to be dealing with small numbers of interfaces, so the
- * O(n**3)-ness shouldn't matter */
-
- /* Also note that I'm not going to return more than n_peerips
- * interfaces, even if I have more myself */
-
- write_lock_bh (global_lock);
-
- LASSERT (n_peerips <= LNET_MAX_INTERFACES);
- LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
- /* Only match interfaces for additional connections
- * if I have > 1 interface */
- n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
- MIN(n_peerips, net->ksnn_ninterfaces);
-
- for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
- /* ^ yes really... */
-
- /* If we have any new interfaces, first tick off all the
- * peer IPs that match old interfaces, then choose new
- * interfaces to match the remaining peer IPS.
- * We don't forget interfaces we've stopped using; we might
- * start using them again... */
-
- if (i < peer->ksnp_n_passive_ips) {
- /* Old interface. */
- ip = peer->ksnp_passive_ips[i];
- best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
-
- /* peer passive ips are kept up to date */
- LASSERT(best_iface != NULL);
- } else {
- /* choose a new interface */
- LASSERT (i == peer->ksnp_n_passive_ips);
-
- best_iface = NULL;
- best_netmatch = 0;
- best_npeers = 0;
-
- for (j = 0; j < net->ksnn_ninterfaces; j++) {
- iface = &net->ksnn_interfaces[j];
- ip = iface->ksni_ipaddr;
-
- for (k = 0; k < peer->ksnp_n_passive_ips; k++)
- if (peer->ksnp_passive_ips[k] == ip)
- break;
-
- if (k < peer->ksnp_n_passive_ips) /* using it already */
- continue;
-
- k = ksocknal_match_peerip(iface, peerips, n_peerips);
- xor = (ip ^ peerips[k]);
- this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
-
- if (!(best_iface == NULL ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_npeers > iface->ksni_npeers)))
- continue;
-
- best_iface = iface;
- best_netmatch = this_netmatch;
- best_npeers = iface->ksni_npeers;
- }
-
- best_iface->ksni_npeers++;
- ip = best_iface->ksni_ipaddr;
- peer->ksnp_passive_ips[i] = ip;
- peer->ksnp_n_passive_ips = i+1;
- }
-
- LASSERT (best_iface != NULL);
-
- /* mark the best matching peer IP used */
- j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
- peerips[j] = 0;
- }
-
- /* Overwrite input peer IP addresses */
- memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
-
- write_unlock_bh (global_lock);
-
- return (n_ips);
-}
-
-void
-ksocknal_create_routes(ksock_peer_t *peer, int port,
- __u32 *peer_ipaddrs, int npeer_ipaddrs)
-{
- ksock_route_t *newroute = NULL;
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- lnet_ni_t *ni = peer->ksnp_ni;
- ksock_net_t *net = ni->ni_data;
- struct list_head *rtmp;
- ksock_route_t *route;
- ksock_interface_t *iface;
- ksock_interface_t *best_iface;
- int best_netmatch;
- int this_netmatch;
- int best_nroutes;
- int i;
- int j;
-
- /* CAVEAT EMPTOR: We do all our interface matching with an
- * exclusive hold of global lock at IRQ priority. We're only
- * expecting to be dealing with small numbers of interfaces, so the
- * O(n**3)-ness here shouldn't matter */
-
- write_lock_bh (global_lock);
-
- if (net->ksnn_ninterfaces < 2) {
- /* Only create additional connections
- * if I have > 1 interface */
- write_unlock_bh (global_lock);
- return;
- }
-
- LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES);
-
- for (i = 0; i < npeer_ipaddrs; i++) {
- if (newroute != NULL) {
- newroute->ksnr_ipaddr = peer_ipaddrs[i];
- } else {
- write_unlock_bh (global_lock);
-
- newroute = ksocknal_create_route(peer_ipaddrs[i], port);
- if (newroute == NULL)
- return;
-
- write_lock_bh (global_lock);
- }
-
- if (peer->ksnp_closing) {
- /* peer got closed under me */
- break;
- }
-
- /* Already got a route? */
- route = NULL;
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
- if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
- break;
-
- route = NULL;
- }
- if (route != NULL)
- continue;
-
- best_iface = NULL;
- best_nroutes = 0;
- best_netmatch = 0;
-
- LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
- /* Select interface to connect from */
- for (j = 0; j < net->ksnn_ninterfaces; j++) {
- iface = &net->ksnn_interfaces[j];
-
- /* Using this interface already? */
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
- if (route->ksnr_myipaddr == iface->ksni_ipaddr)
- break;
-
- route = NULL;
- }
- if (route != NULL)
- continue;
-
- this_netmatch = (((iface->ksni_ipaddr ^
- newroute->ksnr_ipaddr) &
- iface->ksni_netmask) == 0) ? 1 : 0;
-
- if (!(best_iface == NULL ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_nroutes > iface->ksni_nroutes)))
- continue;
-
- best_iface = iface;
- best_netmatch = this_netmatch;
- best_nroutes = iface->ksni_nroutes;
- }
-
- if (best_iface == NULL)
- continue;
-
- newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
- best_iface->ksni_nroutes++;
-
- ksocknal_add_route_locked(peer, newroute);
- newroute = NULL;
- }
-
- write_unlock_bh (global_lock);
- if (newroute != NULL)
- ksocknal_route_decref(newroute);
-}
-
-int
-ksocknal_accept (lnet_ni_t *ni, cfs_socket_t *sock)
-{
- ksock_connreq_t *cr;
- int rc;
- __u32 peer_ip;
- int peer_port;
-
- rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- LASSERT (rc == 0); /* we succeeded before */
-
- LIBCFS_ALLOC(cr, sizeof(*cr));
- if (cr == NULL) {
- LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from "
- "%u.%u.%u.%u: memory exhausted\n",
- HIPQUAD(peer_ip));
- return -ENOMEM;
- }
-
- lnet_ni_addref(ni);
- cr->ksncr_ni = ni;
- cr->ksncr_sock = sock;
-
- spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
-
- list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
- cfs_waitq_signal(&ksocknal_data.ksnd_connd_waitq);
-
- spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
- return 0;
-}
-
-int
-ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr)
-{
- ksock_route_t *route;
-
- list_for_each_entry (route, &peer->ksnp_routes, ksnr_list) {
-
- if (route->ksnr_ipaddr == ipaddr)
- return route->ksnr_connecting;
- }
- return 0;
-}
-
-int
-ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
- cfs_socket_t *sock, int type)
-{
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- CFS_LIST_HEAD (zombies);
- lnet_process_id_t peerid;
- struct list_head *tmp;
- __u64 incarnation;
- ksock_conn_t *conn;
- ksock_conn_t *conn2;
- ksock_peer_t *peer = NULL;
- ksock_peer_t *peer2;
- ksock_sched_t *sched;
- ksock_hello_msg_t *hello;
- unsigned int irq;
- ksock_tx_t *tx;
- int rc;
- int active;
- char *warn = NULL;
-
- active = (route != NULL);
-
- LASSERT (active == (type != SOCKLND_CONN_NONE));
-
- irq = ksocknal_lib_sock_irq (sock);
-
- LIBCFS_ALLOC(conn, sizeof(*conn));
- if (conn == NULL) {
- rc = -ENOMEM;
- goto failed_0;
- }
-
- memset (conn, 0, sizeof (*conn));
- conn->ksnc_peer = NULL;
- conn->ksnc_route = NULL;
- conn->ksnc_sock = sock;
- /* 2 ref, 1 for conn, another extra ref prevents socket
- * being closed before establishment of connection */
- atomic_set (&conn->ksnc_sock_refcount, 2);
- conn->ksnc_type = type;
- ksocknal_lib_save_callback(sock, conn);
- atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
-
- conn->ksnc_zc_capable = ksocknal_lib_zc_capable(sock);
- conn->ksnc_rx_ready = 0;
- conn->ksnc_rx_scheduled = 0;
-
- CFS_INIT_LIST_HEAD (&conn->ksnc_tx_queue);
- conn->ksnc_tx_ready = 0;
- conn->ksnc_tx_scheduled = 0;
- conn->ksnc_tx_mono = NULL;
- atomic_set (&conn->ksnc_tx_nob, 0);
-
- LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t,
- kshm_ips[LNET_MAX_INTERFACES]));
- if (hello == NULL) {
- rc = -ENOMEM;
- goto failed_1;
- }
-
- /* stash conn's local and remote addrs */
- rc = ksocknal_lib_get_conn_addrs (conn);
- if (rc != 0)
- goto failed_1;
-
- /* Find out/confirm peer's NID and connection type and get the
- * vector of interfaces she's willing to let me connect to.
- * Passive connections use the listener timeout since the peer sends
- * eagerly */
-
- if (active) {
- peer = route->ksnr_peer;
- LASSERT(ni == peer->ksnp_ni);
-
- /* Active connection sends HELLO eagerly */
- hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
- peerid = peer->ksnp_id;
-
- write_lock_bh(global_lock);
- conn->ksnc_proto = peer->ksnp_proto;
- write_unlock_bh(global_lock);
-
- if (conn->ksnc_proto == NULL) {
- conn->ksnc_proto = &ksocknal_protocol_v2x;
-#if SOCKNAL_VERSION_DEBUG
- if (*ksocknal_tunables.ksnd_protocol != 2)
- conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
- }
-
- rc = ksocknal_send_hello (ni, conn, peerid.nid, hello);
- if (rc != 0)
- goto failed_1;
- } else {
- peerid.nid = LNET_NID_ANY;
- peerid.pid = LNET_PID_ANY;
-
- /* Passive, get protocol from peer */
- conn->ksnc_proto = NULL;
- }
-
- rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation);
- if (rc < 0)
- goto failed_1;
-
- LASSERT (rc == 0 || active);
- LASSERT (conn->ksnc_proto != NULL);
- LASSERT (peerid.nid != LNET_NID_ANY);
-
- if (active) {
- ksocknal_peer_addref(peer);
- write_lock_bh (global_lock);
- } else {
- rc = ksocknal_create_peer(&peer, ni, peerid);
- if (rc != 0)
- goto failed_1;
-
- write_lock_bh (global_lock);
-
- /* called with a ref on ni, so shutdown can't have started */
- LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
-
- peer2 = ksocknal_find_peer_locked(ni, peerid);
- if (peer2 == NULL) {
- /* NB this puts an "empty" peer in the peer
- * table (which takes my ref) */
- list_add_tail(&peer->ksnp_list,
- ksocknal_nid2peerlist(peerid.nid));
- } else {
- ksocknal_peer_decref(peer);
- peer = peer2;
- }
-
- /* +1 ref for me */
- ksocknal_peer_addref(peer);
- peer->ksnp_accepting++;
-
- /* Am I already connecting to this guy? Resolve in
- * favour of higher NID... */
- if (peerid.nid < ni->ni_nid &&
- ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
- rc = EALREADY;
- warn = "connection race resolution";
- goto failed_2;
- }
- }
-
- if (peer->ksnp_closing ||
- (active && route->ksnr_deleted)) {
- /* peer/route got closed under me */
- rc = -ESTALE;
- warn = "peer/route removed";
- goto failed_2;
- }
-
- if (peer->ksnp_proto == NULL) {
- /* Never connected before.
- * NB recv_hello may have returned EPROTO to signal my peer
- * wants a different protocol than the one I asked for.
- */
- LASSERT (list_empty(&peer->ksnp_conns));
-
- peer->ksnp_proto = conn->ksnc_proto;
- peer->ksnp_incarnation = incarnation;
- }
-
- if (peer->ksnp_proto != conn->ksnc_proto ||
- peer->ksnp_incarnation != incarnation) {
- /* Peer rebooted or I've got the wrong protocol version */
- ksocknal_close_peer_conns_locked(peer, 0, 0);
-
- peer->ksnp_proto = NULL;
- rc = ESTALE;
- warn = peer->ksnp_incarnation != incarnation ?
- "peer rebooted" :
- "wrong proto version";
- goto failed_2;
- }
-
- switch (rc) {
- default:
- LBUG();
- case 0:
- break;
- case EALREADY:
- warn = "lost conn race";
- goto failed_2;
- case EPROTO:
- warn = "retry with different protocol version";
- goto failed_2;
- }
-
- /* Refuse to duplicate an existing connection, unless this is a
- * loopback connection */
- if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
- list_for_each(tmp, &peer->ksnp_conns) {
- conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
- conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
- conn2->ksnc_type != conn->ksnc_type)
- continue;
-
- /* Reply on a passive connection attempt so the peer
- * realises we're connected. */
- LASSERT (rc == 0);
- if (!active)
- rc = EALREADY;
-
- warn = "duplicate";
- goto failed_2;
- }
- }
-
- /* If the connection created by this route didn't bind to the IP
- * address the route connected to, the connection/route matching
- * code below probably isn't going to work. */
- if (active &&
- route->ksnr_ipaddr != conn->ksnc_ipaddr) {
- CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n",
- libcfs_id2str(peer->ksnp_id),
- HIPQUAD(route->ksnr_ipaddr),
- HIPQUAD(conn->ksnc_ipaddr));
- }
-
- /* Search for a route corresponding to the new connection and
- * create an association. This allows incoming connections created
- * by routes in my peer to match my own route entries so I don't
- * continually create duplicate routes. */
- list_for_each (tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t, ksnr_list);
-
- if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
- continue;
-
- ksocknal_associate_route_conn_locked(route, conn);
- break;
- }
-
- conn->ksnc_peer = peer; /* conn takes my ref on peer */
- peer->ksnp_last_alive = cfs_time_current();
- peer->ksnp_error = 0;
-
- sched = ksocknal_choose_scheduler_locked (irq);
- sched->kss_nconns++;
- conn->ksnc_scheduler = sched;
-
- /* Set the deadline for the outgoing HELLO to drain */
- conn->ksnc_tx_bufnob = SOCK_WMEM_QUEUED(sock);
- conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- mb(); /* order with adding to peer's conn list */
-
- list_add (&conn->ksnc_list, &peer->ksnp_conns);
- ksocknal_conn_addref(conn);
-
- ksocknal_new_packet(conn, 0);
-
- /* Take all the packets blocking for a connection.
- * NB, it might be nicer to share these blocked packets among any
- * other connections that are becoming established. */
- while (!list_empty (&peer->ksnp_tx_queue)) {
- tx = list_entry (peer->ksnp_tx_queue.next,
- ksock_tx_t, tx_list);
-
- list_del (&tx->tx_list);
- ksocknal_queue_tx_locked (tx, conn);
- }
-
- write_unlock_bh (global_lock);
-
- /* We've now got a new connection. Any errors from here on are just
- * like "normal" comms errors and we close the connection normally.
- * NB (a) we still have to send the reply HELLO for passive
- * connections,
- * (b) normal I/O on the conn is blocked until I setup and call the
- * socket callbacks.
- */
-
- ksocknal_lib_bind_irq (irq);
-
- CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d"
- " incarnation:"LPD64" sched[%d]/%d\n",
- libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
- HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port, incarnation,
- (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
-
- if (active) {
- /* additional routes after interface exchange? */
- ksocknal_create_routes(peer, conn->ksnc_port,
- hello->kshm_ips, hello->kshm_nips);
- } else {
- hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
- hello->kshm_nips);
- rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
- }
-
- LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
- kshm_ips[LNET_MAX_INTERFACES]));
-
- /* setup the socket AFTER I've received hello (it disables
- * SO_LINGER). I might call back to the acceptor who may want
- * to send a protocol version response and then close the
- * socket; this ensures the socket only tears down after the
- * response has been sent. */
- if (rc == 0)
- rc = ksocknal_lib_setup_sock(sock);
-
- write_lock_bh(global_lock);
-
- /* NB my callbacks block while I hold ksnd_global_lock */
- ksocknal_lib_set_callback(sock, conn);
-
- if (!active)
- peer->ksnp_accepting--;
-
- write_unlock_bh(global_lock);
-
- if (rc != 0) {
- write_lock_bh(global_lock);
- ksocknal_close_conn_locked(conn, rc);
- write_unlock_bh(global_lock);
- } else if (ksocknal_connsock_addref(conn) == 0) {
- /* Allow I/O to proceed. */
- ksocknal_read_callback(conn);
- ksocknal_write_callback(conn);
- ksocknal_connsock_decref(conn);
- }
-
- ksocknal_connsock_decref(conn);
- ksocknal_conn_decref(conn);
- return rc;
-
- failed_2:
- if (!peer->ksnp_closing &&
- list_empty (&peer->ksnp_conns) &&
- list_empty (&peer->ksnp_routes)) {
- list_add(&zombies, &peer->ksnp_tx_queue);
- list_del_init(&peer->ksnp_tx_queue);
- ksocknal_unlink_peer_locked(peer);
- }
-
- write_unlock_bh (global_lock);
-
- if (warn != NULL) {
- if (rc < 0)
- CERROR("Not creating conn %s type %d: %s\n",
- libcfs_id2str(peerid), conn->ksnc_type, warn);
- else
- CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
- libcfs_id2str(peerid), conn->ksnc_type, warn);
- }
-
- if (!active) {
- if (rc > 0) {
- /* Request retry by replying with CONN_NONE
- * ksnc_proto has been set already */
- conn->ksnc_type = SOCKLND_CONN_NONE;
- hello->kshm_nips = 0;
- ksocknal_send_hello(ni, conn, peerid.nid, hello);
- }
-
- write_lock_bh(global_lock);
- peer->ksnp_accepting--;
- write_unlock_bh(global_lock);
- }
-
- ksocknal_txlist_done(ni, &zombies, 1);
- ksocknal_peer_decref(peer);
-
- failed_1:
- if (hello != NULL)
- LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
- kshm_ips[LNET_MAX_INTERFACES]));
-
- LIBCFS_FREE (conn, sizeof(*conn));
-
- failed_0:
- libcfs_sock_release(sock);
- return rc;
-}
-
-void
-ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
-{
- /* This just does the immmediate housekeeping, and queues the
- * connection for the reaper to terminate.
- * Caller holds ksnd_global_lock exclusively in irq context */
- ksock_peer_t *peer = conn->ksnc_peer;
- ksock_route_t *route;
- ksock_conn_t *conn2;
- struct list_head *tmp;
-
- LASSERT (peer->ksnp_error == 0);
- LASSERT (!conn->ksnc_closing);
- conn->ksnc_closing = 1;
-
- /* ksnd_deathrow_conns takes over peer's ref */
- list_del (&conn->ksnc_list);
-
- route = conn->ksnc_route;
- if (route != NULL) {
- /* dissociate conn from route... */
- LASSERT (!route->ksnr_deleted);
- LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
-
- conn2 = NULL;
- list_for_each(tmp, &peer->ksnp_conns) {
- conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- if (conn2->ksnc_route == route &&
- conn2->ksnc_type == conn->ksnc_type)
- break;
-
- conn2 = NULL;
- }
- if (conn2 == NULL)
- route->ksnr_connected &= ~(1 << conn->ksnc_type);
-
- conn->ksnc_route = NULL;
-
-#if 0 /* irrelevent with only eager routes */
- list_del (&route->ksnr_list); /* make route least favourite */
- list_add_tail (&route->ksnr_list, &peer->ksnp_routes);
-#endif
- ksocknal_route_decref(route); /* drop conn's ref on route */
- }
-
- if (list_empty (&peer->ksnp_conns)) {
- /* No more connections to this peer */
-
- peer->ksnp_proto = NULL; /* renegotiate protocol version */
- peer->ksnp_error = error; /* stash last conn close reason */
-
- if (list_empty (&peer->ksnp_routes)) {
- /* I've just closed last conn belonging to a
- * peer with no routes to it */
- ksocknal_unlink_peer_locked (peer);
- }
- }
-
- spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
-
- list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns);
- cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
-
- spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
-}
-
-void
-ksocknal_peer_failed (ksock_peer_t *peer)
-{
- time_t last_alive = 0;
- int notify = 0;
-
- /* There has been a connection failure or comms error; but I'll only
- * tell LNET I think the peer is dead if it's to another kernel and
- * there are no connections or connection attempts in existance. */
-
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
- list_empty(&peer->ksnp_conns) &&
- peer->ksnp_accepting == 0 &&
- ksocknal_find_connecting_route_locked(peer) == NULL) {
- notify = 1;
- last_alive = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() -
- peer->ksnp_last_alive);
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- if (notify)
- lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0,
- last_alive);
-}
-
-void
-ksocknal_terminate_conn (ksock_conn_t *conn)
-{
- /* This gets called by the reaper (guaranteed thread context) to
- * disengage the socket from its callbacks and close it.
- * ksnc_refcount will eventually hit zero, and then the reaper will
- * destroy it. */
- ksock_peer_t *peer = conn->ksnc_peer;
- ksock_sched_t *sched = conn->ksnc_scheduler;
- int failed = 0;
- struct list_head *tmp;
- struct list_head *nxt;
- ksock_tx_t *tx;
- LIST_HEAD (zlist);
-
- LASSERT(conn->ksnc_closing);
-
- /* wake up the scheduler to "send" all remaining packets to /dev/null */
- spin_lock_bh (&sched->kss_lock);
-
- /* a closing conn is always ready to tx */
- conn->ksnc_tx_ready = 1;
-
- if (!conn->ksnc_tx_scheduled &&
- !list_empty(&conn->ksnc_tx_queue)){
- list_add_tail (&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- ksocknal_conn_addref(conn);
-
- cfs_waitq_signal (&sched->kss_waitq);
- }
-
- spin_unlock_bh (&sched->kss_lock);
-
- spin_lock(&peer->ksnp_lock);
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_zc_req_list) {
- tx = list_entry(tmp, ksock_tx_t, tx_zc_list);
-
- if (tx->tx_conn != conn)
- continue;
-
- LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0);
-
- tx->tx_msg.ksm_zc_req_cookie = 0;
- list_del(&tx->tx_zc_list);
- list_add(&tx->tx_zc_list, &zlist);
- }
-
- spin_unlock(&peer->ksnp_lock);
-
- list_for_each_safe(tmp, nxt, &zlist) {
- tx = list_entry(tmp, ksock_tx_t, tx_zc_list);
-
- list_del(&tx->tx_zc_list);
- ksocknal_tx_decref(tx);
- }
-
- /* serialise with callbacks */
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
-
- /* OK, so this conn may not be completely disengaged from its
- * scheduler yet, but it _has_ committed to terminate... */
- conn->ksnc_scheduler->kss_nconns--;
-
- if (peer->ksnp_error != 0) {
- /* peer's last conn closed in error */
- LASSERT (list_empty (&peer->ksnp_conns));
- failed = 1;
- peer->ksnp_error = 0; /* avoid multiple notifications */
- }
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- if (failed)
- ksocknal_peer_failed(peer);
-
- /* The socket is closed on the final put; either here, or in
- * ksocknal_{send,recv}msg(). Since we set up the linger2 option
- * when the connection was established, this will close the socket
- * immediately, aborting anything buffered in it. Any hung
- * zero-copy transmits will therefore complete in finite time. */
- ksocknal_connsock_decref(conn);
-}
-
-void
-ksocknal_queue_zombie_conn (ksock_conn_t *conn)
-{
- /* Queue the conn for the reaper to destroy */
-
- LASSERT (atomic_read(&conn->ksnc_conn_refcount) == 0);
- spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
-
- list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
- cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
-}
-
-void
-ksocknal_destroy_conn (ksock_conn_t *conn)
-{
- /* Final coup-de-grace of the reaper */
- CDEBUG (D_NET, "connection %p\n", conn);
-
- LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0);
- LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0);
- LASSERT (conn->ksnc_sock == NULL);
- LASSERT (conn->ksnc_route == NULL);
- LASSERT (!conn->ksnc_tx_scheduled);
- LASSERT (!conn->ksnc_rx_scheduled);
- LASSERT (list_empty(&conn->ksnc_tx_queue));
-
- /* complete current receive if any */
- switch (conn->ksnc_rx_state) {
- case SOCKNAL_RX_LNET_PAYLOAD:
- CERROR("Completing partial receive from %s"
- ", ip %d.%d.%d.%d:%d, with error\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
- lnet_finalize (conn->ksnc_peer->ksnp_ni,
- conn->ksnc_cookie, -EIO);
- break;
- case SOCKNAL_RX_LNET_HEADER:
- if (conn->ksnc_rx_started)
- CERROR("Incomplete receive of lnet header from %s"
- ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
- conn->ksnc_proto->pro_version);
- break;
- case SOCKNAL_RX_KSM_HEADER:
- if (conn->ksnc_rx_started)
- CERROR("Incomplete receive of ksock message from %s"
- ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
- conn->ksnc_proto->pro_version);
- break;
- case SOCKNAL_RX_SLOP:
- if (conn->ksnc_rx_started)
- CERROR("Incomplete receive of slops from %s"
- ", ip %d.%d.%d.%d:%d, with error\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
- break;
- default:
- LBUG ();
- break;
- }
-
- ksocknal_peer_decref(conn->ksnc_peer);
-
- LIBCFS_FREE (conn, sizeof (*conn));
-}
-
-int
-ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
-{
- ksock_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
- conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
-
- if (ipaddr == 0 ||
- conn->ksnc_ipaddr == ipaddr) {
- count++;
- ksocknal_close_conn_locked (conn, why);
- }
- }
-
- return (count);
-}
-
-int
-ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
-{
- ksock_peer_t *peer = conn->ksnc_peer;
- __u32 ipaddr = conn->ksnc_ipaddr;
- int count;
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- return (count);
-}
-
-int
-ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
-{
- ksock_peer_t *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- int count = 0;
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- if (id.nid != LNET_NID_ANY)
- lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
- else {
- lo = 0;
- hi = ksocknal_data.ksnd_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
-
- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
-
- if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
- (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
- continue;
-
- count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
- }
- }
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- /* wildcards always succeed */
- if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
- return (0);
-
- return (count == 0 ? -ENOENT : 0);
-}
-
-void
-ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive)
-{
- /* The router is telling me she's been notified of a change in
- * gateway state.... */
- lnet_process_id_t id = {.nid = gw_nid, .pid = LNET_PID_ANY};
-
- CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
- alive ? "up" : "down");
-
- if (!alive) {
- /* If the gateway crashed, close all open connections... */
- ksocknal_close_matching_conns (id, 0);
- return;
- }
-
- /* ...otherwise do nothing. We can only establish new connections
- * if we have autroutes, and these connect on demand. */
-}
-
-void
-ksocknal_push_peer (ksock_peer_t *peer)
-{
- int index;
- int i;
- struct list_head *tmp;
- ksock_conn_t *conn;
-
- for (index = 0; ; index++) {
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- i = 0;
- conn = NULL;
-
- list_for_each (tmp, &peer->ksnp_conns) {
- if (i++ == index) {
- conn = list_entry (tmp, ksock_conn_t, ksnc_list);
- ksocknal_conn_addref(conn);
- break;
- }
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- if (conn == NULL)
- break;
-
- ksocknal_lib_push_conn (conn);
- ksocknal_conn_decref(conn);
- }
-}
-
-int
-ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id)
-{
- ksock_peer_t *peer;
- struct list_head *tmp;
- int index;
- int i;
- int j;
- int rc = -ENOENT;
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- for (j = 0; ; j++) {
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- index = 0;
- peer = NULL;
-
- list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(tmp, ksock_peer_t,
- ksnp_list);
-
- if (!((id.nid == LNET_NID_ANY ||
- id.nid == peer->ksnp_id.nid) &&
- (id.pid == LNET_PID_ANY ||
- id.pid == peer->ksnp_id.pid))) {
- peer = NULL;
- continue;
- }
-
- if (index++ == j) {
- ksocknal_peer_addref(peer);
- break;
- }
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- if (peer != NULL) {
- rc = 0;
- ksocknal_push_peer (peer);
- ksocknal_peer_decref(peer);
- }
- }
-
- }
-
- return (rc);
-}
-
-int
-ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
-{
- ksock_net_t *net = ni->ni_data;
- ksock_interface_t *iface;
- int rc;
- int i;
- int j;
- struct list_head *ptmp;
- ksock_peer_t *peer;
- struct list_head *rtmp;
- ksock_route_t *route;
-
- if (ipaddress == 0 ||
- netmask == 0)
- return (-EINVAL);
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- iface = ksocknal_ip2iface(ni, ipaddress);
- if (iface != NULL) {
- /* silently ignore dups */
- rc = 0;
- } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
- rc = -ENOSPC;
- } else {
- iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
-
- iface->ksni_ipaddr = ipaddress;
- iface->ksni_netmask = netmask;
- iface->ksni_nroutes = 0;
- iface->ksni_npeers = 0;
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
-
- for (j = 0; j < peer->ksnp_n_passive_ips; j++)
- if (peer->ksnp_passive_ips[j] == ipaddress)
- iface->ksni_npeers++;
-
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
- if (route->ksnr_myipaddr == ipaddress)
- iface->ksni_nroutes++;
- }
- }
- }
-
- rc = 0;
- /* NB only new connections will pay attention to the new interface! */
- }
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- return (rc);
-}
-
-void
-ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
-{
- struct list_head *tmp;
- struct list_head *nxt;
- ksock_route_t *route;
- ksock_conn_t *conn;
- int i;
- int j;
-
- for (i = 0; i < peer->ksnp_n_passive_ips; i++)
- if (peer->ksnp_passive_ips[i] == ipaddr) {
- for (j = i+1; j < peer->ksnp_n_passive_ips; j++)
- peer->ksnp_passive_ips[j-1] =
- peer->ksnp_passive_ips[j];
- peer->ksnp_n_passive_ips--;
- break;
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry (tmp, ksock_route_t, ksnr_list);
-
- if (route->ksnr_myipaddr != ipaddr)
- continue;
-
- if (route->ksnr_share_count != 0) {
- /* Manually created; keep, but unbind */
- route->ksnr_myipaddr = 0;
- } else {
- ksocknal_del_route_locked(route);
- }
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_myipaddr == ipaddr)
- ksocknal_close_conn_locked (conn, 0);
- }
-}
-
-int
-ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
-{
- ksock_net_t *net = ni->ni_data;
- int rc = -ENOENT;
- struct list_head *tmp;
- struct list_head *nxt;
- ksock_peer_t *peer;
- __u32 this_ip;
- int i;
- int j;
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
-
- if (!(ipaddress == 0 ||
- ipaddress == this_ip))
- continue;
-
- rc = 0;
-
- for (j = i+1; j < net->ksnn_ninterfaces; j++)
- net->ksnn_interfaces[j-1] =
- net->ksnn_interfaces[j];
-
- net->ksnn_ninterfaces--;
-
- for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
- list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) {
- peer = list_entry(tmp, ksock_peer_t, ksnp_list);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- ksocknal_peer_del_interface_locked(peer, this_ip);
- }
- }
- }
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- return (rc);
-}
-
-int
-ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc;
-
- switch(cmd) {
- case IOC_LIBCFS_GET_INTERFACE: {
- ksock_net_t *net = ni->ni_data;
- ksock_interface_t *iface;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- if (data->ioc_count < 0 ||
- data->ioc_count >= net->ksnn_ninterfaces) {
- rc = -ENOENT;
- } else {
- rc = 0;
- iface = &net->ksnn_interfaces[data->ioc_count];
-
- data->ioc_u32[0] = iface->ksni_ipaddr;
- data->ioc_u32[1] = iface->ksni_netmask;
- data->ioc_u32[2] = iface->ksni_npeers;
- data->ioc_u32[3] = iface->ksni_nroutes;
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return rc;
- }
-
- case IOC_LIBCFS_ADD_INTERFACE:
- return ksocknal_add_interface(ni,
- data->ioc_u32[0], /* IP address */
- data->ioc_u32[1]); /* net mask */
-
- case IOC_LIBCFS_DEL_INTERFACE:
- return ksocknal_del_interface(ni,
- data->ioc_u32[0]); /* IP address */
-
- case IOC_LIBCFS_GET_PEER: {
- lnet_process_id_t id = {0,};
- __u32 myip = 0;
- __u32 ip = 0;
- int port = 0;
- int conn_count = 0;
- int share_count = 0;
-
- rc = ksocknal_get_peer_info(ni, data->ioc_count,
- &id, &myip, &ip, &port,
- &conn_count, &share_count);
- if (rc != 0)
- return rc;
-
- data->ioc_nid = id.nid;
- data->ioc_count = share_count;
- data->ioc_u32[0] = ip;
- data->ioc_u32[1] = port;
- data->ioc_u32[2] = myip;
- data->ioc_u32[3] = conn_count;
- data->ioc_u32[4] = id.pid;
- return 0;
- }
-
- case IOC_LIBCFS_ADD_PEER: {
- lnet_process_id_t id = {.nid = data->ioc_nid,
- .pid = LUSTRE_SRV_LNET_PID};
- return ksocknal_add_peer (ni, id,
- data->ioc_u32[0], /* IP */
- data->ioc_u32[1]); /* port */
- }
- case IOC_LIBCFS_DEL_PEER: {
- lnet_process_id_t id = {.nid = data->ioc_nid,
- .pid = LNET_PID_ANY};
- return ksocknal_del_peer (ni, id,
- data->ioc_u32[0]); /* IP */
- }
- case IOC_LIBCFS_GET_CONN: {
- int txmem;
- int rxmem;
- int nagle;
- ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count);
-
- if (conn == NULL)
- return -ENOENT;
-
- ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
-
- data->ioc_count = txmem;
- data->ioc_nid = conn->ksnc_peer->ksnp_id.nid;
- data->ioc_flags = nagle;
- data->ioc_u32[0] = conn->ksnc_ipaddr;
- data->ioc_u32[1] = conn->ksnc_port;
- data->ioc_u32[2] = conn->ksnc_myipaddr;
- data->ioc_u32[3] = conn->ksnc_type;
- data->ioc_u32[4] = conn->ksnc_scheduler -
- ksocknal_data.ksnd_schedulers;
- data->ioc_u32[5] = rxmem;
- data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
- ksocknal_conn_decref(conn);
- return 0;
- }
-
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- lnet_process_id_t id = {.nid = data->ioc_nid,
- .pid = LNET_PID_ANY};
-
- return ksocknal_close_matching_conns (id,
- data->ioc_u32[0]);
- }
- case IOC_LIBCFS_REGISTER_MYNID:
- /* Ignore if this is a noop */
- if (data->ioc_nid == ni->ni_nid)
- return 0;
-
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- return -EINVAL;
-
- case IOC_LIBCFS_PUSH_CONNECTION: {
- lnet_process_id_t id = {.nid = data->ioc_nid,
- .pid = LNET_PID_ANY};
-
- return ksocknal_push(ni, id);
- }
- default:
- return -EINVAL;
- }
- /* not reached */
-}
-
-void
-ksocknal_free_buffers (void)
-{
- LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
-
- if (ksocknal_data.ksnd_schedulers != NULL)
- LIBCFS_FREE (ksocknal_data.ksnd_schedulers,
- sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
-
- LIBCFS_FREE (ksocknal_data.ksnd_peers,
- sizeof (struct list_head) *
- ksocknal_data.ksnd_peer_hash_size);
-
- spin_lock(&ksocknal_data.ksnd_tx_lock);
-
- if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
- struct list_head zlist;
- ksock_tx_t *tx;
-
- list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
- list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
-
- while(!list_empty(&zlist)) {
- tx = list_entry(zlist.next, ksock_tx_t, tx_list);
- list_del(&tx->tx_list);
- LIBCFS_FREE(tx, tx->tx_desc_size);
- }
- } else {
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
- }
-}
-
-void
-ksocknal_base_shutdown (void)
-{
- ksock_sched_t *sched;
- int i;
-
- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
- LASSERT (ksocknal_data.ksnd_nnets == 0);
-
- switch (ksocknal_data.ksnd_init) {
- default:
- LASSERT (0);
-
- case SOCKNAL_INIT_ALL:
- case SOCKNAL_INIT_DATA:
- LASSERT (ksocknal_data.ksnd_peers != NULL);
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- LASSERT (list_empty (&ksocknal_data.ksnd_peers[i]));
- }
- LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns));
- LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns));
- LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs));
- LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes));
-
- if (ksocknal_data.ksnd_schedulers != NULL)
- for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
- ksock_sched_t *kss =
- &ksocknal_data.ksnd_schedulers[i];
-
- LASSERT (list_empty (&kss->kss_tx_conns));
- LASSERT (list_empty (&kss->kss_rx_conns));
- LASSERT (list_empty (&kss->kss_zombie_noop_txs));
- LASSERT (kss->kss_nconns == 0);
- }
-
- /* flag threads to terminate; wake and wait for them to die */
- ksocknal_data.ksnd_shuttingdown = 1;
- cfs_waitq_broadcast (&ksocknal_data.ksnd_connd_waitq);
- cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq);
-
- if (ksocknal_data.ksnd_schedulers != NULL)
- for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
- sched = &ksocknal_data.ksnd_schedulers[i];
- cfs_waitq_broadcast(&sched->kss_waitq);
- }
-
- i = 4;
- read_lock (&ksocknal_data.ksnd_global_lock);
- while (ksocknal_data.ksnd_nthreads != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d threads to terminate\n",
- ksocknal_data.ksnd_nthreads);
- read_unlock (&ksocknal_data.ksnd_global_lock);
- cfs_pause(cfs_time_seconds(1));
- read_lock (&ksocknal_data.ksnd_global_lock);
- }
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- ksocknal_free_buffers();
-
- ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
- break;
- }
-
- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- PORTAL_MODULE_UNUSE;
-}
-
-
-__u64
-ksocknal_new_incarnation (void)
-{
- struct timeval tv;
-
- /* The incarnation number is the time this module loaded and it
- * identifies this particular instance of the socknal. Hopefully
- * we won't be able to reboot more frequently than 1MHz for the
- * forseeable future :) */
-
- do_gettimeofday(&tv);
-
- return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-}
-
-int
-ksocknal_base_startup (void)
-{
- int rc;
- int i;
-
- LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
- LASSERT (ksocknal_data.ksnd_nnets == 0);
-
- memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
-
- ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
- LIBCFS_ALLOC (ksocknal_data.ksnd_peers,
- sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
- if (ksocknal_data.ksnd_peers == NULL)
- return -ENOMEM;
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
- CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
-
- rwlock_init(&ksocknal_data.ksnd_global_lock);
-
- spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
- CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
- CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
- CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
- cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_lock_init (&ksocknal_data.ksnd_connd_lock);
- CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs);
- CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes);
- cfs_waitq_init(&ksocknal_data.ksnd_connd_waitq);
-
- spin_lock_init (&ksocknal_data.ksnd_tx_lock);
- CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs);
-
- /* NB memset above zeros whole of ksocknal_data, including
- * ksocknal_data.ksnd_irqinfo[all].ksni_valid */
-
- /* flag lists/ptrs/locks initialised */
- ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
- PORTAL_MODULE_USE;
-
- ksocknal_data.ksnd_nschedulers = ksocknal_nsched();
- LIBCFS_ALLOC(ksocknal_data.ksnd_schedulers,
- sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
- if (ksocknal_data.ksnd_schedulers == NULL)
- goto failed;
-
- for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
- ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
-
- spin_lock_init (&kss->kss_lock);
- CFS_INIT_LIST_HEAD (&kss->kss_rx_conns);
- CFS_INIT_LIST_HEAD (&kss->kss_tx_conns);
- CFS_INIT_LIST_HEAD (&kss->kss_zombie_noop_txs);
- cfs_waitq_init (&kss->kss_waitq);
- }
-
- for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
- rc = ksocknal_thread_start (ksocknal_scheduler,
- &ksocknal_data.ksnd_schedulers[i]);
- if (rc != 0) {
- CERROR("Can't spawn socknal scheduler[%d]: %d\n",
- i, rc);
- goto failed;
- }
- }
-
- /* must have at least 2 connds to remain responsive to accepts while
- * connecting */
- if (*ksocknal_tunables.ksnd_nconnds < 2)
- *ksocknal_tunables.ksnd_nconnds = 2;
-
- for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
- rc = ksocknal_thread_start (ksocknal_connd, (void *)((long)i));
- if (rc != 0) {
- CERROR("Can't spawn socknal connd: %d\n", rc);
- goto failed;
- }
- }
-
- rc = ksocknal_thread_start (ksocknal_reaper, NULL);
- if (rc != 0) {
- CERROR ("Can't spawn socknal reaper: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
-
- return 0;
-
- failed:
- ksocknal_base_shutdown();
- return -ENETDOWN;
-}
-
-void
-ksocknal_debug_peerhash (lnet_ni_t *ni)
-{
- ksock_peer_t *peer = NULL;
- struct list_head *tmp;
- int i;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry (tmp, ksock_peer_t, ksnp_list);
-
- if (peer->ksnp_ni == ni) break;
-
- peer = NULL;
- }
- }
-
- if (peer != NULL) {
- ksock_route_t *route;
- ksock_conn_t *conn;
-
- CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, "
- "closing %d, accepting %d, err %d, zcookie "LPU64", "
- "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id),
- atomic_read(&peer->ksnp_refcount),
- peer->ksnp_sharecount, peer->ksnp_closing,
- peer->ksnp_accepting, peer->ksnp_error,
- peer->ksnp_zc_next_cookie,
- !list_empty(&peer->ksnp_tx_queue),
- !list_empty(&peer->ksnp_zc_req_list));
-
- list_for_each (tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t, ksnr_list);
- CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, "
- "del %d\n", atomic_read(&route->ksnr_refcount),
- route->ksnr_scheduled, route->ksnr_connecting,
- route->ksnr_connected, route->ksnr_deleted);
- }
-
- list_for_each (tmp, &peer->ksnp_conns) {
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
- CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
- atomic_read(&conn->ksnc_conn_refcount),
- atomic_read(&conn->ksnc_sock_refcount),
- conn->ksnc_type, conn->ksnc_closing);
- }
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return;
-}
-
-void
-ksocknal_shutdown (lnet_ni_t *ni)
-{
- ksock_net_t *net = ni->ni_data;
- int i;
- lnet_process_id_t anyid = {.nid = LNET_NID_ANY,
- .pid = LNET_PID_ANY};
-
- LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
- LASSERT(ksocknal_data.ksnd_nnets > 0);
-
- spin_lock_bh (&net->ksnn_lock);
- net->ksnn_shutdown = 1; /* prevent new peers */
- spin_unlock_bh (&net->ksnn_lock);
-
- /* Delete all peers */
- ksocknal_del_peer(ni, anyid, 0);
-
- /* Wait for all peer state to clean up */
- i = 2;
- spin_lock_bh (&net->ksnn_lock);
- while (net->ksnn_npeers != 0) {
- spin_unlock_bh (&net->ksnn_lock);
-
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d peers to disconnect\n",
- net->ksnn_npeers);
- cfs_pause(cfs_time_seconds(1));
-
- ksocknal_debug_peerhash(ni);
-
- spin_lock_bh (&net->ksnn_lock);
- }
- spin_unlock_bh (&net->ksnn_lock);
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
- LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0);
- }
-
- LIBCFS_FREE(net, sizeof(*net));
-
- ksocknal_data.ksnd_nnets--;
- if (ksocknal_data.ksnd_nnets == 0)
- ksocknal_base_shutdown();
-}
-
-int
-ksocknal_enumerate_interfaces(ksock_net_t *net)
-{
- char **names;
- int i;
- int j;
- int rc;
- int n;
-
- n = libcfs_ipif_enumerate(&names);
- if (n <= 0) {
- CERROR("Can't enumerate interfaces: %d\n", n);
- return n;
- }
-
- for (i = j = 0; i < n; i++) {
- int up;
- __u32 ip;
- __u32 mask;
-
- if (!strcmp(names[i], "lo")) /* skip the loopback IF */
- continue;
-
- rc = libcfs_ipif_query(names[i], &up, &ip, &mask);
- if (rc != 0) {
- CWARN("Can't get interface %s info: %d\n",
- names[i], rc);
- continue;
- }
-
- if (!up) {
- CWARN("Ignoring interface %s (down)\n",
- names[i]);
- continue;
- }
-
- if (j == LNET_MAX_INTERFACES) {
- CWARN("Ignoring interface %s (too many interfaces)\n",
- names[i]);
- continue;
- }
-
- net->ksnn_interfaces[j].ksni_ipaddr = ip;
- net->ksnn_interfaces[j].ksni_netmask = mask;
- j++;
- }
-
- libcfs_ipif_free_enumeration(names, n);
-
- if (j == 0)
- CERROR("Can't find any usable interfaces\n");
-
- return j;
-}
-
-int
-ksocknal_startup (lnet_ni_t *ni)
-{
- ksock_net_t *net;
- int rc;
- int i;
-
- LASSERT (ni->ni_lnd == &the_ksocklnd);
-
- if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
- rc = ksocknal_base_startup();
- if (rc != 0)
- return rc;
- }
-
- LIBCFS_ALLOC(net, sizeof(*net));
- if (net == NULL)
- goto fail_0;
-
- memset(net, 0, sizeof(*net));
- spin_lock_init(&net->ksnn_lock);
- net->ksnn_incarnation = ksocknal_new_incarnation();
- ni->ni_data = net;
- ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
- ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peercredits;
-
- if (ni->ni_interfaces[0] == NULL) {
- rc = ksocknal_enumerate_interfaces(net);
- if (rc <= 0)
- goto fail_1;
-
- net->ksnn_ninterfaces = 1;
- } else {
- for (i = 0; i < LNET_MAX_INTERFACES; i++) {
- int up;
-
- if (ni->ni_interfaces[i] == NULL)
- break;
-
- rc = libcfs_ipif_query(
- ni->ni_interfaces[i], &up,
- &net->ksnn_interfaces[i].ksni_ipaddr,
- &net->ksnn_interfaces[i].ksni_netmask);
-
- if (rc != 0) {
- CERROR("Can't get interface %s info: %d\n",
- ni->ni_interfaces[i], rc);
- goto fail_1;
- }
-
- if (!up) {
- CERROR("Interface %s is down\n",
- ni->ni_interfaces[i]);
- goto fail_1;
- }
- }
- net->ksnn_ninterfaces = i;
- }
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
- net->ksnn_interfaces[0].ksni_ipaddr);
-
- ksocknal_data.ksnd_nnets++;
-
- return 0;
-
- fail_1:
- LIBCFS_FREE(net, sizeof(*net));
- fail_0:
- if (ksocknal_data.ksnd_nnets == 0)
- ksocknal_base_shutdown();
-
- return -ENETDOWN;
-}
-
-
-void __exit
-ksocknal_module_fini (void)
-{
- lnet_unregister_lnd(&the_ksocklnd);
- ksocknal_lib_tunables_fini();
-}
-
-int __init
-ksocknal_module_init (void)
-{
- int rc;
-
- /* check ksnr_connected/connecting field large enough */
- CLASSERT(SOCKLND_CONN_NTYPES <= 4);
-
- rc = ksocknal_lib_tunables_init();
- if (rc != 0)
- return rc;
-
- lnet_register_lnd(&the_ksocklnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel TCP Socket LND v2.0.0");
-MODULE_LICENSE("GPL");
-
-cfs_module(ksocknal, "2.0.0", ksocknal_module_init, ksocknal_module_fini);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define DEBUG_PORTAL_ALLOC
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#if defined(__linux__)
-#include "socklnd_lib-linux.h"
-#elif defined(__APPLE__)
-#include "socklnd_lib-darwin.h"
-#elif defined(__WINNT__)
-#include "socklnd_lib-winnt.h"
-#else
-#error Unsupported Operating System
-#endif
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-#include <lnet/socklnd.h>
-
-#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define SOCKNAL_ENOMEM_RETRY CFS_TICK /* jiffies between retries */
-
-#define SOCKNAL_ROUND_ROBIN 0 /* round robin / load balance */
-
-#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
-#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */
-
-#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */
-
-/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
- * no risk if we're not running on a CONFIG_HIGHMEM platform. */
-#ifdef CONFIG_HIGHMEM
-# define SOCKNAL_RISK_KMAP_DEADLOCK 0
-#else
-# define SOCKNAL_RISK_KMAP_DEADLOCK 1
-#endif
-
-typedef struct /* per scheduler state */
-{
- spinlock_t kss_lock; /* serialise */
- struct list_head kss_rx_conns; /* conn waiting to be read */
- struct list_head kss_tx_conns; /* conn waiting to be written */
- struct list_head kss_zombie_noop_txs; /* zombie noop tx list */
- cfs_waitq_t kss_waitq; /* where scheduler sleeps */
- int kss_nconns; /* # connections assigned to this scheduler */
-} ksock_sched_t;
-
-typedef struct
-{
- unsigned int ksni_valid:1; /* been set yet? */
- unsigned int ksni_bound:1; /* bound to a cpu yet? */
- unsigned int ksni_sched:6; /* which scheduler (assumes < 64) */
-} ksock_irqinfo_t;
-
-typedef struct /* in-use interface */
-{
- __u32 ksni_ipaddr; /* interface's IP address */
- __u32 ksni_netmask; /* interface's network mask */
- int ksni_nroutes; /* # routes using (active) */
- int ksni_npeers; /* # peers using (passive) */
- char ksni_name[16]; /* interface name */
-} ksock_interface_t;
-
-typedef struct
-{
- int *ksnd_timeout; /* "stuck" socket timeout (seconds) */
- int *ksnd_nconnds; /* # connection daemons */
- int *ksnd_min_reconnectms; /* first connection retry after (ms)... */
- int *ksnd_max_reconnectms; /* ...exponentially increasing to this */
- int *ksnd_eager_ack; /* make TCP ack eagerly? */
- int *ksnd_typed_conns; /* drive sockets by type? */
- int *ksnd_min_bulk; /* smallest "large" message */
- int *ksnd_tx_buffer_size; /* socket tx buffer size */
- int *ksnd_rx_buffer_size; /* socket rx buffer size */
- int *ksnd_nagle; /* enable NAGLE? */
- int *ksnd_keepalive_idle; /* # idle secs before 1st probe */
- int *ksnd_keepalive_count; /* # probes */
- int *ksnd_keepalive_intvl; /* time between probes */
- int *ksnd_credits; /* # concurrent sends */
- int *ksnd_peercredits; /* # concurrent sends to 1 peer */
- int *ksnd_enable_csum; /* enable check sum */
- int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
- unsigned int *ksnd_zc_min_frag; /* minimum zero copy frag size */
-#ifdef CPU_AFFINITY
- int *ksnd_irq_affinity; /* enable IRQ affinity? */
-#endif
-#ifdef SOCKNAL_BACKOFF
- int *ksnd_backoff_init; /* initial TCP backoff */
- int *ksnd_backoff_max; /* maximum TCP backoff */
-#endif
-#if SOCKNAL_VERSION_DEBUG
- int *ksnd_protocol; /* protocol version */
-#endif
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *ksnd_sysctl; /* sysctl interface */
-#endif
-} ksock_tunables_t;
-
-typedef struct
-{
- __u64 ksnn_incarnation; /* my epoch */
- spinlock_t ksnn_lock; /* serialise */
- int ksnn_npeers; /* # peers */
- int ksnn_shutdown; /* shutting down? */
- int ksnn_ninterfaces; /* IP interfaces */
- ksock_interface_t ksnn_interfaces[LNET_MAX_INTERFACES];
-} ksock_net_t;
-
-typedef struct
-{
- int ksnd_init; /* initialisation state */
- int ksnd_nnets; /* # networks set up */
-
- rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */
- struct list_head *ksnd_peers; /* hash table of all my known peers */
- int ksnd_peer_hash_size; /* size of ksnd_peers */
-
- int ksnd_nthreads; /* # live threads */
- int ksnd_shuttingdown; /* tell threads to exit */
- int ksnd_nschedulers; /* # schedulers */
- ksock_sched_t *ksnd_schedulers; /* their state */
-
- atomic_t ksnd_nactive_txs; /* #active txs */
-
- struct list_head ksnd_deathrow_conns; /* conns to close: reaper_lock*/
- struct list_head ksnd_zombie_conns; /* conns to free: reaper_lock */
- struct list_head ksnd_enomem_conns; /* conns to retry: reaper_lock*/
- cfs_waitq_t ksnd_reaper_waitq; /* reaper sleeps here */
- cfs_time_t ksnd_reaper_waketime; /* when reaper will wake */
- spinlock_t ksnd_reaper_lock; /* serialise */
-
- int ksnd_enomem_tx; /* test ENOMEM sender */
- int ksnd_stall_tx; /* test sluggish sender */
- int ksnd_stall_rx; /* test sluggish receiver */
-
- struct list_head ksnd_connd_connreqs; /* incoming connection requests */
- struct list_head ksnd_connd_routes; /* routes waiting to be connected */
- cfs_waitq_t ksnd_connd_waitq; /* connds sleep here */
- int ksnd_connd_connecting;/* # connds connecting */
- spinlock_t ksnd_connd_lock; /* serialise */
-
- struct list_head ksnd_idle_noop_txs; /* list head for freed noop tx */
- spinlock_t ksnd_tx_lock; /* serialise, NOT safe in g_lock */
-
- ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */
-
-} ksock_nal_data_t;
-
-#define SOCKNAL_INIT_NOTHING 0
-#define SOCKNAL_INIT_DATA 1
-#define SOCKNAL_INIT_ALL 2
-
-/* A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments (the first frag contains the portals header),
- * followed by 0 or more lnet_kiov_t fragments.
- *
- * On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header). Once the header has been received, the payload is
- * received into either struct iovec or lnet_kiov_t fragments, depending on
- * what the header matched or whether the message needs forwarding. */
-
-struct ksock_conn; /* forward ref */
-struct ksock_peer; /* forward ref */
-struct ksock_route; /* forward ref */
-struct ksock_proto; /* forward ref */
-
-typedef struct /* transmit packet */
-{
- struct list_head tx_list; /* queue on conn for transmission etc */
- struct list_head tx_zc_list; /* queue on peer for ZC request */
- atomic_t tx_refcount; /* tx reference count */
- int tx_nob; /* # packet bytes */
- int tx_resid; /* residual bytes */
- int tx_niov; /* # packet iovec frags */
- struct iovec *tx_iov; /* packet iovec frags */
- int tx_nkiov; /* # packet page frags */
- unsigned int tx_checked_zc; /* Have I checked if I should ZC? */
- lnet_kiov_t *tx_kiov; /* packet page frags */
- struct ksock_conn *tx_conn; /* owning conn */
- lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */
- ksock_msg_t tx_msg; /* socklnd message buffer */
- int tx_desc_size; /* size of this descriptor */
- union {
- struct {
- struct iovec iov; /* virt hdr */
- lnet_kiov_t kiov[0]; /* paged payload */
- } paged;
- struct {
- struct iovec iov[1]; /* virt hdr + payload */
- } virt;
- } tx_frags;
-} ksock_tx_t;
-
-#define KSOCK_NOOP_TX_SIZE offsetof(ksock_tx_t, tx_frags.paged.kiov[0])
-
-/* network zero copy callback descriptor embedded in ksock_tx_t */
-
-/* space for the rx frag descriptors; we either read a single contiguous
- * header, or up to LNET_MAX_IOV frags of payload of either type. */
-typedef union {
- struct iovec iov[LNET_MAX_IOV];
- lnet_kiov_t kiov[LNET_MAX_IOV];
-} ksock_rxiovspace_t;
-
-#define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */
-#define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */
-#define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */
-#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */
-#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */
-#define SOCKNAL_RX_SLOP 6 /* skipping body */
-
-typedef struct ksock_conn
-{
- struct ksock_peer *ksnc_peer; /* owning peer */
- struct ksock_route *ksnc_route; /* owning route */
- struct list_head ksnc_list; /* stash on peer's conn list */
- cfs_socket_t *ksnc_sock; /* actual socket */
- void *ksnc_saved_data_ready; /* socket's original data_ready() callback */
- void *ksnc_saved_write_space; /* socket's original write_space() callback */
- atomic_t ksnc_conn_refcount; /* conn refcount */
- atomic_t ksnc_sock_refcount; /* sock refcount */
- ksock_sched_t *ksnc_scheduler; /* who schedules this connection */
- __u32 ksnc_myipaddr; /* my IP */
- __u32 ksnc_ipaddr; /* peer's IP */
- int ksnc_port; /* peer's port */
- int ksnc_type:3; /* type of connection, should be signed value */
- int ksnc_closing:1; /* being shut down */
- int ksnc_flip:1; /* flip or not, only for V2.x */
- int ksnc_zc_capable:1; /* enable to ZC */
- struct ksock_proto *ksnc_proto; /* protocol for the connection */
-
- /* reader */
- struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
- cfs_time_t ksnc_rx_deadline; /* when (in jiffies) receive times out */
- __u8 ksnc_rx_started; /* started receiving a message */
- __u8 ksnc_rx_ready; /* data ready to read */
- __u8 ksnc_rx_scheduled; /* being progressed */
- __u8 ksnc_rx_state; /* what is being read */
- int ksnc_rx_nob_left; /* # bytes to next hdr/body */
- int ksnc_rx_nob_wanted; /* bytes actually wanted */
- int ksnc_rx_niov; /* # iovec frags */
- struct iovec *ksnc_rx_iov; /* the iovec frags */
- int ksnc_rx_nkiov; /* # page frags */
- lnet_kiov_t *ksnc_rx_kiov; /* the page frags */
- ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */
- __u32 ksnc_rx_csum; /* partial checksum for incoming data */
- void *ksnc_cookie; /* rx lnet_finalize passthru arg */
- ksock_msg_t ksnc_msg; /* incoming message buffer:
- * V2.x message takes the whole struct
- * V1.x message is a bare lnet_hdr_t, it's stored
- * in ksnc_msg.ksm_u.lnetmsg */
-
- /* WRITER */
- struct list_head ksnc_tx_list; /* where I enq waiting for output space */
- struct list_head ksnc_tx_queue; /* packets waiting to be sent */
- ksock_tx_t *ksnc_tx_mono; /* V2.x only, next mono-packet, mono-packet is :
- * a. lnet packet without piggyback
- * b. noop ZC-ACK packet */
- cfs_time_t ksnc_tx_deadline; /* when (in jiffies) tx times out */
- int ksnc_tx_bufnob; /* send buffer marker */
- atomic_t ksnc_tx_nob; /* # bytes queued */
- int ksnc_tx_ready; /* write space */
- int ksnc_tx_scheduled; /* being progressed */
-
-#if !SOCKNAL_SINGLE_FRAG_RX
- struct iovec ksnc_rx_scratch_iov[LNET_MAX_IOV];
-#endif
-#if !SOCKNAL_SINGLE_FRAG_TX
- struct iovec ksnc_tx_scratch_iov[LNET_MAX_IOV];
-#endif
-} ksock_conn_t;
-
-typedef struct ksock_route
-{
- struct list_head ksnr_list; /* chain on peer route list */
- struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */
- struct ksock_peer *ksnr_peer; /* owning peer */
- atomic_t ksnr_refcount; /* # users */
- cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */
- cfs_duration_t ksnr_retry_interval; /* how long between retries */
- __u32 ksnr_myipaddr; /* my IP */
- __u32 ksnr_ipaddr; /* IP address to connect to */
- int ksnr_port; /* port to connect to */
- unsigned int ksnr_scheduled:1; /* scheduled for attention */
- unsigned int ksnr_connecting:1; /* connection establishment in progress */
- unsigned int ksnr_connected:4; /* connections established by type */
- unsigned int ksnr_deleted:1; /* been removed from peer? */
- unsigned int ksnr_share_count; /* created explicitly? */
- int ksnr_conn_count; /* # conns established by this route */
-} ksock_route_t;
-
-typedef struct ksock_peer
-{
- struct list_head ksnp_list; /* stash on global peer list */
- lnet_process_id_t ksnp_id; /* who's on the other end(s) */
- atomic_t ksnp_refcount; /* # users */
- int ksnp_sharecount; /* lconf usage counter */
- int ksnp_closing; /* being closed */
- int ksnp_accepting; /* # passive connections pending */
- int ksnp_error; /* errno on closing last conn */
- __u64 ksnp_zc_next_cookie;/* ZC completion cookie */
- __u64 ksnp_incarnation; /* latest known peer incarnation */
- struct ksock_proto *ksnp_proto; /* latest known peer protocol */
- struct list_head ksnp_conns; /* all active connections */
- struct list_head ksnp_routes; /* routes */
- struct list_head ksnp_tx_queue; /* waiting packets */
- spinlock_t ksnp_lock; /* serialize, NOT safe in g_lock */
- struct list_head ksnp_zc_req_list; /* zero copy requests wait for ACK */
- cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */
- lnet_ni_t *ksnp_ni; /* which network */
- int ksnp_n_passive_ips; /* # of... */
- __u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; /* preferred local interfaces */
-} ksock_peer_t;
-
-typedef struct ksock_connreq
-{
- struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */
- lnet_ni_t *ksncr_ni; /* chosen NI */
- cfs_socket_t *ksncr_sock; /* accepted socket */
-} ksock_connreq_t;
-
-extern ksock_nal_data_t ksocknal_data;
-extern ksock_tunables_t ksocknal_tunables;
-
-typedef struct ksock_proto
-{
- int pro_version; /* version number of protocol */
- int (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *); /* handshake function */
- int (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int);/* handshake function */
- void (*pro_pack)(ksock_tx_t *); /* message pack */
- void (*pro_unpack)(ksock_msg_t *); /* message unpack */
-} ksock_proto_t;
-
-extern ksock_proto_t ksocknal_protocol_v1x;
-extern ksock_proto_t ksocknal_protocol_v2x;
-
-#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR
-#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR
-#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR
-
-#ifndef CPU_MASK_NONE
-#define CPU_MASK_NONE 0UL
-#endif
-
-static inline int
-ksocknal_route_mask(void)
-{
- if (!*ksocknal_tunables.ksnd_typed_conns)
- return (1 << SOCKLND_CONN_ANY);
-
- return ((1 << SOCKLND_CONN_CONTROL) |
- (1 << SOCKLND_CONN_BULK_IN) |
- (1 << SOCKLND_CONN_BULK_OUT));
-}
-
-static inline struct list_head *
-ksocknal_nid2peerlist (lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
-
- return (&ksocknal_data.ksnd_peers [hash]);
-}
-
-static inline void
-ksocknal_conn_addref (ksock_conn_t *conn)
-{
- LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
- atomic_inc(&conn->ksnc_conn_refcount);
-}
-
-extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn);
-
-static inline void
-ksocknal_conn_decref (ksock_conn_t *conn)
-{
- LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
- if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
- ksocknal_queue_zombie_conn(conn);
-}
-
-static inline int
-ksocknal_connsock_addref (ksock_conn_t *conn)
-{
- int rc = -ESHUTDOWN;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
- if (!conn->ksnc_closing) {
- LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0);
- atomic_inc(&conn->ksnc_sock_refcount);
- rc = 0;
- }
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- return (rc);
-}
-
-static inline void
-ksocknal_connsock_decref (ksock_conn_t *conn)
-{
- LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0);
- if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
- LASSERT (conn->ksnc_closing);
- libcfs_sock_release(conn->ksnc_sock);
- conn->ksnc_sock = NULL;
- }
-}
-
-static inline void
-ksocknal_tx_addref (ksock_tx_t *tx)
-{
- LASSERT (atomic_read(&tx->tx_refcount) > 0);
- atomic_inc(&tx->tx_refcount);
-}
-
-extern void ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx);
-
-static inline void
-ksocknal_tx_decref (ksock_tx_t *tx)
-{
- LASSERT (atomic_read(&tx->tx_refcount) > 0);
- if (atomic_dec_and_test(&tx->tx_refcount))
- ksocknal_tx_done(NULL, tx);
-}
-
-static inline void
-ksocknal_route_addref (ksock_route_t *route)
-{
- LASSERT (atomic_read(&route->ksnr_refcount) > 0);
- atomic_inc(&route->ksnr_refcount);
-}
-
-extern void ksocknal_destroy_route (ksock_route_t *route);
-
-static inline void
-ksocknal_route_decref (ksock_route_t *route)
-{
- LASSERT (atomic_read (&route->ksnr_refcount) > 0);
- if (atomic_dec_and_test(&route->ksnr_refcount))
- ksocknal_destroy_route (route);
-}
-
-static inline void
-ksocknal_peer_addref (ksock_peer_t *peer)
-{
- LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
- atomic_inc(&peer->ksnp_refcount);
-}
-
-extern void ksocknal_destroy_peer (ksock_peer_t *peer);
-
-static inline void
-ksocknal_peer_decref (ksock_peer_t *peer)
-{
- LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
- if (atomic_dec_and_test(&peer->ksnp_refcount))
- ksocknal_destroy_peer (peer);
-}
-
-int ksocknal_startup (lnet_ni_t *ni);
-void ksocknal_shutdown (lnet_ni_t *ni);
-int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int ksocknal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-int ksocknal_accept(lnet_ni_t *ni, cfs_socket_t *sock);
-
-extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
-extern ksock_peer_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id);
-extern ksock_peer_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id);
-extern void ksocknal_peer_failed (ksock_peer_t *peer);
-extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
- cfs_socket_t *sock, int type);
-extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
-extern void ksocknal_terminate_conn (ksock_conn_t *conn);
-extern void ksocknal_destroy_conn (ksock_conn_t *conn);
-extern int ksocknal_close_peer_conns_locked (ksock_peer_t *peer,
- __u32 ipaddr, int why);
-extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
-extern int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr);
-
-extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
-extern void ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error);
-extern void ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive);
-extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg);
-extern void ksocknal_thread_fini (void);
-extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer);
-extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
-extern int ksocknal_scheduler (void *arg);
-extern int ksocknal_connd (void *arg);
-extern int ksocknal_reaper (void *arg);
-extern int ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
- lnet_nid_t peer_nid, ksock_hello_msg_t *hello);
-extern int ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
- ksock_hello_msg_t *hello, lnet_process_id_t *id,
- __u64 *incarnation);
-extern void ksocknal_read_callback(ksock_conn_t *conn);
-extern void ksocknal_write_callback(ksock_conn_t *conn);
-
-extern int ksocknal_lib_zc_capable(cfs_socket_t *sock);
-extern void ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn);
-extern void ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn);
-extern void ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn);
-extern void ksocknal_lib_push_conn (ksock_conn_t *conn);
-extern void ksocknal_lib_bind_irq (unsigned int irq);
-extern int ksocknal_lib_get_conn_addrs (ksock_conn_t *conn);
-extern unsigned int ksocknal_lib_sock_irq (cfs_socket_t *sock);
-extern int ksocknal_lib_setup_sock (cfs_socket_t *so);
-extern int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx);
-extern int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx);
-extern void ksocknal_lib_eager_ack (ksock_conn_t *conn);
-extern int ksocknal_lib_recv_iov (ksock_conn_t *conn);
-extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn);
-extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem,
- int *rxmem, int *nagle);
-
-extern int ksocknal_lib_tunables_init(void);
-extern void ksocknal_lib_tunables_fini(void);
-
-extern void ksocknal_lib_csum_tx(ksock_tx_t *tx);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "socklnd.h"
-
-ksock_tx_t *
-ksocknal_alloc_tx (int size)
-{
- ksock_tx_t *tx = NULL;
-
- if (size == KSOCK_NOOP_TX_SIZE) {
- /* searching for a noop tx in free list */
- spin_lock(&ksocknal_data.ksnd_tx_lock);
-
- if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
- tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
- ksock_tx_t, tx_list);
- LASSERT(tx->tx_desc_size == size);
- list_del(&tx->tx_list);
- }
-
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
- }
-
- if (tx == NULL)
- LIBCFS_ALLOC(tx, size);
-
- if (tx == NULL)
- return NULL;
-
- atomic_set(&tx->tx_refcount, 1);
- tx->tx_desc_size = size;
- atomic_inc(&ksocknal_data.ksnd_nactive_txs);
-
- return tx;
-}
-
-void
-ksocknal_free_tx (ksock_tx_t *tx)
-{
- atomic_dec(&ksocknal_data.ksnd_nactive_txs);
-
- if (tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) {
- /* it's a noop tx */
- spin_lock(&ksocknal_data.ksnd_tx_lock);
-
- list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs);
-
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
- } else {
- LIBCFS_FREE(tx, tx->tx_desc_size);
- }
-}
-
-void
-ksocknal_init_msg(ksock_msg_t *msg, int type)
-{
- msg->ksm_type = type;
- msg->ksm_csum = 0;
- msg->ksm_zc_req_cookie = 0;
- msg->ksm_zc_ack_cookie = 0;
-}
-
-int
-ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- struct iovec *iov = tx->tx_iov;
- int nob;
- int rc;
-
- LASSERT (tx->tx_niov > 0);
-
- /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */
- rc = ksocknal_lib_send_iov(conn, tx);
-
- if (rc <= 0) /* sent nothing? */
- return (rc);
-
- nob = rc;
- LASSERT (nob <= tx->tx_resid);
- tx->tx_resid -= nob;
-
- /* "consume" iov */
- do {
- LASSERT (tx->tx_niov > 0);
-
- if (nob < iov->iov_len) {
- iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob);
- iov->iov_len -= nob;
- return (rc);
- }
-
- nob -= iov->iov_len;
- tx->tx_iov = ++iov;
- tx->tx_niov--;
- } while (nob != 0);
-
- return (rc);
-}
-
-int
-ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- lnet_kiov_t *kiov = tx->tx_kiov;
- int nob;
- int rc;
-
- LASSERT (tx->tx_niov == 0);
- LASSERT (tx->tx_nkiov > 0);
-
- /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */
- rc = ksocknal_lib_send_kiov(conn, tx);
-
- if (rc <= 0) /* sent nothing? */
- return (rc);
-
- nob = rc;
- LASSERT (nob <= tx->tx_resid);
- tx->tx_resid -= nob;
-
- /* "consume" kiov */
- do {
- LASSERT(tx->tx_nkiov > 0);
-
- if (nob < kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
- return rc;
- }
-
- nob -= kiov->kiov_len;
- tx->tx_kiov = ++kiov;
- tx->tx_nkiov--;
- } while (nob != 0);
-
- return (rc);
-}
-
-int
-ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- int rc;
- int bufnob;
-
- if (ksocknal_data.ksnd_stall_tx != 0) {
- cfs_pause(cfs_time_seconds(ksocknal_data.ksnd_stall_tx));
- }
-
- LASSERT (tx->tx_resid != 0);
-
- rc = ksocknal_connsock_addref(conn);
- if (rc != 0) {
- LASSERT (conn->ksnc_closing);
- return (-ESHUTDOWN);
- }
-
- do {
- if (ksocknal_data.ksnd_enomem_tx > 0) {
- /* testing... */
- ksocknal_data.ksnd_enomem_tx--;
- rc = -EAGAIN;
- } else if (tx->tx_niov != 0) {
- rc = ksocknal_send_iov (conn, tx);
- } else {
- rc = ksocknal_send_kiov (conn, tx);
- }
-
- bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock);
- if (rc > 0) /* sent something? */
- conn->ksnc_tx_bufnob += rc; /* account it */
-
- if (bufnob < conn->ksnc_tx_bufnob) {
- /* allocated send buffer bytes < computed; infer
- * something got ACKed */
- conn->ksnc_tx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
- conn->ksnc_tx_bufnob = bufnob;
- mb();
- }
-
- if (rc <= 0) { /* Didn't write anything? */
- ksock_sched_t *sched;
-
- if (rc == 0) /* some stacks return 0 instead of -EAGAIN */
- rc = -EAGAIN;
-
- if (rc != -EAGAIN)
- break;
-
- /* Check if EAGAIN is due to memory pressure */
-
- sched = conn->ksnc_scheduler;
- spin_lock_bh (&sched->kss_lock);
-
- if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
- !conn->ksnc_tx_ready) {
- /* SOCK_NOSPACE is set when the socket fills
- * and cleared in the write_space callback
- * (which also sets ksnc_tx_ready). If
- * SOCK_NOSPACE and ksnc_tx_ready are BOTH
- * zero, I didn't fill the socket and
- * write_space won't reschedule me, so I
- * return -ENOMEM to get my caller to retry
- * after a timeout */
- rc = -ENOMEM;
- }
-
- spin_unlock_bh (&sched->kss_lock);
- break;
- }
-
- /* socket's wmem_queued now includes 'rc' bytes */
- atomic_sub (rc, &conn->ksnc_tx_nob);
- rc = 0;
-
- } while (tx->tx_resid != 0);
-
- ksocknal_connsock_decref(conn);
- return (rc);
-}
-
-int
-ksocknal_recv_iov (ksock_conn_t *conn)
-{
- struct iovec *iov = conn->ksnc_rx_iov;
- int nob;
- int rc;
-
- LASSERT (conn->ksnc_rx_niov > 0);
-
- /* Never touch conn->ksnc_rx_iov or change connection
- * status inside ksocknal_lib_recv_iov */
- rc = ksocknal_lib_recv_iov(conn);
-
- if (rc <= 0)
- return (rc);
-
- /* received something... */
- nob = rc;
-
- conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
- conn->ksnc_rx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- mb(); /* order with setting rx_started */
- conn->ksnc_rx_started = 1;
-
- conn->ksnc_rx_nob_wanted -= nob;
- conn->ksnc_rx_nob_left -= nob;
-
- do {
- LASSERT (conn->ksnc_rx_niov > 0);
-
- if (nob < iov->iov_len) {
- iov->iov_len -= nob;
- iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob);
- return (-EAGAIN);
- }
-
- nob -= iov->iov_len;
- conn->ksnc_rx_iov = ++iov;
- conn->ksnc_rx_niov--;
- } while (nob != 0);
-
- return (rc);
-}
-
-int
-ksocknal_recv_kiov (ksock_conn_t *conn)
-{
- lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
- int nob;
- int rc;
- LASSERT (conn->ksnc_rx_nkiov > 0);
-
- /* Never touch conn->ksnc_rx_kiov or change connection
- * status inside ksocknal_lib_recv_iov */
- rc = ksocknal_lib_recv_kiov(conn);
-
- if (rc <= 0)
- return (rc);
-
- /* received something... */
- nob = rc;
-
- conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
- conn->ksnc_rx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- mb(); /* order with setting rx_started */
- conn->ksnc_rx_started = 1;
-
- conn->ksnc_rx_nob_wanted -= nob;
- conn->ksnc_rx_nob_left -= nob;
-
- do {
- LASSERT (conn->ksnc_rx_nkiov > 0);
-
- if (nob < kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
- return -EAGAIN;
- }
-
- nob -= kiov->kiov_len;
- conn->ksnc_rx_kiov = ++kiov;
- conn->ksnc_rx_nkiov--;
- } while (nob != 0);
-
- return 1;
-}
-
-int
-ksocknal_receive (ksock_conn_t *conn)
-{
- /* Return 1 on success, 0 on EOF, < 0 on error.
- * Caller checks ksnc_rx_nob_wanted to determine
- * progress/completion. */
- int rc;
- ENTRY;
-
- if (ksocknal_data.ksnd_stall_rx != 0) {
- cfs_pause(cfs_time_seconds (ksocknal_data.ksnd_stall_rx));
- }
-
- rc = ksocknal_connsock_addref(conn);
- if (rc != 0) {
- LASSERT (conn->ksnc_closing);
- return (-ESHUTDOWN);
- }
-
- for (;;) {
- if (conn->ksnc_rx_niov != 0)
- rc = ksocknal_recv_iov (conn);
- else
- rc = ksocknal_recv_kiov (conn);
-
- if (rc <= 0) {
- /* error/EOF or partial receive */
- if (rc == -EAGAIN) {
- rc = 1;
- } else if (rc == 0 && conn->ksnc_rx_started) {
- /* EOF in the middle of a message */
- rc = -EPROTO;
- }
- break;
- }
-
- /* Completed a fragment */
-
- if (conn->ksnc_rx_nob_wanted == 0) {
- rc = 1;
- break;
- }
- }
-
- ksocknal_connsock_decref(conn);
- RETURN (rc);
-}
-
-void
-ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx)
-{
- lnet_msg_t *lnetmsg = tx->tx_lnetmsg;
- int rc = (tx->tx_resid == 0) ? 0 : -EIO;
- ENTRY;
-
- LASSERT(ni != NULL || tx->tx_conn != NULL);
-
- if (tx->tx_conn != NULL)
- ksocknal_conn_decref(tx->tx_conn);
-
- if (ni == NULL && tx->tx_conn != NULL)
- ni = tx->tx_conn->ksnc_peer->ksnp_ni;
-
- ksocknal_free_tx (tx);
- if (lnetmsg != NULL) /* KSOCK_MSG_NOOP go without lnetmsg */
- lnet_finalize (ni, lnetmsg, rc);
-
- EXIT;
-}
-
-void
-ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error)
-{
- ksock_tx_t *tx;
-
- while (!list_empty (txlist)) {
- tx = list_entry (txlist->next, ksock_tx_t, tx_list);
-
- if (error && tx->tx_lnetmsg != NULL) {
- CDEBUG (D_NETERROR, "Deleting packet type %d len %d %s->%s\n",
- le32_to_cpu (tx->tx_lnetmsg->msg_hdr.type),
- le32_to_cpu (tx->tx_lnetmsg->msg_hdr.payload_length),
- libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)),
- libcfs_nid2str(le64_to_cpu (tx->tx_lnetmsg->msg_hdr.dest_nid)));
- } else if (error) {
- CDEBUG (D_NETERROR, "Deleting noop packet\n");
- }
-
- list_del (&tx->tx_list);
-
- LASSERT (atomic_read(&tx->tx_refcount) == 1);
- ksocknal_tx_done (ni, tx);
- }
-}
-
-static void
-ksocknal_check_zc_req(ksock_tx_t *tx)
-{
- ksock_conn_t *conn = tx->tx_conn;
- ksock_peer_t *peer = conn->ksnc_peer;
- lnet_kiov_t *kiov = tx->tx_kiov;
- int nkiov = tx->tx_nkiov;
-
- /* Set tx_msg.ksm_zc_req_cookie to a unique non-zero cookie and add tx
- * to ksnp_zc_req_list if some fragment of this message should be sent
- * zero-copy. Our peer will send an ACK containing this cookie when
- * she has received this message to tell us we can signal completion.
- * tx_msg.ksm_zc_req_cookie remains non-zero while tx is on
- * ksnp_zc_req_list. */
-
- if (conn->ksnc_proto != &ksocknal_protocol_v2x ||
- !conn->ksnc_zc_capable)
- return;
-
- while (nkiov > 0) {
- if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag)
- break;
- --nkiov;
- ++kiov;
- }
-
- if (nkiov == 0)
- return;
-
- /* assign cookie and queue tx to pending list, it will be released when
- * a matching ack is received. See ksocknal_handle_zc_ack() */
-
- ksocknal_tx_addref(tx);
-
- spin_lock(&peer->ksnp_lock);
-
- LASSERT (tx->tx_msg.ksm_zc_req_cookie == 0);
- tx->tx_msg.ksm_zc_req_cookie = peer->ksnp_zc_next_cookie++;
- list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
-
- spin_unlock(&peer->ksnp_lock);
-}
-
-static void
-ksocknal_unzc_req(ksock_tx_t *tx)
-{
- ksock_peer_t *peer = tx->tx_conn->ksnc_peer;
-
- spin_lock(&peer->ksnp_lock);
-
- if (tx->tx_msg.ksm_zc_req_cookie == 0) {
- /* Not waiting for an ACK */
- spin_unlock(&peer->ksnp_lock);
- return;
- }
-
- tx->tx_msg.ksm_zc_req_cookie = 0;
- list_del(&tx->tx_zc_list);
-
- spin_unlock(&peer->ksnp_lock);
-
- ksocknal_tx_decref(tx);
-}
-
-int
-ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- int rc;
-
- if (!tx->tx_checked_zc) {
- tx->tx_checked_zc = 1;
- ksocknal_check_zc_req(tx);
- }
-
- rc = ksocknal_transmit (conn, tx);
-
- CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
-
- if (tx->tx_resid == 0) {
- /* Sent everything OK */
- LASSERT (rc == 0);
-
- return (0);
- }
-
- if (rc == -EAGAIN)
- return (rc);
-
- if (rc == -ENOMEM) {
- static int counter;
-
- counter++; /* exponential backoff warnings */
- if ((counter & (-counter)) == counter)
- CWARN("%u ENOMEM tx %p (%u allocated)\n",
- counter, conn, atomic_read(&libcfs_kmemory));
-
- /* Queue on ksnd_enomem_conns for retry after a timeout */
- spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
-
- /* enomem list takes over scheduler's ref... */
- LASSERT (conn->ksnc_tx_scheduled);
- list_add_tail(&conn->ksnc_tx_list,
- &ksocknal_data.ksnd_enomem_conns);
- if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(),
- SOCKNAL_ENOMEM_RETRY),
- ksocknal_data.ksnd_reaper_waketime))
- cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
-
- spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
- return (rc);
- }
-
- /* Actual error */
- LASSERT (rc < 0);
-
- if (!conn->ksnc_closing) {
- switch (rc) {
- case -ECONNRESET:
- LCONSOLE_WARN("Host %u.%u.%u.%u reset our connection "
- "while we were sending data; it may have "
- "rebooted.\n",
- HIPQUAD(conn->ksnc_ipaddr));
- break;
- default:
- LCONSOLE_WARN("There was an unexpected network error "
- "while writing to %u.%u.%u.%u: %d.\n",
- HIPQUAD(conn->ksnc_ipaddr), rc);
- break;
- }
- CDEBUG(D_NET, "[%p] Error %d on write to %s"
- " ip %d.%d.%d.%d:%d\n", conn, rc,
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port);
- }
-
- ksocknal_unzc_req(tx);
-
- /* it's not an error if conn is being closed */
- ksocknal_close_conn_and_siblings (conn,
- (conn->ksnc_closing) ? 0 : rc);
-
- return (rc);
-}
-
-void
-ksocknal_launch_connection_locked (ksock_route_t *route)
-{
-
- /* called holding write lock on ksnd_global_lock */
-
- LASSERT (!route->ksnr_scheduled);
- LASSERT (!route->ksnr_connecting);
- LASSERT ((ksocknal_route_mask() & ~route->ksnr_connected) != 0);
-
- route->ksnr_scheduled = 1; /* scheduling conn for connd */
- ksocknal_route_addref(route); /* extra ref for connd */
-
- spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
-
- list_add_tail (&route->ksnr_connd_list,
- &ksocknal_data.ksnd_connd_routes);
- cfs_waitq_signal (&ksocknal_data.ksnd_connd_waitq);
-
- spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
-}
-
-ksock_conn_t *
-ksocknal_find_conn_locked (int payload_nob, ksock_peer_t *peer)
-{
- struct list_head *tmp;
- ksock_conn_t *typed = NULL;
- int tnob = 0;
- ksock_conn_t *fallback = NULL;
- int fnob = 0;
- ksock_conn_t *conn;
-
- list_for_each (tmp, &peer->ksnp_conns) {
- ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list);
- int hdr_nob = 0;
-#if SOCKNAL_ROUND_ROBIN
- const int nob = 0;
-#else
- int nob = atomic_read(&c->ksnc_tx_nob) +
- SOCK_WMEM_QUEUED(c->ksnc_sock);
-#endif
- LASSERT (!c->ksnc_closing);
- LASSERT (c->ksnc_proto != NULL);
-
- if (fallback == NULL || nob < fnob) {
- fallback = c;
- fnob = nob;
- }
-
- if (!*ksocknal_tunables.ksnd_typed_conns)
- continue;
-
- if (payload_nob == 0) {
- /* noop packet */
- hdr_nob = offsetof(ksock_msg_t, ksm_u);
- } else {
- /* lnet packet */
- hdr_nob = (c->ksnc_proto == &ksocknal_protocol_v2x)?
- offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload):
- sizeof(lnet_hdr_t);
- }
-
- switch (c->ksnc_type) {
- default:
- CERROR("ksnc_type bad: %u\n", c->ksnc_type);
- LBUG();
- case SOCKLND_CONN_ANY:
- break;
- case SOCKLND_CONN_BULK_IN:
- continue;
- case SOCKLND_CONN_BULK_OUT:
- if ((hdr_nob + payload_nob) < *ksocknal_tunables.ksnd_min_bulk)
- continue;
- break;
- case SOCKLND_CONN_CONTROL:
- if ((hdr_nob + payload_nob) >= *ksocknal_tunables.ksnd_min_bulk)
- continue;
- break;
- }
-
- if (typed == NULL || nob < tnob) {
- typed = c;
- tnob = nob;
- }
- }
-
- /* prefer the typed selection */
- conn = (typed != NULL) ? typed : fallback;
-
-#if SOCKNAL_ROUND_ROBIN
- if (conn != NULL) {
- /* round-robin all else being equal */
- list_del (&conn->ksnc_list);
- list_add_tail (&conn->ksnc_list, &peer->ksnp_conns);
- }
-#endif
- return conn;
-}
-
-void
-ksocknal_next_mono_tx(ksock_conn_t *conn)
-{
- ksock_tx_t *tx = conn->ksnc_tx_mono;
-
- /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
- LASSERT(conn->ksnc_proto == &ksocknal_protocol_v2x);
- LASSERT(!list_empty(&conn->ksnc_tx_queue));
- LASSERT(tx != NULL);
-
- if (tx->tx_list.next == &conn->ksnc_tx_queue) {
- /* no more packets queued */
- conn->ksnc_tx_mono = NULL;
- } else {
- conn->ksnc_tx_mono = list_entry(tx->tx_list.next, ksock_tx_t, tx_list);
- LASSERT(conn->ksnc_tx_mono->tx_msg.ksm_type == tx->tx_msg.ksm_type);
- }
-}
-
-int
-ksocknal_piggyback_zcack(ksock_conn_t *conn, __u64 cookie)
-{
- ksock_tx_t *tx = conn->ksnc_tx_mono;
-
- /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
-
- if (tx == NULL)
- return 0;
-
- if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
- /* tx is noop zc-ack, can't piggyback zc-ack cookie */
- return 0;
- }
-
- LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
- LASSERT(tx->tx_msg.ksm_zc_ack_cookie == 0);
-
- /* piggyback the zc-ack cookie */
- tx->tx_msg.ksm_zc_ack_cookie = cookie;
- ksocknal_next_mono_tx(conn);
-
- return 1;
-}
-
-void
-ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
-{
- ksock_sched_t *sched = conn->ksnc_scheduler;
- ksock_msg_t *msg = &tx->tx_msg;
- ksock_tx_t *ztx;
- int bufnob = 0;
-
- /* called holding global lock (read or irq-write) and caller may
- * not have dropped this lock between finding conn and calling me,
- * so we don't need the {get,put}connsock dance to deref
- * ksnc_sock... */
- LASSERT(!conn->ksnc_closing);
-
- CDEBUG (D_NET, "Sending to %s ip %d.%d.%d.%d:%d\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port);
-
- tx->tx_checked_zc = 0;
- conn->ksnc_proto->pro_pack(tx);
-
- /* Ensure the frags we've been given EXACTLY match the number of
- * bytes we want to send. Many TCP/IP stacks disregard any total
- * size parameters passed to them and just look at the frags.
- *
- * We always expect at least 1 mapped fragment containing the
- * complete ksocknal message header. */
- LASSERT (lnet_iov_nob (tx->tx_niov, tx->tx_iov) +
- lnet_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob);
- LASSERT (tx->tx_niov >= 1);
- LASSERT (tx->tx_resid == tx->tx_nob);
-
- CDEBUG (D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n",
- tx, (tx->tx_lnetmsg != NULL)? tx->tx_lnetmsg->msg_hdr.type:
- KSOCK_MSG_NOOP,
- tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
-
- atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
- tx->tx_conn = conn;
- ksocknal_conn_addref(conn); /* +1 ref for tx */
-
- /*
- * NB Darwin: SOCK_WMEM_QUEUED()->sock_getsockopt() will take
- * a blockable lock(socket lock), so SOCK_WMEM_QUEUED can't be
- * put in spinlock.
- */
- bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock);
- spin_lock_bh (&sched->kss_lock);
-
- if (list_empty(&conn->ksnc_tx_queue) && bufnob == 0) {
- /* First packet starts the timeout */
- conn->ksnc_tx_deadline =
- cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
- conn->ksnc_tx_bufnob = 0;
- mb(); /* order with adding to tx_queue */
- }
-
- ztx = NULL;
-
- if (msg->ksm_type == KSOCK_MSG_NOOP) {
- /* The packet is noop ZC ACK, try to piggyback the ack_cookie
- * on a normal packet so I don't need to send it */
- LASSERT(msg->ksm_zc_req_cookie == 0);
- LASSERT(msg->ksm_zc_ack_cookie != 0);
-
- if (conn->ksnc_tx_mono != NULL) {
- if (ksocknal_piggyback_zcack(conn, msg->ksm_zc_ack_cookie)) {
- /* zc-ack cookie is piggybacked */
- atomic_sub (tx->tx_nob, &conn->ksnc_tx_nob);
- ztx = tx; /* Put to freelist later */
- } else {
- /* no packet can piggyback zc-ack cookie */
- list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
- }
- } else {
- /* It's the first mono-packet */
- conn->ksnc_tx_mono = tx;
- list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
- }
-
- } else {
- /* It's a normal packet - can it piggback a noop zc-ack that
- * has been queued already? */
- LASSERT(msg->ksm_zc_ack_cookie == 0);
-
- if (conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x packet */
- conn->ksnc_tx_mono != NULL) {
- if (conn->ksnc_tx_mono->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
- /* There is a noop zc-ack can be piggybacked */
- ztx = conn->ksnc_tx_mono;
-
- msg->ksm_zc_ack_cookie = ztx->tx_msg.ksm_zc_ack_cookie;
- ksocknal_next_mono_tx(conn);
-
- /* use tx to replace the noop zc-ack packet, ztx will
- * be put to freelist later */
- list_add(&tx->tx_list, &ztx->tx_list);
- list_del(&ztx->tx_list);
-
- atomic_sub (ztx->tx_nob, &conn->ksnc_tx_nob);
- } else {
- /* no noop zc-ack packet, just enqueue it */
- LASSERT(conn->ksnc_tx_mono->tx_msg.ksm_type == KSOCK_MSG_LNET);
- list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
- }
-
- } else if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
- /* it's the first mono-packet, enqueue it */
- conn->ksnc_tx_mono = tx;
- list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
- } else {
- /* V1.x packet, just enqueue it */
- list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
- }
- }
-
- if (ztx != NULL)
- list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs);
-
- if (conn->ksnc_tx_ready && /* able to send */
- !conn->ksnc_tx_scheduled) { /* not scheduled to send */
- /* +1 ref for scheduler */
- ksocknal_conn_addref(conn);
- list_add_tail (&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- cfs_waitq_signal (&sched->kss_waitq);
- }
-
- spin_unlock_bh (&sched->kss_lock);
-}
-
-ksock_route_t *
-ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
-{
- struct list_head *tmp;
- ksock_route_t *route;
-
- list_for_each (tmp, &peer->ksnp_routes) {
- route = list_entry (tmp, ksock_route_t, ksnr_list);
-
- LASSERT (!route->ksnr_connecting || route->ksnr_scheduled);
-
- if (route->ksnr_scheduled) /* connections being established */
- continue;
-
- /* all route types connected ? */
- if ((ksocknal_route_mask() & ~route->ksnr_connected) == 0)
- continue;
-
- /* too soon to retry this guy? */
- if (!(route->ksnr_retry_interval == 0 || /* first attempt */
- cfs_time_aftereq (cfs_time_current(),
- route->ksnr_timeout)))
- continue;
-
- return (route);
- }
-
- return (NULL);
-}
-
-ksock_route_t *
-ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
-{
- struct list_head *tmp;
- ksock_route_t *route;
-
- list_for_each (tmp, &peer->ksnp_routes) {
- route = list_entry (tmp, ksock_route_t, ksnr_list);
-
- LASSERT (!route->ksnr_connecting || route->ksnr_scheduled);
-
- if (route->ksnr_scheduled)
- return (route);
- }
-
- return (NULL);
-}
-
-int
-ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id)
-{
- ksock_peer_t *peer;
- ksock_conn_t *conn;
- ksock_route_t *route;
- rwlock_t *g_lock;
- int retry;
- int rc;
-
- LASSERT (tx->tx_conn == NULL);
- LASSERT (tx->tx_lnetmsg != NULL);
-
- g_lock = &ksocknal_data.ksnd_global_lock;
-
- for (retry = 0;; retry = 1) {
-#if !SOCKNAL_ROUND_ROBIN
- read_lock (g_lock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer != NULL) {
- if (ksocknal_find_connectable_route_locked(peer) == NULL) {
- conn = ksocknal_find_conn_locked (tx->tx_lnetmsg->msg_len, peer);
- if (conn != NULL) {
- /* I've got no routes that need to be
- * connecting and I do have an actual
- * connection... */
- ksocknal_queue_tx_locked (tx, conn);
- read_unlock (g_lock);
- return (0);
- }
- }
- }
-
- /* I'll need a write lock... */
- read_unlock (g_lock);
-#endif
- write_lock_bh (g_lock);
-
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer != NULL)
- break;
-
- write_unlock_bh (g_lock);
-
- if ((id.pid & LNET_PID_USERFLAG) != 0) {
- CERROR("Refusing to create a connection to "
- "userspace process %s\n", libcfs_id2str(id));
- return -EHOSTUNREACH;
- }
-
- if (retry) {
- CERROR("Can't find peer %s\n", libcfs_id2str(id));
- return -EHOSTUNREACH;
- }
-
- rc = ksocknal_add_peer(ni, id,
- LNET_NIDADDR(id.nid),
- lnet_acceptor_port());
- if (rc != 0) {
- CERROR("Can't add peer %s: %d\n",
- libcfs_id2str(id), rc);
- return rc;
- }
- }
-
- for (;;) {
- /* launch any/all connections that need it */
- route = ksocknal_find_connectable_route_locked (peer);
- if (route == NULL)
- break;
-
- ksocknal_launch_connection_locked (route);
- }
-
- conn = ksocknal_find_conn_locked (tx->tx_lnetmsg->msg_len, peer);
- if (conn != NULL) {
- /* Connection exists; queue message on it */
- ksocknal_queue_tx_locked (tx, conn);
- write_unlock_bh (g_lock);
- return (0);
- }
-
- if (peer->ksnp_accepting > 0 ||
- ksocknal_find_connecting_route_locked (peer) != NULL) {
- /* Queue the message until a connection is established */
- list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
- write_unlock_bh (g_lock);
- return 0;
- }
-
- write_unlock_bh (g_lock);
-
- /* NB Routes may be ignored if connections to them failed recently */
- CDEBUG(D_NETERROR, "No usable routes to %s\n", libcfs_id2str(id));
- return (-EHOSTUNREACH);
-}
-
-int
-ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- ksock_tx_t *tx;
- int desc_size;
- int rc;
-
- /* NB 'private' is different depending on what we're sending.
- * Just ignore it... */
-
- CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT (payload_nob == 0 || payload_niov > 0);
- LASSERT (payload_niov <= LNET_MAX_IOV);
- /* payload is either all vaddrs or all pages */
- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
- LASSERT (!in_interrupt ());
-
- if (payload_iov != NULL)
- desc_size = offsetof(ksock_tx_t,
- tx_frags.virt.iov[1 + payload_niov]);
- else
- desc_size = offsetof(ksock_tx_t,
- tx_frags.paged.kiov[payload_niov]);
-
- tx = ksocknal_alloc_tx(desc_size);
- if (tx == NULL) {
- CERROR("Can't allocate tx desc type %d size %d\n",
- type, desc_size);
- return (-ENOMEM);
- }
-
- tx->tx_conn = NULL; /* set when assigned a conn */
- tx->tx_lnetmsg = lntmsg;
-
- if (payload_iov != NULL) {
- tx->tx_kiov = NULL;
- tx->tx_nkiov = 0;
- tx->tx_iov = tx->tx_frags.virt.iov;
- tx->tx_niov = 1 +
- lnet_extract_iov(payload_niov, &tx->tx_iov[1],
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- } else {
- tx->tx_niov = 1;
- tx->tx_iov = &tx->tx_frags.paged.iov;
- tx->tx_kiov = tx->tx_frags.paged.kiov;
- tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- }
-
- ksocknal_init_msg(&tx->tx_msg, KSOCK_MSG_LNET);
-
- /* The first fragment will be set later in pro_pack */
- rc = ksocknal_launch_packet(ni, tx, target);
- if (rc == 0)
- return (0);
-
- ksocknal_free_tx(tx);
- return (-EIO);
-}
-
-int
-ksocknal_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid = cfs_kernel_thread (fn, arg, 0);
-
- if (pid < 0)
- return ((int)pid);
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
- ksocknal_data.ksnd_nthreads++;
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
- return (0);
-}
-
-void
-ksocknal_thread_fini (void)
-{
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
- ksocknal_data.ksnd_nthreads--;
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip)
-{
- static char ksocknal_slop_buffer[4096];
-
- int nob;
- unsigned int niov;
- int skipped;
-
- LASSERT(conn->ksnc_proto != NULL);
-
- if ((*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0) {
- /* Remind the socket to ack eagerly... */
- ksocknal_lib_eager_ack(conn);
- }
-
- if (nob_to_skip == 0) { /* right at next packet boundary now */
- conn->ksnc_rx_started = 0;
- mb (); /* racing with timeout thread */
-
- switch (conn->ksnc_proto->pro_version) {
- case KSOCK_PROTO_V2:
- conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
- conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg;
-
- conn->ksnc_rx_nob_wanted = offsetof(ksock_msg_t, ksm_u);
- conn->ksnc_rx_nob_left = offsetof(ksock_msg_t, ksm_u);
- conn->ksnc_rx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u);
- break;
-
- case KSOCK_PROTO_V1:
- /* Receiving bare lnet_hdr_t */
- conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
- conn->ksnc_rx_nob_wanted = sizeof(lnet_hdr_t);
- conn->ksnc_rx_nob_left = sizeof(lnet_hdr_t);
-
- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
- conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg;
- conn->ksnc_rx_iov[0].iov_len = sizeof (lnet_hdr_t);
- break;
-
- default:
- LBUG ();
- }
- conn->ksnc_rx_niov = 1;
-
- conn->ksnc_rx_kiov = NULL;
- conn->ksnc_rx_nkiov = 0;
- conn->ksnc_rx_csum = ~0;
- return (1);
- }
-
- /* Set up to skip as much as possible now. If there's more left
- * (ran out of iov entries) we'll get called again */
-
- conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
- conn->ksnc_rx_nob_left = nob_to_skip;
- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
- skipped = 0;
- niov = 0;
-
- do {
- nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer));
-
- conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer;
- conn->ksnc_rx_iov[niov].iov_len = nob;
- niov++;
- skipped += nob;
- nob_to_skip -=nob;
-
- } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */
- niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec));
-
- conn->ksnc_rx_niov = niov;
- conn->ksnc_rx_kiov = NULL;
- conn->ksnc_rx_nkiov = 0;
- conn->ksnc_rx_nob_wanted = skipped;
- return (0);
-}
-
-/* (Sink) handle incoming ZC request from sender */
-static int
-ksocknal_handle_zc_req(ksock_peer_t *peer, __u64 cookie)
-{
- ksock_conn_t *conn;
- ksock_tx_t *tx;
- ksock_sched_t *sched;
- int rc;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- conn = ksocknal_find_conn_locked (0, peer);
- if (conn == NULL) {
- read_unlock (&ksocknal_data.ksnd_global_lock);
- CERROR("Can't find connection to send zcack.\n");
- return -ECONNRESET;
- }
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh (&sched->kss_lock);
- rc = ksocknal_piggyback_zcack(conn, cookie);
- spin_unlock_bh (&sched->kss_lock);
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
- if (rc) {
- /* Ack cookie is piggybacked */
- return 0;
- }
-
- tx = ksocknal_alloc_tx(KSOCK_NOOP_TX_SIZE);
- if (tx == NULL) {
- CERROR("Can't allocate noop tx desc\n");
- return -ENOMEM;
- }
-
- tx->tx_conn = NULL;
- tx->tx_lnetmsg = NULL;
- tx->tx_kiov = NULL;
- tx->tx_nkiov = 0;
- tx->tx_iov = tx->tx_frags.virt.iov;
- tx->tx_niov = 1;
-
- ksocknal_init_msg(&tx->tx_msg, KSOCK_MSG_NOOP);
- tx->tx_msg.ksm_zc_ack_cookie = cookie; /* incoming cookie */
-
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- conn = ksocknal_find_conn_locked (0, peer);
- if (conn == NULL) {
- read_unlock (&ksocknal_data.ksnd_global_lock);
- ksocknal_free_tx(tx);
- CERROR("Can't find connection to send zcack.\n");
- return -ECONNRESET;
- }
- ksocknal_queue_tx_locked(tx, conn);
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- return 0;
-}
-
-/* (Sender) handle ZC_ACK from sink */
-static int
-ksocknal_handle_zc_ack(ksock_peer_t *peer, __u64 cookie)
-{
- ksock_tx_t *tx;
- struct list_head *ctmp;
-
- spin_lock(&peer->ksnp_lock);
-
- list_for_each(ctmp, &peer->ksnp_zc_req_list) {
- tx = list_entry (ctmp, ksock_tx_t, tx_zc_list);
- if (tx->tx_msg.ksm_zc_req_cookie != cookie)
- continue;
-
- tx->tx_msg.ksm_zc_req_cookie = 0;
- list_del(&tx->tx_zc_list);
-
- spin_unlock(&peer->ksnp_lock);
-
- ksocknal_tx_decref(tx);
- return 0;
- }
- spin_unlock(&peer->ksnp_lock);
-
- return -EPROTO;
-}
-
-int
-ksocknal_process_receive (ksock_conn_t *conn)
-{
- int rc;
-
- LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
-
- /* NB: sched lock NOT held */
- /* SOCKNAL_RX_LNET_HEADER is here for backward compatability */
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER ||
- conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD ||
- conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER ||
- conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
- again:
- if (conn->ksnc_rx_nob_wanted != 0) {
- rc = ksocknal_receive(conn);
-
- if (rc <= 0) {
- LASSERT (rc != -EAGAIN);
-
- if (rc == 0)
- CDEBUG (D_NET, "[%p] EOF from %s"
- " ip %d.%d.%d.%d:%d\n", conn,
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port);
- else if (!conn->ksnc_closing)
- CERROR ("[%p] Error %d on read from %s"
- " ip %d.%d.%d.%d:%d\n",
- conn, rc,
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port);
-
- /* it's not an error if conn is being closed */
- ksocknal_close_conn_and_siblings (conn,
- (conn->ksnc_closing) ? 0 : rc);
- return (rc == 0 ? -ESHUTDOWN : rc);
- }
-
- if (conn->ksnc_rx_nob_wanted != 0) {
- /* short read */
- return (-EAGAIN);
- }
- }
- switch (conn->ksnc_rx_state) {
- case SOCKNAL_RX_KSM_HEADER:
- if (conn->ksnc_flip) {
- __swab32s(&conn->ksnc_msg.ksm_type);
- __swab32s(&conn->ksnc_msg.ksm_csum);
- __swab64s(&conn->ksnc_msg.ksm_zc_req_cookie);
- __swab64s(&conn->ksnc_msg.ksm_zc_ack_cookie);
- }
-
- if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
- conn->ksnc_msg.ksm_csum != 0 && /* has checksum */
- conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
- /* NOOP Checksum error */
- CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, -EPROTO);
- return (-EIO);
- }
-
- if (conn->ksnc_msg.ksm_zc_ack_cookie != 0) {
- LASSERT(conn->ksnc_proto == &ksocknal_protocol_v2x);
-
- rc = ksocknal_handle_zc_ack(conn->ksnc_peer,
- conn->ksnc_msg.ksm_zc_ack_cookie);
- if (rc != 0) {
- CERROR("%s: Unknown zero copy ACK cookie: "LPU64"\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- conn->ksnc_msg.ksm_zc_ack_cookie);
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, -EPROTO);
- return (rc);
- }
- }
-
- if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) {
- ksocknal_new_packet (conn, 0);
- return 0; /* NOOP is done and just return */
- }
- LASSERT (conn->ksnc_msg.ksm_type == KSOCK_MSG_LNET);
-
- conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
- conn->ksnc_rx_nob_wanted = sizeof(ksock_lnet_msg_t);
- conn->ksnc_rx_nob_left = sizeof(ksock_lnet_msg_t);
-
- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
- conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg;
- conn->ksnc_rx_iov[0].iov_len = sizeof(ksock_lnet_msg_t);
-
- conn->ksnc_rx_niov = 1;
- conn->ksnc_rx_kiov = NULL;
- conn->ksnc_rx_nkiov = 0;
-
- goto again; /* read lnet header now */
-
- case SOCKNAL_RX_LNET_HEADER:
- /* unpack message header */
- conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
-
- if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) {
- /* Userspace peer */
- lnet_process_id_t *id = &conn->ksnc_peer->ksnp_id;
- lnet_hdr_t *lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
-
- /* Substitute process ID assigned at connection time */
- lhdr->src_pid = cpu_to_le32(id->pid);
- lhdr->src_nid = cpu_to_le64(id->nid);
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
- ksocknal_conn_addref(conn); /* ++ref while parsing */
-
- rc = lnet_parse(conn->ksnc_peer->ksnp_ni,
- &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr,
- conn->ksnc_peer->ksnp_id.nid, conn, 0);
- if (rc < 0) {
- /* I just received garbage: give up on this conn */
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings (conn, rc);
- ksocknal_conn_decref(conn);
- return (-EPROTO);
- }
-
- /* I'm racing with ksocknal_recv() */
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
- conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD);
-
- if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD)
- return 0;
-
- /* ksocknal_recv() got called */
- goto again;
-
- case SOCKNAL_RX_LNET_PAYLOAD:
- /* payload all received */
- rc = 0;
-
- if (conn->ksnc_rx_nob_left == 0 && /* not truncating */
- conn->ksnc_msg.ksm_csum != 0 && /* has checksum */
- conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
- CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
- rc = -EIO;
- }
-
- lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc);
-
- if (rc == 0 && conn->ksnc_msg.ksm_zc_req_cookie != 0) {
- LASSERT(conn->ksnc_proto == &ksocknal_protocol_v2x);
- rc = ksocknal_handle_zc_req(conn->ksnc_peer,
- conn->ksnc_msg.ksm_zc_req_cookie);
- }
-
- if (rc != 0) {
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings (conn, rc);
- return (-EPROTO);
- }
- /* Fall through */
-
- case SOCKNAL_RX_SLOP:
- /* starting new packet? */
- if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left))
- return 0; /* come back later */
- goto again; /* try to finish reading slop now */
-
- default:
- break;
- }
-
- /* Not Reached */
- LBUG ();
- return (-EINVAL); /* keep gcc happy */
-}
-
-int
-ksocknal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- ksock_conn_t *conn = (ksock_conn_t *)private;
- ksock_sched_t *sched = conn->ksnc_scheduler;
-
- LASSERT (mlen <= rlen);
- LASSERT (niov <= LNET_MAX_IOV);
-
- conn->ksnc_cookie = msg;
- conn->ksnc_rx_nob_wanted = mlen;
- conn->ksnc_rx_nob_left = rlen;
-
- if (mlen == 0 || iov != NULL) {
- conn->ksnc_rx_nkiov = 0;
- conn->ksnc_rx_kiov = NULL;
- conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
- conn->ksnc_rx_niov =
- lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
- niov, iov, offset, mlen);
- } else {
- conn->ksnc_rx_niov = 0;
- conn->ksnc_rx_iov = NULL;
- conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
- conn->ksnc_rx_nkiov =
- lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
- niov, kiov, offset, mlen);
- }
-
- LASSERT (mlen ==
- lnet_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
- lnet_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
-
- LASSERT (conn->ksnc_rx_scheduled);
-
- spin_lock_bh (&sched->kss_lock);
-
- switch (conn->ksnc_rx_state) {
- case SOCKNAL_RX_PARSE_WAIT:
- list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
- cfs_waitq_signal (&sched->kss_waitq);
- LASSERT (conn->ksnc_rx_ready);
- break;
-
- case SOCKNAL_RX_PARSE:
- /* scheduler hasn't noticed I'm parsing yet */
- break;
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD;
-
- spin_unlock_bh (&sched->kss_lock);
- ksocknal_conn_decref(conn);
- return (0);
-}
-
-static inline int
-ksocknal_sched_cansleep(ksock_sched_t *sched)
-{
- int rc;
-
- spin_lock_bh (&sched->kss_lock);
-
- rc = (!ksocknal_data.ksnd_shuttingdown &&
- list_empty(&sched->kss_rx_conns) &&
- list_empty(&sched->kss_tx_conns));
-
- spin_unlock_bh (&sched->kss_lock);
- return (rc);
-}
-
-int ksocknal_scheduler (void *arg)
-{
- ksock_sched_t *sched = (ksock_sched_t *)arg;
- ksock_conn_t *conn;
- ksock_tx_t *tx;
- int rc;
- int nloops = 0;
- int id = sched - ksocknal_data.ksnd_schedulers;
- char name[16];
-
- snprintf (name, sizeof (name),"socknal_sd%02d", id);
- cfs_daemonize (name);
- cfs_block_allsigs ();
-
-#if defined(CONFIG_SMP) && defined(CPU_AFFINITY)
- id = ksocknal_sched2cpu(id);
- if (cpu_online(id)) {
- cpumask_t m = CPU_MASK_NONE;
- cpu_set(id, m);
- set_cpus_allowed(current, m);
- } else {
- CERROR ("Can't set CPU affinity for %s to %d\n", name, id);
- }
-#endif /* CONFIG_SMP && CPU_AFFINITY */
-
- spin_lock_bh (&sched->kss_lock);
-
- while (!ksocknal_data.ksnd_shuttingdown) {
- int did_something = 0;
-
- /* Ensure I progress everything semi-fairly */
-
- if (!list_empty (&sched->kss_rx_conns)) {
- conn = list_entry(sched->kss_rx_conns.next,
- ksock_conn_t, ksnc_rx_list);
- list_del(&conn->ksnc_rx_list);
-
- LASSERT(conn->ksnc_rx_scheduled);
- LASSERT(conn->ksnc_rx_ready);
-
- /* clear rx_ready in case receive isn't complete.
- * Do it BEFORE we call process_recv, since
- * data_ready can set it any time after we release
- * kss_lock. */
- conn->ksnc_rx_ready = 0;
- spin_unlock_bh (&sched->kss_lock);
-
- rc = ksocknal_process_receive(conn);
-
- spin_lock_bh (&sched->kss_lock);
-
- /* I'm the only one that can clear this flag */
- LASSERT(conn->ksnc_rx_scheduled);
-
- /* Did process_receive get everything it wanted? */
- if (rc == 0)
- conn->ksnc_rx_ready = 1;
-
- if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) {
- /* Conn blocked waiting for ksocknal_recv()
- * I change its state (under lock) to signal
- * it can be rescheduled */
- conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT;
- } else if (conn->ksnc_rx_ready) {
- /* reschedule for rx */
- list_add_tail (&conn->ksnc_rx_list,
- &sched->kss_rx_conns);
- } else {
- conn->ksnc_rx_scheduled = 0;
- /* drop my ref */
- ksocknal_conn_decref(conn);
- }
-
- did_something = 1;
- }
-
- if (!list_empty (&sched->kss_tx_conns)) {
- CFS_LIST_HEAD (zlist);
-
- if (!list_empty(&sched->kss_zombie_noop_txs)) {
- list_add(&zlist, &sched->kss_zombie_noop_txs);
- list_del_init(&sched->kss_zombie_noop_txs);
- }
-
- conn = list_entry(sched->kss_tx_conns.next,
- ksock_conn_t, ksnc_tx_list);
- list_del (&conn->ksnc_tx_list);
-
- LASSERT(conn->ksnc_tx_scheduled);
- LASSERT(conn->ksnc_tx_ready);
- LASSERT(!list_empty(&conn->ksnc_tx_queue));
-
- tx = list_entry(conn->ksnc_tx_queue.next,
- ksock_tx_t, tx_list);
-
- if (conn->ksnc_tx_mono == tx)
- ksocknal_next_mono_tx(conn);
-
- /* dequeue now so empty list => more to send */
- list_del(&tx->tx_list);
-
- /* Clear tx_ready in case send isn't complete. Do
- * it BEFORE we call process_transmit, since
- * write_space can set it any time after we release
- * kss_lock. */
- conn->ksnc_tx_ready = 0;
- spin_unlock_bh (&sched->kss_lock);
-
- if (!list_empty(&zlist)) {
- /* free zombie noop txs, it's fast because
- * noop txs are just put in freelist */
- ksocknal_txlist_done(NULL, &zlist, 0);
- }
-
- rc = ksocknal_process_transmit(conn, tx);
-
- if (rc == -ENOMEM || rc == -EAGAIN) {
- /* Incomplete send: replace tx on HEAD of tx_queue */
- spin_lock_bh (&sched->kss_lock);
- list_add (&tx->tx_list, &conn->ksnc_tx_queue);
- } else {
- /* Complete send; tx -ref */
- ksocknal_tx_decref (tx);
-
- spin_lock_bh (&sched->kss_lock);
- /* assume space for more */
- conn->ksnc_tx_ready = 1;
- }
-
- if (rc == -ENOMEM) {
- /* Do nothing; after a short timeout, this
- * conn will be reposted on kss_tx_conns. */
- } else if (conn->ksnc_tx_ready &&
- !list_empty (&conn->ksnc_tx_queue)) {
- /* reschedule for tx */
- list_add_tail (&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- } else {
- conn->ksnc_tx_scheduled = 0;
- /* drop my ref */
- ksocknal_conn_decref(conn);
- }
-
- did_something = 1;
- }
- if (!did_something || /* nothing to do */
- ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
- spin_unlock_bh (&sched->kss_lock);
-
- nloops = 0;
-
- if (!did_something) { /* wait for something to do */
- rc = wait_event_interruptible_exclusive(
- sched->kss_waitq,
- !ksocknal_sched_cansleep(sched));
- LASSERT (rc == 0);
- } else {
- our_cond_resched();
- }
-
- spin_lock_bh (&sched->kss_lock);
- }
- }
-
- spin_unlock_bh (&sched->kss_lock);
- ksocknal_thread_fini ();
- return (0);
-}
-
-/*
- * Add connection to kss_rx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_read_callback (ksock_conn_t *conn)
-{
- ksock_sched_t *sched;
- ENTRY;
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh (&sched->kss_lock);
-
- conn->ksnc_rx_ready = 1;
-
- if (!conn->ksnc_rx_scheduled) { /* not being progressed */
- list_add_tail(&conn->ksnc_rx_list,
- &sched->kss_rx_conns);
- conn->ksnc_rx_scheduled = 1;
- /* extra ref for scheduler */
- ksocknal_conn_addref(conn);
-
- cfs_waitq_signal (&sched->kss_waitq);
- }
- spin_unlock_bh (&sched->kss_lock);
-
- EXIT;
-}
-
-/*
- * Add connection to kss_tx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_write_callback (ksock_conn_t *conn)
-{
- ksock_sched_t *sched;
- ENTRY;
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh (&sched->kss_lock);
-
- conn->ksnc_tx_ready = 1;
-
- if (!conn->ksnc_tx_scheduled && // not being progressed
- !list_empty(&conn->ksnc_tx_queue)){//packets to send
- list_add_tail (&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- ksocknal_conn_addref(conn);
-
- cfs_waitq_signal (&sched->kss_waitq);
- }
-
- spin_unlock_bh (&sched->kss_lock);
-
- EXIT;
-}
-
-ksock_proto_t *
-ksocknal_parse_proto_version (ksock_hello_msg_t *hello)
-{
- if ((hello->kshm_magic == LNET_PROTO_MAGIC &&
- hello->kshm_version == KSOCK_PROTO_V2) ||
- (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC) &&
- hello->kshm_version == __swab32(KSOCK_PROTO_V2))) {
-#if SOCKNAL_VERSION_DEBUG
- if (*ksocknal_tunables.ksnd_protocol != 2)
- return NULL;
-#endif
- return &ksocknal_protocol_v2x;
- }
-
- if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
- lnet_magicversion_t *hmv = (lnet_magicversion_t *)hello;
-
- CLASSERT (sizeof (lnet_magicversion_t) ==
- offsetof (ksock_hello_msg_t, kshm_src_nid));
-
- if (hmv->version_major == cpu_to_le16 (KSOCK_PROTO_V1_MAJOR) &&
- hmv->version_minor == cpu_to_le16 (KSOCK_PROTO_V1_MINOR))
- return &ksocknal_protocol_v1x;
- }
-
- return NULL;
-}
-
-static int
-ksocknal_send_hello_v1 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
-{
- cfs_socket_t *sock = conn->ksnc_sock;
- lnet_hdr_t *hdr;
- lnet_magicversion_t *hmv;
- int rc;
- int i;
-
- CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid));
-
- LIBCFS_ALLOC(hdr, sizeof(*hdr));
- if (hdr == NULL) {
- CERROR("Can't allocate lnet_hdr_t\n");
- return -ENOMEM;
- }
-
- hmv = (lnet_magicversion_t *)&hdr->dest_nid;
-
- /* Re-organize V2.x message header to V1.x (lnet_hdr_t)
- * header and send out */
- hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
- hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR);
- hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR);
-
- if (the_lnet.ln_testprotocompat != 0) {
- /* single-shot proto check */
- LNET_LOCK();
- if ((the_lnet.ln_testprotocompat & 1) != 0) {
- hmv->version_major++; /* just different! */
- the_lnet.ln_testprotocompat &= ~1;
- }
- if ((the_lnet.ln_testprotocompat & 2) != 0) {
- hmv->magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~2;
- }
- LNET_UNLOCK();
- }
-
- hdr->src_nid = cpu_to_le64 (hello->kshm_src_nid);
- hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid);
- hdr->type = cpu_to_le32 (LNET_MSG_HELLO);
- hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32));
- hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype);
- hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation);
-
- rc = libcfs_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout());
-
- if (rc != 0) {
- CDEBUG (D_NETERROR, "Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
- goto out;
- }
-
- if (hello->kshm_nips == 0)
- goto out;
-
- for (i = 0; i < hello->kshm_nips; i++) {
- hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]);
- }
-
- rc = libcfs_sock_write(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CDEBUG (D_NETERROR, "Error %d sending HELLO payload (%d)"
- " to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips,
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
- }
-out:
- LIBCFS_FREE(hdr, sizeof(*hdr));
-
- return rc;
-}
-
-static int
-ksocknal_send_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
-{
- cfs_socket_t *sock = conn->ksnc_sock;
- int rc;
-
- hello->kshm_magic = LNET_PROTO_MAGIC;
- hello->kshm_version = KSOCK_PROTO_V2;
-
- if (the_lnet.ln_testprotocompat != 0) {
- /* single-shot proto check */
- LNET_LOCK();
- if ((the_lnet.ln_testprotocompat & 1) != 0) {
- hello->kshm_version++; /* just different! */
- the_lnet.ln_testprotocompat &= ~1;
- }
- LNET_UNLOCK();
- }
-
- rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips),
- lnet_acceptor_timeout());
-
- if (rc != 0) {
- CDEBUG (D_NETERROR, "Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
- return rc;
- }
-
- if (hello->kshm_nips == 0)
- return 0;
-
- rc = libcfs_sock_write(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CDEBUG (D_NETERROR, "Error %d sending HELLO payload (%d)"
- " to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips,
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
- }
-
- return rc;
-}
-
-static int
-ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,int timeout)
-{
- cfs_socket_t *sock = conn->ksnc_sock;
- lnet_hdr_t *hdr;
- int rc;
- int i;
-
- LIBCFS_ALLOC(hdr, sizeof(*hdr));
- if (hdr == NULL) {
- CERROR("Can't allocate lnet_hdr_t\n");
- return -ENOMEM;
- }
-
- rc = libcfs_sock_read(sock, &hdr->src_nid,
- sizeof (*hdr) - offsetof (lnet_hdr_t, src_nid),
- timeout);
- if (rc != 0) {
- CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0 && rc != -EALREADY);
- goto out;
- }
-
- /* ...and check we got what we expected */
- if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) {
- CERROR ("Expecting a HELLO hdr,"
- " but got type %d from %u.%u.%u.%u\n",
- le32_to_cpu (hdr->type),
- HIPQUAD(conn->ksnc_ipaddr));
- rc = -EPROTO;
- goto out;
- }
-
- hello->kshm_src_nid = le64_to_cpu (hdr->src_nid);
- hello->kshm_src_pid = le32_to_cpu (hdr->src_pid);
- hello->kshm_src_incarnation = le64_to_cpu (hdr->msg.hello.incarnation);
- hello->kshm_ctype = le32_to_cpu (hdr->msg.hello.type);
- hello->kshm_nips = le32_to_cpu (hdr->payload_length) /
- sizeof (__u32);
-
- if (hello->kshm_nips > LNET_MAX_INTERFACES) {
- CERROR("Bad nips %d from ip %u.%u.%u.%u\n",
- hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr));
- rc = -EPROTO;
- goto out;
- }
-
- if (hello->kshm_nips == 0)
- goto out;
-
- rc = libcfs_sock_read(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32), timeout);
- if (rc != 0) {
- CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0 && rc != -EALREADY);
- goto out;
- }
-
- for (i = 0; i < hello->kshm_nips; i++) {
- hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
-
- if (hello->kshm_ips[i] == 0) {
- CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n",
- i, HIPQUAD(conn->ksnc_ipaddr));
- rc = -EPROTO;
- break;
- }
- }
-out:
- LIBCFS_FREE(hdr, sizeof(*hdr));
-
- return rc;
-}
-
-static int
-ksocknal_recv_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout)
-{
- cfs_socket_t *sock = conn->ksnc_sock;
- int rc;
- int i;
-
- if (hello->kshm_magic == LNET_PROTO_MAGIC)
- conn->ksnc_flip = 0;
- else
- conn->ksnc_flip = 1;
-
- rc = libcfs_sock_read(sock, &hello->kshm_src_nid,
- offsetof(ksock_hello_msg_t, kshm_ips) -
- offsetof(ksock_hello_msg_t, kshm_src_nid),
- timeout);
- if (rc != 0) {
- CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0 && rc != -EALREADY);
- return rc;
- }
-
- if (conn->ksnc_flip) {
- __swab32s(&hello->kshm_src_pid);
- __swab64s(&hello->kshm_src_nid);
- __swab32s(&hello->kshm_dst_pid);
- __swab64s(&hello->kshm_dst_nid);
- __swab64s(&hello->kshm_src_incarnation);
- __swab64s(&hello->kshm_dst_incarnation);
- __swab32s(&hello->kshm_ctype);
- __swab32s(&hello->kshm_nips);
- }
-
- if (hello->kshm_nips > LNET_MAX_INTERFACES) {
- CERROR("Bad nips %d from ip %u.%u.%u.%u\n",
- hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr));
- return -EPROTO;
- }
-
- if (hello->kshm_nips == 0)
- return 0;
-
- rc = libcfs_sock_read(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32), timeout);
- if (rc != 0) {
- CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0 && rc != -EALREADY);
- return rc;
- }
-
- for (i = 0; i < hello->kshm_nips; i++) {
- if (conn->ksnc_flip)
- __swab32s(&hello->kshm_ips[i]);
-
- if (hello->kshm_ips[i] == 0) {
- CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n",
- i, HIPQUAD(conn->ksnc_ipaddr));
- return -EPROTO;
- }
- }
-
- return 0;
-}
-
-static void
-ksocknal_pack_msg_v1(ksock_tx_t *tx)
-{
- /* V1.x has no KSOCK_MSG_NOOP */
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
- LASSERT(tx->tx_lnetmsg != NULL);
-
- tx->tx_iov[0].iov_base = (void *)&tx->tx_lnetmsg->msg_hdr;
- tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t);
-
- tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
-}
-
-static void
-ksocknal_pack_msg_v2(ksock_tx_t *tx)
-{
- tx->tx_iov[0].iov_base = (void *)&tx->tx_msg;
-
- if (tx->tx_lnetmsg != NULL) {
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-
- tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
- tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload);
- tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload) +
- tx->tx_lnetmsg->msg_len;
- } else {
- LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
- tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
- tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
- }
- /* Don't checksum before start sending, because packet can be piggybacked with ACK */
-}
-
-static void
-ksocknal_unpack_msg_v1(ksock_msg_t *msg)
-{
- msg->ksm_type = KSOCK_MSG_LNET;
- msg->ksm_csum = 0;
- msg->ksm_zc_req_cookie = 0;
- msg->ksm_zc_ack_cookie = 0;
-}
-
-static void
-ksocknal_unpack_msg_v2(ksock_msg_t *msg)
-{
- return; /* Do nothing */
-}
-
-ksock_proto_t ksocknal_protocol_v1x =
-{
- KSOCK_PROTO_V1,
- ksocknal_send_hello_v1,
- ksocknal_recv_hello_v1,
- ksocknal_pack_msg_v1,
- ksocknal_unpack_msg_v1
-};
-
-ksock_proto_t ksocknal_protocol_v2x =
-{
- KSOCK_PROTO_V2,
- ksocknal_send_hello_v2,
- ksocknal_recv_hello_v2,
- ksocknal_pack_msg_v2,
- ksocknal_unpack_msg_v2
-};
-
-int
-ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
- lnet_nid_t peer_nid, ksock_hello_msg_t *hello)
-{
- /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
- ksock_net_t *net = (ksock_net_t *)ni->ni_data;
- lnet_nid_t srcnid;
-
- LASSERT (0 <= hello->kshm_nips && hello->kshm_nips <= LNET_MAX_INTERFACES);
-
- /* rely on caller to hold a ref on socket so it wouldn't disappear */
- LASSERT (conn->ksnc_proto != NULL);
-
- srcnid = lnet_ptlcompat_srcnid(ni->ni_nid, peer_nid);
-
- hello->kshm_src_nid = srcnid;
- hello->kshm_dst_nid = peer_nid;
- hello->kshm_src_pid = the_lnet.ln_pid;
-
- hello->kshm_src_incarnation = net->ksnn_incarnation;
- hello->kshm_ctype = conn->ksnc_type;
-
- return conn->ksnc_proto->pro_send_hello(conn, hello);
-}
-
-int
-ksocknal_invert_type(int type)
-{
- switch (type)
- {
- case SOCKLND_CONN_ANY:
- case SOCKLND_CONN_CONTROL:
- return (type);
- case SOCKLND_CONN_BULK_IN:
- return SOCKLND_CONN_BULK_OUT;
- case SOCKLND_CONN_BULK_OUT:
- return SOCKLND_CONN_BULK_IN;
- default:
- return (SOCKLND_CONN_NONE);
- }
-}
-
-int
-ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
- ksock_hello_msg_t *hello, lnet_process_id_t *peerid,
- __u64 *incarnation)
-{
- /* Return < 0 fatal error
- * 0 success
- * EALREADY lost connection race
- * EPROTO protocol version mismatch
- */
- cfs_socket_t *sock = conn->ksnc_sock;
- int active = (conn->ksnc_proto != NULL);
- int timeout;
- int proto_match;
- int rc;
- ksock_proto_t *proto;
- lnet_process_id_t recv_id;
-
- /* socket type set on active connections - not set on passive */
- LASSERT (!active == !(conn->ksnc_type != SOCKLND_CONN_NONE));
-
- timeout = active ? *ksocknal_tunables.ksnd_timeout :
- lnet_acceptor_timeout();
-
- rc = libcfs_sock_read(sock, &hello->kshm_magic, sizeof (hello->kshm_magic), timeout);
- if (rc != 0) {
- CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0);
- return rc;
- }
-
- if (hello->kshm_magic != LNET_PROTO_MAGIC &&
- hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) &&
- hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) {
- /* Unexpected magic! */
- if (active ||
- the_lnet.ln_ptlcompat == 0) {
- CERROR ("Bad magic(1) %#08x (%#08x expected) from "
- "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic),
- LNET_PROTO_TCP_MAGIC,
- HIPQUAD(conn->ksnc_ipaddr));
- return -EPROTO;
- }
-
- /* When portals compatibility is set, I may be passed a new
- * connection "blindly" by the acceptor, and I have to
- * determine if my peer has sent an acceptor connection request
- * or not. This isn't a 'hello', so I'll get the acceptor to
- * look at it... */
- rc = lnet_accept(ni, sock, hello->kshm_magic);
- if (rc != 0)
- return -EPROTO;
-
- /* ...and if it's OK I'm back to looking for a 'hello'... */
- rc = libcfs_sock_read(sock, &hello->kshm_magic,
- sizeof (hello->kshm_magic), timeout);
- if (rc != 0) {
- CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0);
- return rc;
- }
-
- /* Only need to check V1.x magic */
- if (hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) {
- CERROR ("Bad magic(2) %#08x (%#08x expected) from "
- "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic),
- LNET_PROTO_TCP_MAGIC,
- HIPQUAD(conn->ksnc_ipaddr));
- return -EPROTO;
- }
- }
-
- rc = libcfs_sock_read(sock, &hello->kshm_version,
- sizeof(hello->kshm_version), timeout);
- if (rc != 0) {
- CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0);
- return rc;
- }
-
- proto = ksocknal_parse_proto_version(hello);
- if (proto == NULL) {
- if (!active) {
- /* unknown protocol from peer, tell peer my protocol */
- conn->ksnc_proto = &ksocknal_protocol_v2x;
-#if SOCKNAL_VERSION_DEBUG
- if (*ksocknal_tunables.ksnd_protocol != 2)
- conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
- hello->kshm_nips = 0;
- ksocknal_send_hello(ni, conn, ni->ni_nid, hello);
- }
-
- CERROR ("Unknown protocol version (%d.x expected)"
- " from %u.%u.%u.%u\n",
- conn->ksnc_proto->pro_version,
- HIPQUAD(conn->ksnc_ipaddr));
-
- return -EPROTO;
- }
-
- proto_match = (conn->ksnc_proto == proto);
- conn->ksnc_proto = proto;
-
- /* receive the rest of hello message anyway */
- rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout);
- if (rc != 0) {
- CERROR("Error %d reading or checking hello from from %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0);
- return rc;
- }
-
- *incarnation = hello->kshm_src_incarnation;
-
- if (hello->kshm_src_nid == LNET_NID_ANY) {
- CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY"
- "from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr));
- return -EPROTO;
- }
-
- if (!active &&
- conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- /* Userspace NAL assigns peer process ID from socket */
- recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
- recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), conn->ksnc_ipaddr);
- } else {
- recv_id.nid = hello->kshm_src_nid;
-
- if (the_lnet.ln_ptlcompat > 1 && /* portals peers may exist */
- LNET_NIDNET(recv_id.nid) == 0) /* this is one */
- recv_id.pid = the_lnet.ln_pid; /* give it a sensible pid */
- else
- recv_id.pid = hello->kshm_src_pid;
-
- }
-
- if (!active) {
- *peerid = recv_id;
-
- /* peer determines type */
- conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
- if (conn->ksnc_type == SOCKLND_CONN_NONE) {
- CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n",
- hello->kshm_ctype, libcfs_id2str(*peerid),
- HIPQUAD(conn->ksnc_ipaddr));
- return -EPROTO;
- }
-
- return 0;
- }
-
- if (peerid->pid != recv_id.pid ||
- !lnet_ptlcompat_matchnid(peerid->nid, recv_id.nid)) {
- LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host"
- " %u.%u.%u.%u, but they claimed they were "
- "%s; please check your Lustre "
- "configuration.\n",
- libcfs_id2str(*peerid),
- HIPQUAD(conn->ksnc_ipaddr),
- libcfs_id2str(recv_id));
- return -EPROTO;
- }
-
- if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
- /* Possible protocol mismatch or I lost the connection race */
- return proto_match ? EALREADY : EPROTO;
- }
-
- if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
- CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n",
- conn->ksnc_type, libcfs_id2str(*peerid),
- HIPQUAD(conn->ksnc_ipaddr),
- hello->kshm_ctype);
- return -EPROTO;
- }
-
- return 0;
-}
-
-void
-ksocknal_connect (ksock_route_t *route)
-{
- CFS_LIST_HEAD (zombies);
- ksock_peer_t *peer = route->ksnr_peer;
- int type;
- int wanted;
- cfs_socket_t *sock;
- cfs_time_t deadline;
- int retry_later = 0;
- int rc = 0;
-
- deadline = cfs_time_add(cfs_time_current(),
- cfs_time_seconds(*ksocknal_tunables.ksnd_timeout));
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- LASSERT (route->ksnr_scheduled);
- LASSERT (!route->ksnr_connecting);
-
- route->ksnr_connecting = 1;
-
- for (;;) {
- wanted = ksocknal_route_mask() & ~route->ksnr_connected;
-
- /* stop connecting if peer/route got closed under me, or
- * route got connected while queued */
- if (peer->ksnp_closing || route->ksnr_deleted ||
- wanted == 0) {
- retry_later = 0;
- break;
- }
-
- /* reschedule if peer is connecting to me */
- if (peer->ksnp_accepting > 0) {
- CDEBUG(D_NET,
- "peer %s(%d) already connecting to me, retry later.\n",
- libcfs_nid2str(peer->ksnp_id.nid), peer->ksnp_accepting);
- retry_later = 1;
- }
-
- if (retry_later) /* needs reschedule */
- break;
-
- if ((wanted & (1 << SOCKLND_CONN_ANY)) != 0) {
- type = SOCKLND_CONN_ANY;
- } else if ((wanted & (1 << SOCKLND_CONN_CONTROL)) != 0) {
- type = SOCKLND_CONN_CONTROL;
- } else if ((wanted & (1 << SOCKLND_CONN_BULK_IN)) != 0) {
- type = SOCKLND_CONN_BULK_IN;
- } else {
- LASSERT ((wanted & (1 << SOCKLND_CONN_BULK_OUT)) != 0);
- type = SOCKLND_CONN_BULK_OUT;
- }
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- if (cfs_time_aftereq(cfs_time_current(), deadline)) {
- rc = -ETIMEDOUT;
- lnet_connect_console_error(rc, peer->ksnp_id.nid,
- route->ksnr_ipaddr,
- route->ksnr_port);
- goto failed;
- }
-
- rc = lnet_connect(&sock, peer->ksnp_id.nid,
- route->ksnr_myipaddr,
- route->ksnr_ipaddr, route->ksnr_port);
- if (rc != 0)
- goto failed;
-
- rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
- if (rc < 0) {
- lnet_connect_console_error(rc, peer->ksnp_id.nid,
- route->ksnr_ipaddr,
- route->ksnr_port);
- goto failed;
- }
-
- /* A +ve RC means I have to retry because I lost the connection
- * race or I have to renegotiate protocol version */
- retry_later = (rc != 0);
- if (retry_later)
- CDEBUG(D_NET, "peer %s: conn race, retry later.\n",
- libcfs_nid2str(peer->ksnp_id.nid));
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
- }
-
- route->ksnr_scheduled = 0;
- route->ksnr_connecting = 0;
-
- if (retry_later) {
- /* re-queue for attention; this frees me up to handle
- * the peer's incoming connection request */
- ksocknal_launch_connection_locked(route);
- }
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
- return;
-
- failed:
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- route->ksnr_scheduled = 0;
- route->ksnr_connecting = 0;
-
- /* This is a retry rather than a new connection */
- route->ksnr_retry_interval *= 2;
- route->ksnr_retry_interval =
- MAX(route->ksnr_retry_interval,
- cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000);
- route->ksnr_retry_interval =
- MIN(route->ksnr_retry_interval,
- cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms)/1000);
-
- LASSERT (route->ksnr_retry_interval != 0);
- route->ksnr_timeout = cfs_time_add(cfs_time_current(),
- route->ksnr_retry_interval);
-
- if (!list_empty(&peer->ksnp_tx_queue) &&
- peer->ksnp_accepting == 0 &&
- ksocknal_find_connecting_route_locked(peer) == NULL) {
- /* ksnp_tx_queue is queued on a conn on successful
- * connection */
- LASSERT (list_empty (&peer->ksnp_conns));
-
- /* take all the blocked packets while I've got the lock and
- * complete below... */
- list_add(&zombies, &peer->ksnp_tx_queue);
- list_del_init(&peer->ksnp_tx_queue);
- }
-
-#if 0 /* irrelevent with only eager routes */
- if (!route->ksnr_deleted) {
- /* make this route least-favourite for re-selection */
- list_del(&route->ksnr_list);
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
- }
-#endif
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- ksocknal_peer_failed(peer);
- ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1);
-}
-
-static inline int
-ksocknal_connd_connect_route_locked(void)
-{
- /* Only handle an outgoing connection request if there is someone left
- * to handle incoming connections */
- return !list_empty(&ksocknal_data.ksnd_connd_routes) &&
- ((ksocknal_data.ksnd_connd_connecting + 1) <
- *ksocknal_tunables.ksnd_nconnds);
-}
-
-static inline int
-ksocknal_connd_ready(void)
-{
- int rc;
-
- spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
-
- rc = ksocknal_data.ksnd_shuttingdown ||
- !list_empty(&ksocknal_data.ksnd_connd_connreqs) ||
- ksocknal_connd_connect_route_locked();
-
- spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
-
- return rc;
-}
-
-int
-ksocknal_connd (void *arg)
-{
- long id = (long)arg;
- char name[16];
- ksock_connreq_t *cr;
- ksock_route_t *route;
-
- snprintf (name, sizeof (name), "socknal_cd%02ld", id);
- cfs_daemonize (name);
- cfs_block_allsigs ();
-
- spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
-
- while (!ksocknal_data.ksnd_shuttingdown) {
-
- if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
- /* Connection accepted by the listener */
- cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next,
- ksock_connreq_t, ksncr_list);
-
- list_del(&cr->ksncr_list);
- spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
-
- ksocknal_create_conn(cr->ksncr_ni, NULL,
- cr->ksncr_sock, SOCKLND_CONN_NONE);
- lnet_ni_decref(cr->ksncr_ni);
- LIBCFS_FREE(cr, sizeof(*cr));
-
- spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
- }
-
- if (ksocknal_connd_connect_route_locked()) {
- /* Connection request */
- route = list_entry (ksocknal_data.ksnd_connd_routes.next,
- ksock_route_t, ksnr_connd_list);
-
- list_del (&route->ksnr_connd_list);
- ksocknal_data.ksnd_connd_connecting++;
- spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
-
- ksocknal_connect (route);
- ksocknal_route_decref(route);
-
- spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
- ksocknal_data.ksnd_connd_connecting--;
- }
-
- spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
-
- wait_event_interruptible_exclusive(
- ksocknal_data.ksnd_connd_waitq,
- ksocknal_connd_ready());
-
- spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
- }
-
- spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
-
- ksocknal_thread_fini ();
- return (0);
-}
-
-ksock_conn_t *
-ksocknal_find_timed_out_conn (ksock_peer_t *peer)
-{
- /* We're called with a shared lock on ksnd_global_lock */
- ksock_conn_t *conn;
- struct list_head *ctmp;
-
- list_for_each (ctmp, &peer->ksnp_conns) {
- int error;
- conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
-
- /* Don't need the {get,put}connsock dance to deref ksnc_sock */
- LASSERT (!conn->ksnc_closing);
-
- /* SOCK_ERROR will reset error code of socket in
- * some platform (like Darwin8.x) */
- error = SOCK_ERROR(conn->ksnc_sock);
- if (error != 0) {
- ksocknal_conn_addref(conn);
-
- switch (error) {
- case ECONNRESET:
- CDEBUG(D_NETERROR, "A connection with %s "
- "(%u.%u.%u.%u:%d) was reset; "
- "it may have rebooted.\n",
- libcfs_id2str(peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port);
- break;
- case ETIMEDOUT:
- CDEBUG(D_NETERROR, "A connection with %s "
- "(%u.%u.%u.%u:%d) timed out; the "
- "network or node may be down.\n",
- libcfs_id2str(peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port);
- break;
- default:
- CDEBUG(D_NETERROR, "An unexpected network error %d "
- "occurred with %s "
- "(%u.%u.%u.%u:%d\n", error,
- libcfs_id2str(peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port);
- break;
- }
-
- return (conn);
- }
-
- if (conn->ksnc_rx_started &&
- cfs_time_aftereq(cfs_time_current(),
- conn->ksnc_rx_deadline)) {
- /* Timed out incomplete incoming message */
- ksocknal_conn_addref(conn);
- CDEBUG(D_NETERROR, "Timeout receiving from %s "
- "(%u.%u.%u.%u:%d), state %d wanted %d left %d\n",
- libcfs_id2str(peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port,
- conn->ksnc_rx_state,
- conn->ksnc_rx_nob_wanted,
- conn->ksnc_rx_nob_left);
- return (conn);
- }
-
- if ((!list_empty(&conn->ksnc_tx_queue) ||
- SOCK_WMEM_QUEUED(conn->ksnc_sock) != 0) &&
- cfs_time_aftereq(cfs_time_current(),
- conn->ksnc_tx_deadline)) {
- /* Timed out messages queued for sending or
- * buffered in the socket's send buffer */
- ksocknal_conn_addref(conn);
- CDEBUG(D_NETERROR, "Timeout sending data to %s "
- "(%u.%u.%u.%u:%d) the network or that "
- "node may be down.\n",
- libcfs_id2str(peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr),
- conn->ksnc_port);
- return (conn);
- }
- }
-
- return (NULL);
-}
-
-void
-ksocknal_check_peer_timeouts (int idx)
-{
- struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
- struct list_head *ptmp;
- ksock_peer_t *peer;
- ksock_conn_t *conn;
-
- again:
- /* NB. We expect to have a look at all the peers and not find any
- * connections to time out, so we just use a shared lock while we
- * take a look... */
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- list_for_each (ptmp, peers) {
- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
- conn = ksocknal_find_timed_out_conn (peer);
-
- if (conn != NULL) {
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
-
- /* NB we won't find this one again, but we can't
- * just proceed with the next peer, since we dropped
- * ksnd_global_lock and it might be dead already! */
- ksocknal_conn_decref(conn);
- goto again;
- }
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_reaper (void *arg)
-{
- cfs_waitlink_t wait;
- ksock_conn_t *conn;
- ksock_sched_t *sched;
- struct list_head enomem_conns;
- int nenomem_conns;
- cfs_duration_t timeout;
- int i;
- int peer_index = 0;
- cfs_time_t deadline = cfs_time_current();
-
- cfs_daemonize ("socknal_reaper");
- cfs_block_allsigs ();
-
- CFS_INIT_LIST_HEAD(&enomem_conns);
- cfs_waitlink_init (&wait);
-
- spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
-
- while (!ksocknal_data.ksnd_shuttingdown) {
-
- if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) {
- conn = list_entry (ksocknal_data.ksnd_deathrow_conns.next,
- ksock_conn_t, ksnc_list);
- list_del (&conn->ksnc_list);
-
- spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
-
- ksocknal_terminate_conn (conn);
- ksocknal_conn_decref(conn);
-
- spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
- continue;
- }
-
- if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) {
- conn = list_entry (ksocknal_data.ksnd_zombie_conns.next,
- ksock_conn_t, ksnc_list);
- list_del (&conn->ksnc_list);
-
- spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
-
- ksocknal_destroy_conn (conn);
-
- spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
- continue;
- }
-
- if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) {
- list_add(&enomem_conns, &ksocknal_data.ksnd_enomem_conns);
- list_del_init(&ksocknal_data.ksnd_enomem_conns);
- }
-
- spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
-
- /* reschedule all the connections that stalled with ENOMEM... */
- nenomem_conns = 0;
- while (!list_empty (&enomem_conns)) {
- conn = list_entry (enomem_conns.next,
- ksock_conn_t, ksnc_tx_list);
- list_del (&conn->ksnc_tx_list);
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh (&sched->kss_lock);
-
- LASSERT (conn->ksnc_tx_scheduled);
- conn->ksnc_tx_ready = 1;
- list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns);
- cfs_waitq_signal (&sched->kss_waitq);
-
- spin_unlock_bh (&sched->kss_lock);
- nenomem_conns++;
- }
-
- /* careful with the jiffy wrap... */
- while ((timeout = cfs_time_sub(deadline,
- cfs_time_current())) <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = ksocknal_data.ksnd_peer_hash_size;
-
- /* Time to check for timeouts on a few more peers: I do
- * checks every 'p' seconds on a proportion of the peer
- * table and I need to check every connection 'n' times
- * within a timeout interval, to ensure I detect a
- * timeout on any connection within (n+1)/n times the
- * timeout interval. */
-
- if (*ksocknal_tunables.ksnd_timeout > n * p)
- chunk = (chunk * n * p) /
- *ksocknal_tunables.ksnd_timeout;
- if (chunk == 0)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- ksocknal_check_peer_timeouts (peer_index);
- peer_index = (peer_index + 1) %
- ksocknal_data.ksnd_peer_hash_size;
- }
-
- deadline = cfs_time_add(deadline, cfs_time_seconds(p));
- }
-
- if (nenomem_conns != 0) {
- /* Reduce my timeout if I rescheduled ENOMEM conns.
- * This also prevents me getting woken immediately
- * if any go back on my enomem list. */
- timeout = SOCKNAL_ENOMEM_RETRY;
- }
- ksocknal_data.ksnd_reaper_waketime =
- cfs_time_add(cfs_time_current(), timeout);
-
- set_current_state (TASK_INTERRUPTIBLE);
- cfs_waitq_add (&ksocknal_data.ksnd_reaper_waitq, &wait);
-
- if (!ksocknal_data.ksnd_shuttingdown &&
- list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
- list_empty (&ksocknal_data.ksnd_zombie_conns))
- cfs_waitq_timedwait (&wait, CFS_TASK_INTERRUPTIBLE, timeout);
-
- set_current_state (TASK_RUNNING);
- cfs_waitq_del (&ksocknal_data.ksnd_reaper_waitq, &wait);
-
- spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
- }
-
- spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
-
- ksocknal_thread_fini ();
- return (0);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Darwin porting library
- * Make things easy to port
- */
-#include <mach/mach_types.h>
-#include <string.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <sys/file.h>
-
-#include "socklnd.h"
-
-# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
-
-SYSCTL_DECL(_lnet);
-
-SYSCTL_NODE (_lnet, OID_AUTO, ksocknal, CTLFLAG_RW,
- 0, "ksocknal_sysctl");
-
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, timeout,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_timeout,
- 0, "timeout");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, credits,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_credits,
- 0, "credits");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, peer_credits,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_peercredits,
- 0, "peer_credits");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nconnds,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nconnds,
- 0, "nconnds");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_reconnectms,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_reconnectms,
- 0, "min_reconnectms");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, max_reconnectms,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_max_reconnectms,
- 0, "max_reconnectms");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, eager_ack,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_eager_ack,
- 0, "eager_ack");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, typed,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_typed_conns,
- 0, "typed");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_bulk,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_bulk,
- 0, "min_bulk");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, rx_buffer_size,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_rx_buffer_size,
- 0, "rx_buffer_size");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, tx_buffer_size,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_tx_buffer_size,
- 0, "tx_buffer_size");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nagle,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nagle,
- 0, "nagle");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_idle,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_idle,
- 0, "keepalive_idle");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_count,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_count,
- 0, "keepalive_count");
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_intvl,
- CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_intvl,
- 0, "keepalive_intvl");
-
-cfs_sysctl_table_t ksocknal_top_ctl_table [] = {
- &sysctl__lnet_ksocknal,
- &sysctl__lnet_ksocknal_timeout,
- &sysctl__lnet_ksocknal_credits,
- &sysctl__lnet_ksocknal_peer_credits,
- &sysctl__lnet_ksocknal_nconnds,
- &sysctl__lnet_ksocknal_min_reconnectms,
- &sysctl__lnet_ksocknal_max_reconnectms,
- &sysctl__lnet_ksocknal_eager_ack,
- &sysctl__lnet_ksocknal_typed,
- &sysctl__lnet_ksocknal_min_bulk,
- &sysctl__lnet_ksocknal_rx_buffer_size,
- &sysctl__lnet_ksocknal_tx_buffer_size,
- &sysctl__lnet_ksocknal_nagle,
- &sysctl__lnet_ksocknal_keepalive_idle,
- &sysctl__lnet_ksocknal_keepalive_count,
- &sysctl__lnet_ksocknal_keepalive_intvl,
- NULL
-};
-
-int
-ksocknal_lib_tunables_init ()
-{
- ksocknal_tunables.ksnd_sysctl =
- cfs_register_sysctl_table (ksocknal_top_ctl_table, 0);
-
- if (ksocknal_tunables.ksnd_sysctl == NULL)
- return -ENOMEM;
-
- return 0;
-}
-
-void
-ksocknal_lib_tunables_fini ()
-{
- if (ksocknal_tunables.ksnd_sysctl != NULL)
- cfs_unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl);
-}
-#else
-int
-ksocknal_lib_tunables_init ()
-{
- return 0;
-}
-
-void
-ksocknal_lib_tunables_fini ()
-{
-}
-#endif
-
-/*
- * To use bigger buffer for socket:
- * 1. Increase nmbclusters (Cannot increased by sysctl because it's ready only, so
- * we must patch kernel).
- * 2. Increase net.inet.tcp.reass.maxsegments
- * 3. Increase net.inet.tcp.sendspace
- * 4. Increase net.inet.tcp.recvspace
- * 5. Increase kern.ipc.maxsockbuf
- */
-#define KSOCKNAL_MAX_BUFFER (1152*1024)
-
-void
-ksocknal_lib_bind_irq (unsigned int irq)
-{
- return;
-}
-
-unsigned int
-ksocknal_lib_sock_irq (cfs_socket_t *sock)
-{
- return 0;
-}
-
-int
-ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
-{
- int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
- &conn->ksnc_ipaddr,
- &conn->ksnc_port);
-
- /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
- LASSERT (!conn->ksnc_closing);
-
- if (rc != 0) {
- CERROR ("Error %d getting sock peer IP\n", rc);
- return rc;
- }
-
- rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
- &conn->ksnc_myipaddr, NULL);
- if (rc != 0) {
- CERROR ("Error %d getting sock local IP\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-#ifdef __DARWIN8__
-
-int
-ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- socket_t sock = C2B_SOCK(conn->ksnc_sock);
- size_t sndlen;
- int nob;
- int rc;
-
-#if SOCKNAL_SINGLE_FRAG_TX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
- unsigned int niov = tx->tx_niov;
-#endif
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = MSG_DONTWAIT
- };
-
- int i;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = tx->tx_iov[i];
- nob += scratchiov[i].iov_len;
- }
-
- /*
- * XXX Liang:
- * Linux has MSG_MORE, do we have anything to
- * reduce number of partial TCP segments sent?
- */
- rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
- if (rc == 0)
- rc = sndlen;
- return rc;
-}
-
-int
-ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- socket_t sock = C2B_SOCK(conn->ksnc_sock);
- lnet_kiov_t *kiov = tx->tx_kiov;
- int rc;
- int nob;
- size_t sndlen;
-
-#if SOCKNAL_SINGLE_FRAG_TX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
- unsigned int niov = tx->tx_nkiov;
-#endif
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = MSG_DONTWAIT
- };
-
- int i;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
-
- /*
- * XXX Liang:
- * Linux has MSG_MORE, do wen have anyting to
- * reduce number of partial TCP segments sent?
- */
- rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
- for (i = 0; i < niov; i++)
- cfs_kunmap(kiov[i].kiov_page);
- if (rc == 0)
- rc = sndlen;
- return rc;
-}
-
-int
-ksocknal_lib_recv_iov (ksock_conn_t *conn)
-{
-#if SOCKNAL_SINGLE_FRAG_RX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- unsigned int niov = conn->ksnc_rx_niov;
-#endif
- struct iovec *iov = conn->ksnc_rx_iov;
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
- size_t rcvlen;
- int nob;
- int i;
- int rc;
-
- LASSERT (niov > 0);
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = iov[i];
- nob += scratchiov[i].iov_len;
- }
- LASSERT (nob <= conn->ksnc_rx_nob_wanted);
- rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
- if (rc == 0)
- rc = rcvlen;
-
- return rc;
-}
-
-int
-ksocknal_lib_recv_kiov (ksock_conn_t *conn)
-{
-#if SOCKNAL_SINGLE_FRAG_RX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- unsigned int niov = conn->ksnc_rx_nkiov;
-#endif
- lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
- int nob;
- int i;
- size_t rcvlen;
- int rc;
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + \
- kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
- LASSERT (nob <= conn->ksnc_rx_nob_wanted);
- rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
- for (i = 0; i < niov; i++)
- cfs_kunmap(kiov[i].kiov_page);
- if (rc == 0)
- rc = rcvlen;
- return (rc);
-}
-
-void
-ksocknal_lib_eager_ack (ksock_conn_t *conn)
-{
- /* XXX Liang: */
-}
-
-int
-ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
-{
- socket_t sock = C2B_SOCK(conn->ksnc_sock);
- int len;
- int rc;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc != 0) {
- LASSERT (conn->ksnc_closing);
- *txmem = *rxmem = *nagle = 0;
- return (-ESHUTDOWN);
- }
- rc = libcfs_sock_getbuf(conn->ksnc_sock, txmem, rxmem);
- if (rc == 0) {
- len = sizeof(*nagle);
- rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
- nagle, &len);
- }
- ksocknal_connsock_decref(conn);
-
- if (rc == 0)
- *nagle = !*nagle;
- else
- *txmem = *rxmem = *nagle = 0;
-
- return (rc);
-}
-
-int
-ksocknal_lib_setup_sock (cfs_socket_t *sock)
-{
- int rc;
- int option;
- int keep_idle;
- int keep_intvl;
- int keep_count;
- int do_keepalive;
- socket_t so = C2B_SOCK(sock);
- struct linger linger;
-
- /* Ensure this socket aborts active sends immediately when we close
- * it. */
- linger.l_onoff = 0;
- linger.l_linger = 0;
- rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER, &linger, sizeof(linger));
- if (rc != 0) {
- CERROR ("Can't set SO_LINGER: %d\n", rc);
- return (rc);
- }
-
- if (!*ksocknal_tunables.ksnd_nagle) {
- option = 1;
- rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option));
- if (rc != 0) {
- CERROR ("Can't disable nagle: %d\n", rc);
- return (rc);
- }
- }
-
- rc = libcfs_sock_setbuf(sock,
- *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size);
- if (rc != 0) {
- CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
- *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size, rc);
- return (rc);
- }
-
- /* snapshot tunables */
- keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
- keep_count = *ksocknal_tunables.ksnd_keepalive_count;
- keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
-
- do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
- option = (do_keepalive ? 1 : 0);
-
- rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option));
- if (rc != 0) {
- CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
- return (rc);
- }
-
- if (!do_keepalive)
- return (rc);
- rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE,
- &keep_idle, sizeof(keep_idle));
-
- return (rc);
-}
-
-void
-ksocknal_lib_push_conn(ksock_conn_t *conn)
-{
- socket_t sock;
- int val = 1;
- int rc;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc != 0) /* being shut down */
- return;
- sock = C2B_SOCK(conn->ksnc_sock);
-
- rc = -sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val));
- LASSERT(rc == 0);
-
- ksocknal_connsock_decref(conn);
- return;
-}
-
-extern void ksocknal_read_callback (ksock_conn_t *conn);
-extern void ksocknal_write_callback (ksock_conn_t *conn);
-
-static void
-ksocknal_upcall(socket_t so, void *arg, int waitf)
-{
- ksock_conn_t *conn = (ksock_conn_t *)arg;
- ENTRY;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
- if (conn == NULL)
- goto out;
-
- ksocknal_read_callback (conn);
- /* XXX Liang */
- ksocknal_write_callback (conn);
-out:
- read_unlock (&ksocknal_data.ksnd_global_lock);
- EXIT;
-}
-
-void
-ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn)
-{
- /* No callback need to save in osx */
- return;
-}
-
-void
-ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn)
-{
- libcfs_sock_set_cb(sock, ksocknal_upcall, (void *)conn);
- return;
-}
-
-void
-ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn)
-{
- libcfs_sock_reset_cb(sock);
-}
-
-#else /* !__DARWIN8__ */
-
-int
-ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
-#if SOCKNAL_SINGLE_FRAG_TX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
- unsigned int niov = tx->tx_niov;
-#endif
- struct socket *sock = conn->ksnc_sock;
- int nob;
- int rc;
- int i;
- struct uio suio = {
- .uio_iov = scratchiov,
- .uio_iovcnt = niov,
- .uio_offset = 0,
- .uio_resid = 0, /* This will be valued after a while */
- .uio_segflg = UIO_SYSSPACE,
- .uio_rw = UIO_WRITE,
- .uio_procp = NULL
- };
- int flags = MSG_DONTWAIT;
- CFS_DECL_NET_DATA;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = tx->tx_iov[i];
- nob += scratchiov[i].iov_len;
- }
- suio.uio_resid = nob;
-
- CFS_NET_IN;
- rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
- CFS_NET_EX;
-
- /* NB there is no return value can indicate how many
- * have been sent and how many resid, we have to get
- * sent bytes from suio. */
- if (rc != 0) {
- if (suio.uio_resid != nob &&\
- (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
- /* We have sent something */
- rc = nob - suio.uio_resid;
- else if ( rc == EWOULDBLOCK )
- /* Actually, EAGAIN and EWOULDBLOCK have same value in OSX */
- rc = -EAGAIN;
- else
- rc = -rc;
- } else /* rc == 0 */
- rc = nob - suio.uio_resid;
-
- return rc;
-}
-
-int
-ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
-#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
- unsigned int niov = tx->tx_nkiov;
-#endif
- struct socket *sock = conn->ksnc_sock;
- lnet_kiov_t *kiov = tx->tx_kiov;
- int nob;
- int rc;
- int i;
- struct uio suio = {
- .uio_iov = scratchiov,
- .uio_iovcnt = niov,
- .uio_offset = 0,
- .uio_resid = 0, /* It should be valued after a while */
- .uio_segflg = UIO_SYSSPACE,
- .uio_rw = UIO_WRITE,
- .uio_procp = NULL
- };
- int flags = MSG_DONTWAIT;
- CFS_DECL_NET_DATA;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
- suio.uio_resid = nob;
-
- CFS_NET_IN;
- rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
- CFS_NET_EX;
-
- for (i = 0; i < niov; i++)
- cfs_kunmap(kiov[i].kiov_page);
-
- if (rc != 0) {
- if (suio.uio_resid != nob &&\
- (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
- /* We have sent something */
- rc = nob - suio.uio_resid;
- else if ( rc == EWOULDBLOCK )
- /* EAGAIN and EWOULD BLOCK have same value in OSX */
- rc = -EAGAIN;
- else
- rc = -rc;
- } else /* rc == 0 */
- rc = nob - suio.uio_resid;
-
- return rc;
-}
-
-/*
- * liang: Hack of inpcb and tcpcb.
- * To get tcpcb of a socket, and call tcp_output
- * to send quick ack.
- */
-struct ks_tseg_qent{
- int foo;
-};
-
-struct ks_tcptemp{
- int foo;
-};
-
-LIST_HEAD(ks_tsegqe_head, ks_tseg_qent);
-
-struct ks_tcpcb {
- struct ks_tsegqe_head t_segq;
- int t_dupacks;
- struct ks_tcptemp *unused;
- int t_timer[4];
- struct inpcb *t_inpcb;
- int t_state;
- u_int t_flags;
- /*
- * There are more fields but we dont need
- * ......
- */
-};
-
-#define TF_ACKNOW 0x00001
-#define TF_DELACK 0x00002
-
-struct ks_inpcb {
- LIST_ENTRY(ks_inpcb) inp_hash;
- struct in_addr reserved1;
- struct in_addr reserved2;
- u_short inp_fport;
- u_short inp_lport;
- LIST_ENTRY(inpcb) inp_list;
- caddr_t inp_ppcb;
- /*
- * There are more fields but we dont need
- * ......
- */
-};
-
-#define ks_sotoinpcb(so) ((struct ks_inpcb *)(so)->so_pcb)
-#define ks_intotcpcb(ip) ((struct ks_tcpcb *)(ip)->inp_ppcb)
-#define ks_sototcpcb(so) (intotcpcb(sotoinpcb(so)))
-
-void
-ksocknal_lib_eager_ack (ksock_conn_t *conn)
-{
- struct socket *sock = conn->ksnc_sock;
- struct ks_inpcb *inp = ks_sotoinpcb(sock);
- struct ks_tcpcb *tp = ks_intotcpcb(inp);
- int s;
- CFS_DECL_NET_DATA;
-
- extern int tcp_output(register struct ks_tcpcb *tp);
-
- CFS_NET_IN;
- s = splnet();
-
- /*
- * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo
- * to send immediate ACK.
- */
- if (tp && tp->t_flags & TF_DELACK){
- tp->t_flags &= ~TF_DELACK;
- tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
- }
- splx(s);
-
- CFS_NET_EX;
-
- return;
-}
-
-int
-ksocknal_lib_recv_iov (ksock_conn_t *conn)
-{
-#if SOCKNAL_SINGLE_FRAG_RX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- unsigned int niov = conn->ksnc_rx_niov;
-#endif
- struct iovec *iov = conn->ksnc_rx_iov;
- int nob;
- int rc;
- int i;
- struct uio ruio = {
- .uio_iov = scratchiov,
- .uio_iovcnt = niov,
- .uio_offset = 0,
- .uio_resid = 0, /* It should be valued after a while */
- .uio_segflg = UIO_SYSSPACE,
- .uio_rw = UIO_READ,
- .uio_procp = NULL
- };
- int flags = MSG_DONTWAIT;
- CFS_DECL_NET_DATA;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = iov[i];
- nob += scratchiov[i].iov_len;
- }
- LASSERT (nob <= conn->ksnc_rx_nob_wanted);
-
- ruio.uio_resid = nob;
-
- CFS_NET_IN;
- rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, &flags);
- CFS_NET_EX;
- if (rc){
- if (ruio.uio_resid != nob && \
- (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK || rc == EAGAIN))
- /* data particially received */
- rc = nob - ruio.uio_resid;
- else if (rc == EWOULDBLOCK)
- /* EAGAIN and EWOULD BLOCK have same value in OSX */
- rc = -EAGAIN;
- else
- rc = -rc;
- } else
- rc = nob - ruio.uio_resid;
-
- return (rc);
-}
-
-int
-ksocknal_lib_recv_kiov (ksock_conn_t *conn)
-{
-#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- unsigned int niov = conn->ksnc_rx_nkiov;
-#endif
- lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
- int nob;
- int rc;
- int i;
- struct uio ruio = {
- .uio_iov = scratchiov,
- .uio_iovcnt = niov,
- .uio_offset = 0,
- .uio_resid = 0,
- .uio_segflg = UIO_SYSSPACE,
- .uio_rw = UIO_READ,
- .uio_procp = NULL
- };
- int flags = MSG_DONTWAIT;
- CFS_DECL_NET_DATA;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
- LASSERT (nob <= conn->ksnc_rx_nob_wanted);
-
- ruio.uio_resid = nob;
-
- CFS_NET_IN;
- rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, NULL, &flags);
- CFS_NET_EX;
-
- for (i = 0; i < niov; i++)
- cfs_kunmap(kiov[i].kiov_page);
-
- if (rc){
- if (ruio.uio_resid != nob && \
- (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
- /* data particially received */
- rc = nob - ruio.uio_resid;
- else if (rc == EWOULDBLOCK)
- /* receive blocked, EWOULDBLOCK == EAGAIN */
- rc = -EAGAIN;
- else
- rc = -rc;
- } else
- rc = nob - ruio.uio_resid;
-
- return (rc);
-}
-
-int
-ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
-{
- struct socket *sock = conn->ksnc_sock;
- int rc;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc != 0) {
- LASSERT (conn->ksnc_closing);
- *txmem = *rxmem = *nagle = 0;
- return -ESHUTDOWN;
- }
- rc = libcfs_sock_getbuf(sock, txmem, rxmem);
- if (rc == 0) {
- struct sockopt sopt;
- int len;
- CFS_DECL_NET_DATA;
-
- len = sizeof(*nagle);
- bzero(&sopt, sizeof sopt);
- sopt.sopt_dir = SOPT_GET;
- sopt.sopt_level = IPPROTO_TCP;
- sopt.sopt_name = TCP_NODELAY;
- sopt.sopt_val = nagle;
- sopt.sopt_valsize = len;
-
- CFS_NET_IN;
- rc = -sogetopt(sock, &sopt);
- CFS_NET_EX;
- }
-
- ksocknal_connsock_decref(conn);
-
- if (rc == 0)
- *nagle = !*nagle;
- else
- *txmem = *rxmem = *nagle = 0;
- return (rc);
-}
-
-int
-ksocknal_lib_setup_sock (struct socket *so)
-{
- struct sockopt sopt;
- int rc;
- int option;
- int keep_idle;
- int keep_intvl;
- int keep_count;
- int do_keepalive;
- struct linger linger;
- CFS_DECL_NET_DATA;
-
- rc = libcfs_sock_setbuf(so,
- *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size);
- if (rc != 0) {
- CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
- *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size, rc);
- return (rc);
- }
-
- /* Ensure this socket aborts active sends immediately when we close
- * it. */
- bzero(&sopt, sizeof sopt);
-
- linger.l_onoff = 0;
- linger.l_linger = 0;
- sopt.sopt_dir = SOPT_SET;
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_name = SO_LINGER;
- sopt.sopt_val = &linger;
- sopt.sopt_valsize = sizeof(linger);
-
- CFS_NET_IN;
- rc = -sosetopt(so, &sopt);
- if (rc != 0) {
- CERROR ("Can't set SO_LINGER: %d\n", rc);
- goto out;
- }
-
- if (!*ksocknal_tunables.ksnd_nagle) {
- option = 1;
- bzero(&sopt, sizeof sopt);
- sopt.sopt_dir = SOPT_SET;
- sopt.sopt_level = IPPROTO_TCP;
- sopt.sopt_name = TCP_NODELAY;
- sopt.sopt_val = &option;
- sopt.sopt_valsize = sizeof(option);
- rc = -sosetopt(so, &sopt);
- if (rc != 0) {
- CERROR ("Can't disable nagle: %d\n", rc);
- goto out;
- }
- }
-
- /* snapshot tunables */
- keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
- keep_count = *ksocknal_tunables.ksnd_keepalive_count;
- keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
-
- do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
- option = (do_keepalive ? 1 : 0);
- bzero(&sopt, sizeof sopt);
- sopt.sopt_dir = SOPT_SET;
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_name = SO_KEEPALIVE;
- sopt.sopt_val = &option;
- sopt.sopt_valsize = sizeof(option);
- rc = -sosetopt(so, &sopt);
- if (rc != 0) {
- CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
- goto out;
- }
-
- if (!do_keepalive) {
- /* no more setting, just return */
- rc = 0;
- goto out;
- }
-
- bzero(&sopt, sizeof sopt);
- sopt.sopt_dir = SOPT_SET;
- sopt.sopt_level = IPPROTO_TCP;
- sopt.sopt_name = TCP_KEEPALIVE;
- sopt.sopt_val = &keep_idle;
- sopt.sopt_valsize = sizeof(keep_idle);
- rc = -sosetopt(so, &sopt);
- if (rc != 0) {
- CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc);
- goto out;
- }
-out:
- CFS_NET_EX;
- return (rc);
-}
-
-void
-ksocknal_lib_push_conn(ksock_conn_t *conn)
-{
- struct socket *sock;
- struct sockopt sopt;
- int val = 1;
- int rc;
- CFS_DECL_NET_DATA;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc != 0) /* being shut down */
- return;
- sock = conn->ksnc_sock;
- bzero(&sopt, sizeof sopt);
- sopt.sopt_dir = SOPT_SET;
- sopt.sopt_level = IPPROTO_TCP;
- sopt.sopt_name = TCP_NODELAY;
- sopt.sopt_val = &val;
- sopt.sopt_valsize = sizeof val;
-
- CFS_NET_IN;
- sosetopt(sock, &sopt);
- CFS_NET_EX;
-
- ksocknal_connsock_decref(conn);
- return;
-}
-
-
-extern void ksocknal_read_callback (ksock_conn_t *conn);
-extern void ksocknal_write_callback (ksock_conn_t *conn);
-
-static void
-ksocknal_upcall(struct socket *so, caddr_t arg, int waitf)
-{
- ksock_conn_t *conn = (ksock_conn_t *)arg;
- ENTRY;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
- if (conn == NULL)
- goto out;
-
- if (so->so_rcv.sb_flags & SB_UPCALL) {
- extern int soreadable(struct socket *so);
- if (conn->ksnc_rx_nob_wanted && soreadable(so))
- /* To verify whether the upcall is for receive */
- ksocknal_read_callback (conn);
- }
- /* go foward? */
- if (so->so_snd.sb_flags & SB_UPCALL){
- extern int sowriteable(struct socket *so);
- if (sowriteable(so))
- /* socket is writable */
- ksocknal_write_callback(conn);
- }
-out:
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- EXIT;
-}
-
-void
-ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
-{
- /* No callback need to save in osx */
- return;
-}
-
-void
-ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
-{
- CFS_DECL_NET_DATA;
-
- CFS_NET_IN;
- sock->so_upcallarg = (void *)conn;
- sock->so_upcall = ksocknal_upcall;
- sock->so_snd.sb_timeo = 0;
- sock->so_rcv.sb_timeo = cfs_time_seconds(2);
- sock->so_rcv.sb_flags |= SB_UPCALL;
- sock->so_snd.sb_flags |= SB_UPCALL;
- CFS_NET_EX;
- return;
-}
-
-void
-ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn)
-{
- CFS_DECL_NET_DATA;
-
- CFS_NET_IN;
- ksocknal_upcall (sock, (void *)conn, 0);
- CFS_NET_EX;
-}
-
-void
-ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
-{
- CFS_DECL_NET_DATA;
-
- CFS_NET_IN;
- sock->so_rcv.sb_flags &= ~SB_UPCALL;
- sock->so_snd.sb_flags &= ~SB_UPCALL;
- sock->so_upcall = NULL;
- sock->so_upcallarg = NULL;
- CFS_NET_EX;
-}
-
-#endif /* !__DARWIN8__ */
+++ /dev/null
-#ifndef __XNU_SOCKNAL_LIB_H__
-#define __XNU_SOCKNAL_LIB_H__
-
-#include <sys/kernel.h>
-#include <sys/file.h>
-#include <sys/filedesc.h>
-#include <sys/stat.h>
-#include <sys/vnode.h>
-#include <sys/mount.h>
-#include <sys/proc.h>
-#include <sys/sysctl.h>
-#include <sys/ubc.h>
-#include <sys/uio.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/domain.h>
-#include <sys/protosw.h>
-#include <sys/namei.h>
-#include <sys/fcntl.h>
-#include <sys/lockf.h>
-#include <sys/syslog.h>
-#include <machine/spl.h>
-#include <mach/mach_types.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <stdarg.h>
-
-#include <libcfs/libcfs.h>
-
-static inline
-int ksocknal_nsched(void)
-{
- /* XXX Liang: fix it */
- return 1;
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-
-#include "socklnd.h"
-
-# if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-static cfs_sysctl_table_t ksocknal_ctl_table[21];
-
-cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
- {
- .ctl_name = 200,
- .procname = "socknal",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ksocknal_ctl_table
- },
- { 0 }
-};
-
-int
-ksocknal_lib_tunables_init ()
-{
- int i = 0;
- int j = 1;
-
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "timeout",
- .data = ksocknal_tunables.ksnd_timeout,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "credits",
- .data = ksocknal_tunables.ksnd_credits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "peer_credits",
- .data = ksocknal_tunables.ksnd_peercredits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "nconnds",
- .data = ksocknal_tunables.ksnd_nconnds,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "min_reconnectms",
- .data = ksocknal_tunables.ksnd_min_reconnectms,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "max_reconnectms",
- .data = ksocknal_tunables.ksnd_max_reconnectms,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "eager_ack",
- .data = ksocknal_tunables.ksnd_eager_ack,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "zero_copy",
- .data = ksocknal_tunables.ksnd_zc_min_frag,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "typed",
- .data = ksocknal_tunables.ksnd_typed_conns,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "min_bulk",
- .data = ksocknal_tunables.ksnd_min_bulk,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "rx_buffer_size",
- .data = ksocknal_tunables.ksnd_rx_buffer_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "tx_buffer_size",
- .data = ksocknal_tunables.ksnd_tx_buffer_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "nagle",
- .data = ksocknal_tunables.ksnd_nagle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
-#ifdef CPU_AFFINITY
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "irq_affinity",
- .data = ksocknal_tunables.ksnd_irq_affinity,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
-#endif
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "keepalive_idle",
- .data = ksocknal_tunables.ksnd_keepalive_idle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "keepalive_count",
- .data = ksocknal_tunables.ksnd_keepalive_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "keepalive_intvl",
- .data = ksocknal_tunables.ksnd_keepalive_intvl,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
-#ifdef SOCKNAL_BACKOFF
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "backoff_init",
- .data = ksocknal_tunables.ksnd_backoff_init,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "backoff_max",
- .data = ksocknal_tunables.ksnd_backoff_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
-#endif
-#if SOCKNAL_VERSION_DEBUG
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
- .ctl_name = j++,
- .procname = "protocol",
- .data = ksocknal_tunables.ksnd_protocol,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
-#endif
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { 0 };
-
- LASSERT (j == i);
- LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
-
- ksocknal_tunables.ksnd_sysctl =
- cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
-
- if (ksocknal_tunables.ksnd_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-
- return 0;
-}
-
-void
-ksocknal_lib_tunables_fini ()
-{
- if (ksocknal_tunables.ksnd_sysctl != NULL)
- cfs_unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
-}
-#else
-int
-ksocknal_lib_tunables_init ()
-{
- return 0;
-}
-
-void
-ksocknal_lib_tunables_fini ()
-{
-}
-#endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
-
-void
-ksocknal_lib_bind_irq (unsigned int irq)
-{
-#if (defined(CONFIG_SMP) && defined(CPU_AFFINITY))
- int bind;
- int cpu;
- char cmdline[64];
- ksock_irqinfo_t *info;
- char *argv[] = {"/bin/sh",
- "-c",
- cmdline,
- NULL};
- char *envp[] = {"HOME=/",
- "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
- NULL};
-
- LASSERT (irq < NR_IRQS);
- if (irq == 0) /* software NIC or affinity disabled */
- return;
-
- info = &ksocknal_data.ksnd_irqinfo[irq];
-
- write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
- LASSERT (info->ksni_valid);
- bind = !info->ksni_bound;
- info->ksni_bound = 1;
-
- write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
- if (!bind) /* bound already */
- return;
-
- cpu = ksocknal_irqsched2cpu(info->ksni_sched);
- snprintf (cmdline, sizeof (cmdline),
- "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq);
-
- LCONSOLE_INFO("Binding irq %u to CPU %d with cmd: %s\n",
- irq, cpu, cmdline);
-
- /* FIXME: Find a better method of setting IRQ affinity...
- */
-
- USERMODEHELPER(argv[0], argv, envp);
-#endif
-}
-
-int
-ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
-{
- int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
- &conn->ksnc_ipaddr,
- &conn->ksnc_port);
-
- /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
- LASSERT (!conn->ksnc_closing);
-
- if (rc != 0) {
- CERROR ("Error %d getting sock peer IP\n", rc);
- return rc;
- }
-
- rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
- &conn->ksnc_myipaddr, NULL);
- if (rc != 0) {
- CERROR ("Error %d getting sock local IP\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-unsigned int
-ksocknal_lib_sock_irq (struct socket *sock)
-{
- int irq = 0;
-#ifdef CPU_AFFINITY
- struct dst_entry *dst;
-
- if (!*ksocknal_tunables.ksnd_irq_affinity)
- return 0;
-
- dst = sk_dst_get (sock->sk);
- if (dst != NULL) {
- if (dst->dev != NULL) {
- irq = dst->dev->irq;
- if (irq >= NR_IRQS) {
- CERROR ("Unexpected IRQ %x\n", irq);
- irq = 0;
- }
- }
- dst_release (dst);
- }
-
-#endif
- return irq;
-}
-
-int
-ksocknal_lib_zc_capable(struct socket *sock)
-{
- int caps = sock->sk->sk_route_caps;
-
- /* ZC if the socket supports scatter/gather and doesn't need software
- * checksums */
- return ((caps & NETIF_F_SG) != 0 &&
- (caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) != 0);
-}
-
-int
-ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- struct socket *sock = conn->ksnc_sock;
- int nob;
- int rc;
-
- if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
- conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
- tx->tx_nob == tx->tx_resid && /* frist sending */
- tx->tx_msg.ksm_csum == 0) /* not checksummed */
- ksocknal_lib_csum_tx(tx);
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
-
- {
-#if SOCKNAL_SINGLE_FRAG_TX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
- unsigned int niov = tx->tx_niov;
-#endif
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = MSG_DONTWAIT
- };
- mm_segment_t oldmm = get_fs();
- int i;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = tx->tx_iov[i];
- nob += scratchiov[i].iov_len;
- }
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- set_fs (KERNEL_DS);
- rc = sock_sendmsg(sock, &msg, nob);
- set_fs (oldmm);
- }
- return rc;
-}
-
-int
-ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- struct socket *sock = conn->ksnc_sock;
- lnet_kiov_t *kiov = tx->tx_kiov;
- int rc;
- int nob;
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
-
- if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag &&
- tx->tx_msg.ksm_zc_req_cookie != 0) {
- /* Zero copy is enabled */
- struct sock *sk = sock->sk;
- struct page *page = kiov->kiov_page;
- int offset = kiov->kiov_offset;
- int fragsize = kiov->kiov_len;
- int msgflg = MSG_DONTWAIT;
-
- CDEBUG(D_NET, "page %p + offset %x for %d\n",
- page, offset, kiov->kiov_len);
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- fragsize < tx->tx_resid)
- msgflg |= MSG_MORE;
-
- if (sk->sk_prot->sendpage != NULL) {
- rc = sk->sk_prot->sendpage(sk, page,
- offset, fragsize, msgflg);
- } else {
- rc = tcp_sendpage(sock, page, offset, fragsize, msgflg);
- }
- } else {
-#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
- unsigned int niov = tx->tx_nkiov;
-#endif
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = MSG_DONTWAIT
- };
- mm_segment_t oldmm = get_fs();
- int i;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- set_fs (KERNEL_DS);
- rc = sock_sendmsg(sock, &msg, nob);
- set_fs (oldmm);
-
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
- }
- return rc;
-}
-
-void
-ksocknal_lib_eager_ack (ksock_conn_t *conn)
-{
- int opt = 1;
- mm_segment_t oldmm = get_fs();
- struct socket *sock = conn->ksnc_sock;
-
- /* Remind the socket to ACK eagerly. If I don't, the socket might
- * think I'm about to send something it could piggy-back the ACK
- * on, introducing delay in completing zero-copy sends in my
- * peer. */
-
- set_fs(KERNEL_DS);
- sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK,
- (char *)&opt, sizeof (opt));
- set_fs(oldmm);
-}
-
-int
-ksocknal_lib_recv_iov (ksock_conn_t *conn)
-{
-#if SOCKNAL_SINGLE_FRAG_RX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- unsigned int niov = conn->ksnc_rx_niov;
-#endif
- struct iovec *iov = conn->ksnc_rx_iov;
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
- mm_segment_t oldmm = get_fs();
- int nob;
- int i;
- int rc;
- int fragnob;
- int sum;
- __u32 saved_csum;
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
- LASSERT (niov > 0);
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = iov[i];
- nob += scratchiov[i].iov_len;
- }
- LASSERT (nob <= conn->ksnc_rx_nob_wanted);
-
- set_fs (KERNEL_DS);
- rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
- /* NB this is just a boolean..........................^ */
- set_fs (oldmm);
-
- saved_csum = 0;
- if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
- saved_csum = conn->ksnc_msg.ksm_csum;
- conn->ksnc_msg.ksm_csum = 0;
- }
-
- if (saved_csum != 0) {
- /* accumulate checksum */
- for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
- LASSERT (i < niov);
-
- fragnob = iov[i].iov_len;
- if (fragnob > sum)
- fragnob = sum;
-
- conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
- iov[i].iov_base, fragnob);
- }
- conn->ksnc_msg.ksm_csum = saved_csum;
- }
-
- return rc;
-}
-
-int
-ksocknal_lib_recv_kiov (ksock_conn_t *conn)
-{
-#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- unsigned int niov = conn->ksnc_rx_nkiov;
-#endif
- lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
- mm_segment_t oldmm = get_fs();
- int nob;
- int i;
- int rc;
- void *base;
- int sum;
- int fragnob;
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
- LASSERT (nob <= conn->ksnc_rx_nob_wanted);
-
- set_fs (KERNEL_DS);
- rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
- /* NB this is just a boolean.......................^ */
- set_fs (oldmm);
-
- if (conn->ksnc_msg.ksm_csum != 0) {
- for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
- LASSERT (i < niov);
-
- /* Dang! have to kmap again because I have nowhere to stash the
- * mapped address. But by doing it while the page is still
- * mapped, the kernel just bumps the map count and returns me
- * the address it stashed. */
- base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
- fragnob = kiov[i].kiov_len;
- if (fragnob > sum)
- fragnob = sum;
-
- conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
- base, fragnob);
-
- kunmap(kiov[i].kiov_page);
- }
- }
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
-
- return (rc);
-}
-
-void
-ksocknal_lib_csum_tx(ksock_tx_t *tx)
-{
- int i;
- __u32 csum;
- void *base;
-
- LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg);
- LASSERT(tx->tx_conn != NULL);
- LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
-
- tx->tx_msg.ksm_csum = 0;
-
- csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base,
- tx->tx_iov[0].iov_len);
-
- if (tx->tx_kiov != NULL) {
- for (i = 0; i < tx->tx_nkiov; i++) {
- base = kmap(tx->tx_kiov[i].kiov_page) +
- tx->tx_kiov[i].kiov_offset;
-
- csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
-
- kunmap(tx->tx_kiov[i].kiov_page);
- }
- } else {
- for (i = 1; i < tx->tx_niov; i++)
- csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base,
- tx->tx_iov[i].iov_len);
- }
-
- if (*ksocknal_tunables.ksnd_inject_csum_error) {
- csum++;
- *ksocknal_tunables.ksnd_inject_csum_error = 0;
- }
-
- tx->tx_msg.ksm_csum = csum;
-}
-
-int
-ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
-{
- mm_segment_t oldmm = get_fs ();
- struct socket *sock = conn->ksnc_sock;
- int len;
- int rc;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc != 0) {
- LASSERT (conn->ksnc_closing);
- *txmem = *rxmem = *nagle = 0;
- return (-ESHUTDOWN);
- }
-
- rc = libcfs_sock_getbuf(sock, txmem, rxmem);
- if (rc == 0) {
- len = sizeof(*nagle);
- set_fs(KERNEL_DS);
- rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)nagle, &len);
- set_fs(oldmm);
- }
-
- ksocknal_connsock_decref(conn);
-
- if (rc == 0)
- *nagle = !*nagle;
- else
- *txmem = *rxmem = *nagle = 0;
-
- return (rc);
-}
-
-int
-ksocknal_lib_setup_sock (struct socket *sock)
-{
- mm_segment_t oldmm = get_fs ();
- int rc;
- int option;
- int keep_idle;
- int keep_intvl;
- int keep_count;
- int do_keepalive;
- struct linger linger;
-
- sock->sk->sk_allocation = GFP_NOFS;
-
- /* Ensure this socket aborts active sends immediately when we close
- * it. */
-
- linger.l_onoff = 0;
- linger.l_linger = 0;
-
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER,
- (char *)&linger, sizeof (linger));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set SO_LINGER: %d\n", rc);
- return (rc);
- }
-
- option = -1;
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set SO_LINGER2: %d\n", rc);
- return (rc);
- }
-
- if (!*ksocknal_tunables.ksnd_nagle) {
- option = 1;
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't disable nagle: %d\n", rc);
- return (rc);
- }
- }
-
- rc = libcfs_sock_setbuf(sock,
- *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size);
- if (rc != 0) {
- CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
- *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size, rc);
- return (rc);
- }
-
-/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */
-#ifdef SOCKNAL_BACKOFF
- if (*ksocknal_tunables.ksnd_backoff_init > 0) {
- option = *ksocknal_tunables.ksnd_backoff_init;
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_BACKOFF_INIT,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set initial tcp backoff %d: %d\n",
- option, rc);
- return (rc);
- }
- }
-
- if (*ksocknal_tunables.ksnd_backoff_max > 0) {
- option = *ksocknal_tunables.ksnd_backoff_max;
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_BACKOFF_MAX,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set maximum tcp backoff %d: %d\n",
- option, rc);
- return (rc);
- }
- }
-#endif
-
- /* snapshot tunables */
- keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
- keep_count = *ksocknal_tunables.ksnd_keepalive_count;
- keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
-
- do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
-
- option = (do_keepalive ? 1 : 0);
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
- return (rc);
- }
-
- if (!do_keepalive)
- return (0);
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE,
- (char *)&keep_idle, sizeof (keep_idle));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
- return (rc);
- }
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&keep_intvl, sizeof (keep_intvl));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
- return (rc);
- }
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT,
- (char *)&keep_count, sizeof (keep_count));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set TCP_KEEPCNT: %d\n", rc);
- return (rc);
- }
-
- return (0);
-}
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
- return &(sk->tp_pinfo.af_tcp);
-}
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-#define sock2tcp_opt(sk) tcp_sk(sk)
-#else
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
- struct tcp_sock *s = (struct tcp_sock *)sk;
- return &s->tcp;
-}
-#endif
-
-void
-ksocknal_lib_push_conn (ksock_conn_t *conn)
-{
- struct sock *sk;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
- struct tcp_opt *tp;
-#else
- struct tcp_sock *tp;
-#endif
- int nonagle;
- int val = 1;
- int rc;
- mm_segment_t oldmm;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc != 0) /* being shut down */
- return;
-
- sk = conn->ksnc_sock->sk;
- tp = sock2tcp_opt(sk);
-
- lock_sock (sk);
- nonagle = tp->nonagle;
- tp->nonagle = 1;
- release_sock (sk);
-
- oldmm = get_fs ();
- set_fs (KERNEL_DS);
-
- rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
- (char *)&val, sizeof (val));
- LASSERT (rc == 0);
-
- set_fs (oldmm);
-
- lock_sock (sk);
- tp->nonagle = nonagle;
- release_sock (sk);
-
- ksocknal_connsock_decref(conn);
-}
-
-extern void ksocknal_read_callback (ksock_conn_t *conn);
-extern void ksocknal_write_callback (ksock_conn_t *conn);
-/*
- * socket call back in Linux
- */
-static void
-ksocknal_data_ready (struct sock *sk, int n)
-{
- ksock_conn_t *conn;
- ENTRY;
-
- /* interleave correctly with closing sockets... */
- LASSERT(!in_irq());
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- conn = sk->sk_user_data;
- if (conn == NULL) { /* raced with ksocknal_terminate_conn */
- LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
- sk->sk_data_ready (sk, n);
- } else
- ksocknal_read_callback(conn);
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- EXIT;
-}
-
-static void
-ksocknal_write_space (struct sock *sk)
-{
- ksock_conn_t *conn;
- int wspace;
- int min_wpace;
-
- /* interleave correctly with closing sockets... */
- LASSERT(!in_irq());
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- conn = sk->sk_user_data;
- wspace = SOCKNAL_WSPACE(sk);
- min_wpace = SOCKNAL_MIN_WSPACE(sk);
-
- CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
- sk, wspace, min_wpace, conn,
- (conn == NULL) ? "" : (conn->ksnc_tx_ready ?
- " ready" : " blocked"),
- (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
- " scheduled" : " idle"),
- (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
- " empty" : " queued"));
-
- if (conn == NULL) { /* raced with ksocknal_terminate_conn */
- LASSERT (sk->sk_write_space != &ksocknal_write_space);
- sk->sk_write_space (sk);
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return;
- }
-
- if (wspace >= min_wpace) { /* got enough space */
- ksocknal_write_callback(conn);
-
- /* Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the
- * ENOMEM check in ksocknal_transmit is race-free (think about
- * it). */
-
- clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
-}
-
-void
-ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
-{
- conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
- conn->ksnc_saved_write_space = sock->sk->sk_write_space;
-}
-
-void
-ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
-{
- sock->sk->sk_user_data = conn;
- sock->sk->sk_data_ready = ksocknal_data_ready;
- sock->sk->sk_write_space = ksocknal_write_space;
- return;
-}
-
-void
-ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
-{
- /* Remove conn's network callbacks.
- * NB I _have_ to restore the callback, rather than storing a noop,
- * since the socket could survive past this module being unloaded!! */
- sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
- sock->sk->sk_write_space = conn->ksnc_saved_write_space;
-
- /* A callback could be in progress already; they hold a read lock
- * on ksnd_global_lock (to serialise with me) and NOOP if
- * sk_user_data is NULL. */
- sock->sk->sk_user_data = NULL;
-
- return ;
-}
-
+++ /dev/null
-#define DEBUG_PORTAL_ALLOC
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#ifndef __LINUX_SOCKNAL_LIB_H__
-#define __LINUX_SOCKNAL_LIB_H__
-
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/version.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/irq.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <asm/uaccess.h>
-#include <asm/div64.h>
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-# include <linux/syscalls.h>
-#endif
-
-#include <libcfs/kp30.h>
-#include <libcfs/linux/portals_compat25.h>
-
-#include <linux/crc32.h>
-static inline __u32 ksocknal_csum(__u32 crc, unsigned char const *p, size_t len)
-{
-#if 1
- return crc32_le(crc, p, len);
-#else
- while (len-- > 0)
- crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ;
- return crc;
-#endif
-}
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,7))
-# define SOCKNAL_WSPACE(sk) sk_stream_wspace(sk)
-# define SOCKNAL_MIN_WSPACE(sk) sk_stream_min_wspace(sk)
-#else
-# define SOCKNAL_WSPACE(sk) tcp_wspace(sk)
-# define SOCKNAL_MIN_WSPACE(sk) (((sk)->sk_sndbuf*8)/10)
-#endif
-
-#ifndef CONFIG_SMP
-static inline
-int ksocknal_nsched(void)
-{
- return 1;
-}
-#else
-#include <linux/lustre_version.h>
-# if !(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && !defined(CONFIG_X86_HT))
-static inline int
-ksocknal_nsched(void)
-{
- return num_online_cpus();
-}
-
-static inline int
-ksocknal_sched2cpu(int i)
-{
- return i;
-}
-
-static inline int
-ksocknal_irqsched2cpu(int i)
-{
- return i;
-}
-# else
-static inline int
-ksocknal_nsched(void)
-{
- if (smp_num_siblings == 1)
- return (num_online_cpus());
-
- /* We need to know if this assumption is crap */
- LASSERT (smp_num_siblings == 2);
- return (num_online_cpus()/2);
-}
-
-static inline int
-ksocknal_sched2cpu(int i)
-{
- if (smp_num_siblings == 1)
- return i;
-
- return (i * 2);
-}
-
-static inline int
-ksocknal_irqsched2cpu(int i)
-{
- return (ksocknal_sched2cpu(i) + 1);
-}
-# endif
-#endif
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2006 Cluster File Systems, Inc, All rights reserved.
- * Author: Matt Wu
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * This Lustre Software is proprietary - please refer to the license
- * agreement you received with your software.
- *
- * windows socknal library
- *
- */
-
-#include "socklnd.h"
-
-# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
-static ctl_table ksocknal_ctl_table[18];
-
-ctl_table ksocknal_top_ctl_table[] = {
- {200, "socknal", NULL, 0, 0555, ksocknal_ctl_table},
- { 0 }
-};
-
-int
-ksocknal_lib_tunables_init ()
-{
- int i = 0;
- int j = 1;
-
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "timeout", ksocknal_tunables.ksnd_timeout,
- sizeof (int), 0644, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "credits", ksocknal_tunables.ksnd_credits,
- sizeof (int), 0444, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "peer_credits", ksocknal_tunables.ksnd_peercredits,
- sizeof (int), 0444, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "nconnds", ksocknal_tunables.ksnd_nconnds,
- sizeof (int), 0444, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "min_reconnectms", ksocknal_tunables.ksnd_min_reconnectms,
- sizeof (int), 0444, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "max_reconnectms", ksocknal_tunables.ksnd_max_reconnectms,
- sizeof (int), 0444, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "eager_ack", ksocknal_tunables.ksnd_eager_ack,
- sizeof (int), 0644, NULL, &proc_dointvec};
-#if SOCKNAL_ZC
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "zero_copy", ksocknal_tunables.ksnd_zc_min_frag,
- sizeof (int), 0644, NULL, &proc_dointvec};
-#endif
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "typed", ksocknal_tunables.ksnd_typed_conns,
- sizeof (int), 0444, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "min_bulk", ksocknal_tunables.ksnd_min_bulk,
- sizeof (int), 0644, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "buffer_size", ksocknal_tunables.ksnd_buffer_size,
- sizeof(int), 0644, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "nagle", ksocknal_tunables.ksnd_nagle,
- sizeof(int), 0644, NULL, &proc_dointvec};
-#ifdef CPU_AFFINITY
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "irq_affinity", ksocknal_tunables.ksnd_irq_affinity,
- sizeof(int), 0644, NULL, &proc_dointvec};
-#endif
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "keepalive_idle", ksocknal_tunables.ksnd_keepalive_idle,
- sizeof(int), 0644, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "keepalive_count", ksocknal_tunables.ksnd_keepalive_count,
- sizeof(int), 0644, NULL, &proc_dointvec};
- ksocknal_ctl_table[i++] = (ctl_table)
- {j++, "keepalive_intvl", ksocknal_tunables.ksnd_keepalive_intvl,
- sizeof(int), 0644, NULL, &proc_dointvec};
-
- LASSERT (j == i+1);
- LASSERT (i < sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
-
- ksocknal_tunables.ksnd_sysctl =
- register_sysctl_table(ksocknal_top_ctl_table, 0);
-
- if (ksocknal_tunables.ksnd_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-
- return 0;
-}
-
-void
-ksocknal_lib_tunables_fini ()
-{
- if (ksocknal_tunables.ksnd_sysctl != NULL)
- unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
-}
-#else
-int
-ksocknal_lib_tunables_init ()
-{
- return 0;
-}
-
-void
-ksocknal_lib_tunables_fini ()
-{
-}
-#endif
-
-void
-ksocknal_lib_bind_irq (unsigned int irq)
-{
-}
-
-int
-ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
-{
- int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
- &conn->ksnc_ipaddr, &conn->ksnc_port);
-
- /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
- LASSERT (!conn->ksnc_closing);
-
- if (rc != 0) {
- CERROR ("Error %d getting sock peer IP\n", rc);
- return rc;
- }
-
- rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
- &conn->ksnc_myipaddr, NULL);
- if (rc != 0) {
- CERROR ("Error %d getting sock local IP\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-unsigned int
-ksocknal_lib_sock_irq (struct socket *sock)
-{
- return 0;
-}
-
-#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
-static struct page *
-ksocknal_kvaddr_to_page (unsigned long vaddr)
-{
- struct page *page;
-
- if (vaddr >= VMALLOC_START &&
- vaddr < VMALLOC_END)
- page = vmalloc_to_page ((void *)vaddr);
-#ifdef CONFIG_HIGHMEM
- else if (vaddr >= PKMAP_BASE &&
- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE))
- page = vmalloc_to_page ((void *)vaddr);
- /* in 2.4 ^ just walks the page tables */
-#endif
- else
- page = virt_to_page (vaddr);
-
- if (page == NULL ||
- !VALID_PAGE (page))
- return (NULL);
-
- return (page);
-}
-#endif
-
-/*
- * ks_lock_iovs
- * Lock the i/o vector buffers into MDL structure
- *
- * Arguments:
- * iov: the array of i/o vectors
- * niov: number of i/o vectors to be locked
- * len: the real length of the iov vectors
- *
- * Return Value:
- * ksock_mdl_t *: the Mdl of the locked buffers or
- * NULL pointer in failure case
- *
- * Notes:
- * N/A
- */
-
-ksock_mdl_t *
-ks_lock_iovs(
- IN struct iovec *iov,
- IN int niov,
- IN int recving,
- IN int * len )
-{
- int rc = 0;
-
- int i = 0;
- int total = 0;
- ksock_mdl_t * mdl = NULL;
- ksock_mdl_t * tail = NULL;
-
- LASSERT(iov != NULL);
- LASSERT(niov > 0);
- LASSERT(len != NULL);
-
- for (i=0; i < niov; i++) {
-
- ksock_mdl_t * Iovec = NULL;
-
- rc = ks_lock_buffer(
- iov[i].iov_base,
- FALSE,
- iov[i].iov_len,
- recving ? IoWriteAccess : IoReadAccess,
- &Iovec );
-
- if (rc < 0) {
- break;
- }
-
- if (tail) {
- tail->Next = Iovec;
- } else {
- mdl = Iovec;
- }
-
- tail = Iovec;
-
- total +=iov[i].iov_len;
- }
-
- if (rc >= 0) {
- *len = total;
- } else {
- if (mdl) {
- ks_release_mdl(mdl, FALSE);
- mdl = NULL;
- }
- }
-
- return mdl;
-}
-
-/*
- * ks_lock_kiovs
- * Lock the kiov pages into MDL structure
- *
- * Arguments:
- * kiov: the array of kiov pages
- * niov: number of kiov to be locked
- * len: the real length of the kiov arrary
- *
- * Return Value:
- * PMDL: the Mdl of the locked buffers or NULL
- * pointer in failure case
- *
- * Notes:
- * N/A
- */
-ksock_mdl_t *
-ks_lock_kiovs(
- IN lnet_kiov_t * kiov,
- IN int nkiov,
- IN int recving,
- IN int * len )
-{
- int rc = 0;
- int i = 0;
- int total = 0;
- ksock_mdl_t * mdl = NULL;
- ksock_mdl_t * tail = NULL;
-
- LASSERT(kiov != NULL);
- LASSERT(nkiov > 0);
- LASSERT(len != NULL);
-
- for (i=0; i < nkiov; i++) {
-
- ksock_mdl_t * Iovec = NULL;
-
-
- //
- // Lock the kiov page into Iovec ¡Â
- //
-
- rc = ks_lock_buffer(
- (PUCHAR)kiov[i].kiov_page->addr +
- kiov[i].kiov_offset,
- FALSE,
- kiov[i].kiov_len,
- recving ? IoWriteAccess : IoReadAccess,
- &Iovec
- );
-
- if (rc < 0) {
- break;
- }
-
- //
- // Attach the Iovec to the mdl chain
- //
-
- if (tail) {
- tail->Next = Iovec;
- } else {
- mdl = Iovec;
- }
-
- tail = Iovec;
-
- total += kiov[i].kiov_len;
-
- }
-
- if (rc >= 0) {
- *len = total;
- } else {
- if (mdl) {
- ks_release_mdl(mdl, FALSE);
- mdl = NULL;
- }
- }
-
- return mdl;
-}
-
-
-int
-ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- struct socket *sock = conn->ksnc_sock;
-#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
- unsigned long vaddr = (unsigned long)iov->iov_base
- int offset = vaddr & (PAGE_SIZE - 1);
- int zcsize = MIN (iov->iov_len, PAGE_SIZE - offset);
- struct page *page;
-#endif
- int nob;
- int rc;
- ksock_mdl_t * mdl;
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
-
-#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
- if (zcsize >= ksocknal_data.ksnd_zc_min_frag &&
- (sock->sk->sk_route_caps & NETIF_F_SG) &&
- (sock->sk->sk_route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
- (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) {
- int msgflg = MSG_DONTWAIT;
-
- CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n",
- (void *)vaddr, page, page_address(page), offset, zcsize);
-
- if (!list_empty (&conn->ksnc_tx_queue) ||
- zcsize < tx->tx_resid)
- msgflg |= MSG_MORE;
-
- rc = tcp_sendpage_zccd(sock, page, offset, zcsize, msgflg, &tx->tx_zccd);
- } else
-#endif
- {
- /* lock the whole tx iovs into a single mdl chain */
- mdl = ks_lock_iovs(tx->tx_iov, tx->tx_niov, FALSE, &nob);
-
- if (mdl) {
- /* send the total mdl chain */
- rc = ks_send_mdl( conn->ksnc_sock, tx, mdl, nob,
- (!list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
- (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT);
- } else {
- rc = -ENOMEM;
- }
- }
-
- return rc;
-}
-
-int
-ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- struct socket *sock = conn->ksnc_sock;
- lnet_kiov_t *kiov = tx->tx_kiov;
- int rc;
- int nob;
- ksock_mdl_t * mdl;
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
-
-#if SOCKNAL_ZC
- if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag &&
- (sock->sk->sk_route_caps & NETIF_F_SG) &&
- (sock->sk->sk_route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) {
- struct page *page = kiov->kiov_page;
- int offset = kiov->kiov_offset;
- int fragsize = kiov->kiov_len;
- int msgflg = MSG_DONTWAIT;
-
- CDEBUG(D_NET, "page %p + offset %x for %d\n",
- page, offset, kiov->kiov_len);
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- fragsize < tx->tx_resid)
- msgflg |= MSG_MORE;
-
- rc = tcp_sendpage_zccd(sock, page, offset, fragsize, msgflg,
- &tx->tx_zccd);
- } else
-#endif
- {
- /* lock the whole tx kiovs into a single mdl chain */
- mdl = ks_lock_kiovs(tx->tx_kiov, tx->tx_nkiov, FALSE, &nob);
-
- if (mdl) {
- /* send the total mdl chain */
- rc = ks_send_mdl(
- conn->ksnc_sock, tx, mdl, nob,
- (!list_empty(&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
- (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT);
- } else {
- rc = -ENOMEM;
- }
- }
-
- return rc;
-}
-
-
-int
-ksocknal_lib_recv_iov (ksock_conn_t *conn)
-{
- struct iovec *iov = conn->ksnc_rx_iov;
- int rc;
- int size;
- ksock_mdl_t * mdl;
-
- /* lock the whole tx iovs into a single mdl chain */
- mdl = ks_lock_iovs(iov, conn->ksnc_rx_niov, TRUE, &size);
-
- if (!mdl) {
- return (-ENOMEM);
- }
-
- LASSERT (size <= conn->ksnc_rx_nob_wanted);
-
- /* try to request data for the whole mdl chain */
- rc = ks_recv_mdl (conn->ksnc_sock, mdl, size, MSG_DONTWAIT);
-
- return rc;
-}
-
-int
-ksocknal_lib_recv_kiov (ksock_conn_t *conn)
-{
- lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
- int size;
- int rc;
- ksock_mdl_t * mdl;
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone, so we only receive 1 frag at a time. */
- LASSERT (conn->ksnc_rx_nkiov > 0);
-
- /* lock the whole tx kiovs into a single mdl chain */
- mdl = ks_lock_kiovs(kiov, conn->ksnc_rx_nkiov, TRUE, &size);
-
- if (!mdl) {
- rc = -ENOMEM;
- return (rc);
- }
-
- LASSERT (size <= conn->ksnc_rx_nob_wanted);
-
- /* try to request data for the whole mdl chain */
- rc = ks_recv_mdl (conn->ksnc_sock, mdl, size, MSG_DONTWAIT);
-
- return rc;
-}
-
-void
-ksocknal_lib_eager_ack (ksock_conn_t *conn)
-{
- __u32 option = 1;
- int rc = 0;
-
- rc = ks_set_tcp_option(
- conn->ksnc_sock, TCP_SOCKET_NODELAY,
- &option, sizeof(option) );
- if (rc != 0) {
- CERROR("Can't disable nagle: %d\n", rc);
- }
-}
-
-int
-ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
-{
- ksock_tconn_t * tconn = conn->ksnc_sock;
- int len;
- int rc;
-
- ks_get_tconn (tconn);
-
- *txmem = *rxmem = 0;
-
- len = sizeof(*nagle);
-
- rc = ks_get_tcp_option(
- tconn, TCP_SOCKET_NODELAY,
- (__u32 *)nagle, &len);
-
- ks_put_tconn (tconn);
-
- printk("ksocknal_get_conn_tunables: nodelay = %d rc = %d\n", *nagle, rc);
-
- if (rc == 0)
- *nagle = !*nagle;
- else
- *txmem = *rxmem = *nagle = 0;
-
- return (rc);
-}
-
-int
-ksocknal_lib_buffersize (int current_sz, int tunable_sz)
-{
- /* ensure >= SOCKNAL_MIN_BUFFER */
- if (current_sz < SOCKNAL_MIN_BUFFER)
- return MAX(SOCKNAL_MIN_BUFFER, tunable_sz);
-
- if (tunable_sz > SOCKNAL_MIN_BUFFER)
- return tunable_sz;
-
- /* leave alone */
- return 0;
-}
-
-int
-ksocknal_lib_setup_sock (struct socket *sock)
-{
- int rc;
-
- int keep_idle;
- int keep_count;
- int keep_intvl;
- int keep_alive;
-
- __u32 option;
-
- /* set the window size */
-
-#if 0
- tconn->kstc_snd_wnd = ksocknal_tunables.ksnd_buffer_size;
- tconn->kstc_rcv_wnd = ksocknal_tunables.ksnd_buffer_size;
-#endif
-
- /* disable nagle */
- if (!ksocknal_tunables.ksnd_nagle) {
- option = 1;
-
- rc = ks_set_tcp_option(
- sock, TCP_SOCKET_NODELAY,
- &option, sizeof (option));
- if (rc != 0) {
- printk ("Can't disable nagle: %d\n", rc);
- return (rc);
- }
- }
-
- /* snapshot tunables */
- keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
- keep_count = *ksocknal_tunables.ksnd_keepalive_count;
- keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
-
- keep_alive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
-
- option = (__u32)(keep_alive ? 1 : 0);
-
- rc = ks_set_tcp_option(
- sock, TCP_SOCKET_KEEPALIVE,
- &option, sizeof (option));
- if (rc != 0) {
- CERROR ("Can't disable nagle: %d\n", rc);
- return (rc);
- }
-
- return (0);
-}
-
-void
-ksocknal_lib_push_conn (ksock_conn_t *conn)
-{
- ksock_tconn_t * tconn;
- __u32 nagle;
- __u32 val = 1;
- int rc;
-
- tconn = conn->ksnc_sock;
-
- ks_get_tconn(tconn);
-
- spin_lock(&tconn->kstc_lock);
- if (tconn->kstc_type == kstt_sender) {
- nagle = tconn->sender.kstc_info.nagle;
- tconn->sender.kstc_info.nagle = 0;
- } else {
- LASSERT(tconn->kstc_type == kstt_child);
- nagle = tconn->child.kstc_info.nagle;
- tconn->child.kstc_info.nagle = 0;
- }
-
- spin_unlock(&tconn->kstc_lock);
-
- val = 1;
- rc = ks_set_tcp_option(
- tconn,
- TCP_SOCKET_NODELAY,
- &(val),
- sizeof(__u32)
- );
-
- LASSERT (rc == 0);
- spin_lock(&tconn->kstc_lock);
-
- if (tconn->kstc_type == kstt_sender) {
- tconn->sender.kstc_info.nagle = nagle;
- } else {
- LASSERT(tconn->kstc_type == kstt_child);
- tconn->child.kstc_info.nagle = nagle;
- }
- spin_unlock(&tconn->kstc_lock);
-
- ks_put_tconn(tconn);
-}
-
-/* @mode: 0: receiving mode / 1: sending mode */
-void
-ksocknal_sched_conn (ksock_conn_t *conn, int mode, ksock_tx_t *tx)
-{
- int flags;
- ksock_sched_t * sched;
- ENTRY;
-
- /* interleave correctly with closing sockets... */
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_irqsave (&sched->kss_lock, flags);
-
- if (mode) { /* transmission can continue ... */
-
-#error "This is out of date - we should be calling ksocknal_write_callback()"
- conn->ksnc_tx_ready = 1;
-
- if (tx) {
- /* Incomplete send: place tx on HEAD of tx_queue */
- list_add (&tx->tx_list, &conn->ksnc_tx_queue);
- }
-
- if ( !conn->ksnc_tx_scheduled &&
- !list_empty(&conn->ksnc_tx_queue)) { //packets to send
- list_add_tail (&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- atomic_inc (&conn->ksnc_conn_refcount);
-
- cfs_waitq_signal (&sched->kss_waitq);
- }
- } else { /* receiving can continue ... */
-
- conn->ksnc_rx_ready = 1;
-
- if ( !conn->ksnc_rx_scheduled) { /* not being progressed */
- list_add_tail(&conn->ksnc_rx_list,
- &sched->kss_rx_conns);
- conn->ksnc_rx_scheduled = 1;
- /* extra ref for scheduler */
- atomic_inc (&conn->ksnc_conn_refcount);
-
- cfs_waitq_signal (&sched->kss_waitq);
- }
- }
-
- spin_unlock_irqrestore (&sched->kss_lock, flags);
- read_unlock (&ksocknal_data.ksnd_global_lock);
-
- EXIT;
-}
-
-void ksocknal_schedule_callback(struct socket*sock, int mode, void * tx, ulong_ptr bytes)
-{
- ksock_conn_t * conn = (ksock_conn_t *) sock->kstc_conn;
-
- if (mode) {
- ksocknal_sched_conn(conn, mode, tx);
- } else {
- if ( CAN_BE_SCHED(bytes, (ulong_ptr)conn->ksnc_rx_nob_wanted )) {
- ksocknal_sched_conn(conn, mode, tx);
- }
- }
-}
-
-extern void
-ksocknal_tx_launched (ksock_tx_t *tx);
-
-void
-ksocknal_fini_sending(ksock_tcpx_fini_t *tcpx)
-{
- ksocknal_tx_launched(tcpx->tx);
- cfs_free(tcpx);
-}
-
-void *
-ksocknal_update_tx(
- struct socket* tconn,
- void * txp,
- ulong_ptr rc
- )
-{
- ksock_tx_t * tx = (ksock_tx_t *)txp;
-
- /*
- * the transmission was done, we need update the tx
- */
-
- LASSERT(tx->tx_resid >= (int)rc);
- tx->tx_resid -= (int)rc;
-
- /*
- * just partial of tx is sent out, we need update
- * the fields of tx and schedule later transmission.
- */
-
- if (tx->tx_resid) {
-
- if (tx->tx_niov > 0) {
-
- /* if there's iov, we need process iov first */
- while (rc > 0 ) {
- if (rc < tx->tx_iov->iov_len) {
- /* didn't send whole iov entry... */
- tx->tx_iov->iov_base =
- (char *)(tx->tx_iov->iov_base) + rc;
- tx->tx_iov->iov_len -= rc;
- rc = 0;
- } else {
- /* the whole of iov was sent out */
- rc -= tx->tx_iov->iov_len;
- tx->tx_iov++;
- tx->tx_niov--;
- }
- }
-
- } else {
-
- /* now we need process the kiov queues ... */
-
- while (rc > 0 ) {
-
- if (rc < tx->tx_kiov->kiov_len) {
- /* didn't send whole kiov entry... */
- tx->tx_kiov->kiov_offset += rc;
- tx->tx_kiov->kiov_len -= rc;
- rc = 0;
- } else {
- /* whole kiov was sent out */
- rc -= tx->tx_kiov->kiov_len;
- tx->tx_kiov++;
- tx->tx_nkiov--;
- }
- }
- }
-
- } else {
-
- ksock_tcpx_fini_t * tcpx =
- cfs_alloc(sizeof(ksock_tcpx_fini_t), CFS_ALLOC_ZERO);
-
- ASSERT(tx->tx_resid == 0);
-
- if (!tcpx) {
-
- ksocknal_tx_launched (tx);
-
- } else {
-
- tcpx->tx = tx;
- ExInitializeWorkItem(
- &(tcpx->item),
- ksocknal_fini_sending,
- tcpx
- );
- ExQueueWorkItem(
- &(tcpx->item),
- CriticalWorkQueue
- );
- }
-
- tx = NULL;
- }
-
- return (void *)tx;
-}
-
-void
-ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
-{
-}
-
-void
-ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
-{
- sock->kstc_conn = conn;
- sock->kstc_sched_cb = ksocknal_schedule_callback;
- sock->kstc_update_tx = ksocknal_update_tx;
-}
-
-void
-ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
-{
- sock->kstc_conn = NULL;
- sock->kstc_sched_cb = NULL;
- sock->kstc_update_tx = NULL;
-}
-
+++ /dev/null
-#define DEBUG_PORTAL_ALLOC
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#ifndef __WINNT_TDILND_LIB_H__
-#define __WINNT_TDILND_LIB_H__
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-#ifndef CONFIG_SMP
-
-static inline
-int ksocknal_nsched(void)
-{
- return 1;
-}
-
-#else
-
-static inline int
-ksocknal_nsched(void)
-{
- return num_online_cpus();
-}
-
-static inline int
-ksocknal_sched2cpu(int i)
-{
- return i;
-}
-
-static inline int
-ksocknal_irqsched2cpu(int i)
-{
- return i;
-}
-
-#endif
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "socklnd.h"
-
-static int sock_timeout = 50;
-CFS_MODULE_PARM(sock_timeout, "i", int, 0644,
- "dead socket timeout (seconds)");
-
-static int credits = 256;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int peer_credits = 8;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
- "# concurrent sends to 1 peer");
-
-static int nconnds = 4;
-CFS_MODULE_PARM(nconnds, "i", int, 0444,
- "# connection daemons");
-
-static int min_reconnectms = 1000;
-CFS_MODULE_PARM(min_reconnectms, "i", int, 0644,
- "min connection retry interval (mS)");
-
-static int max_reconnectms = 60000;
-CFS_MODULE_PARM(max_reconnectms, "i", int, 0644,
- "max connection retry interval (mS)");
-
-#if defined(__APPLE__) && !defined(__DARWIN8__)
-# define DEFAULT_EAGER_ACK 1
-#else
-# define DEFAULT_EAGER_ACK 0
-#endif
-static int eager_ack = DEFAULT_EAGER_ACK;
-CFS_MODULE_PARM(eager_ack, "i", int, 0644,
- "send tcp ack packets eagerly");
-
-static int typed_conns = 1;
-CFS_MODULE_PARM(typed_conns, "i", int, 0444,
- "use different sockets for bulk");
-
-static int min_bulk = (1<<10);
-CFS_MODULE_PARM(min_bulk, "i", int, 0644,
- "smallest 'large' message");
-
-#ifdef __APPLE__
-# ifdef __DARWIN8__
-# define DEFAULT_BUFFER_SIZE (224*1024)
-# else
-# define DEFAULT_BUFFER_SIZE (1152 * 1024)
-# endif
-#else
-# define DEFAULT_BUFFER_SIZE 0
-#endif
-static int tx_buffer_size = DEFAULT_BUFFER_SIZE;
-CFS_MODULE_PARM(tx_buffer_size, "i", int, 0644,
- "socket tx buffer size (0 for system default)");
-
-static int rx_buffer_size = DEFAULT_BUFFER_SIZE;
-CFS_MODULE_PARM(rx_buffer_size, "i", int, 0644,
- "socket rx buffer size (0 for system default)");
-
-static int nagle = 0;
-CFS_MODULE_PARM(nagle, "i", int, 0644,
- "enable NAGLE?");
-
-static int keepalive_idle = 30;
-CFS_MODULE_PARM(keepalive_idle, "i", int, 0644,
- "# idle seconds before probe");
-
-#ifdef HAVE_BGL_SUPPORT
-#define DEFAULT_KEEPALIVE_COUNT 100
-#else
-#define DEFAULT_KEEPALIVE_COUNT 5
-#endif
-static int keepalive_count = DEFAULT_KEEPALIVE_COUNT;
-CFS_MODULE_PARM(keepalive_count, "i", int, 0644,
- "# missed probes == dead");
-
-static int keepalive_intvl = 5;
-CFS_MODULE_PARM(keepalive_intvl, "i", int, 0644,
- "seconds between probes");
-
-static int enable_csum = 0;
-CFS_MODULE_PARM(enable_csum, "i", int, 0644,
- "enable check sum");
-
-static int inject_csum_error = 0;
-CFS_MODULE_PARM(inject_csum_error, "i", int, 0644,
- "set non-zero to inject a checksum error");
-#ifdef CPU_AFFINITY
-static int enable_irq_affinity = 0;
-CFS_MODULE_PARM(enable_irq_affinity, "i", int, 0644,
- "enable IRQ affinity");
-#endif
-
-static unsigned int zc_min_frag = (2<<10);
-CFS_MODULE_PARM(zc_min_frag, "i", int, 0644,
- "minimum fragment to zero copy");
-
-#ifdef SOCKNAL_BACKOFF
-static int backoff_init = 3;
-CFS_MODULE_PARM(backoff_init, "i", int, 0644,
- "seconds for initial tcp backoff");
-
-static int backoff_max = 3;
-CFS_MODULE_PARM(backoff_max, "i", int, 0644,
- "seconds for maximum tcp backoff");
-#endif
-
-#if SOCKNAL_VERSION_DEBUG
-static int protocol = 2;
-CFS_MODULE_PARM(protocol, "i", int, 0644,
- "protocol version");
-#endif
-
-ksock_tunables_t ksocknal_tunables = {
- .ksnd_timeout = &sock_timeout,
- .ksnd_credits = &credits,
- .ksnd_peercredits = &peer_credits,
- .ksnd_nconnds = &nconnds,
- .ksnd_min_reconnectms = &min_reconnectms,
- .ksnd_max_reconnectms = &max_reconnectms,
- .ksnd_eager_ack = &eager_ack,
- .ksnd_typed_conns = &typed_conns,
- .ksnd_min_bulk = &min_bulk,
- .ksnd_tx_buffer_size = &tx_buffer_size,
- .ksnd_rx_buffer_size = &rx_buffer_size,
- .ksnd_nagle = &nagle,
- .ksnd_keepalive_idle = &keepalive_idle,
- .ksnd_keepalive_count = &keepalive_count,
- .ksnd_keepalive_intvl = &keepalive_intvl,
- .ksnd_enable_csum = &enable_csum,
- .ksnd_inject_csum_error = &inject_csum_error,
- .ksnd_zc_min_frag = &zc_min_frag,
-#ifdef CPU_AFFINITY
- .ksnd_irq_affinity = &enable_irq_affinity,
-#endif
-#ifdef SOCKNAL_BACKOFF
- .ksnd_backoff_init = &backoff_init,
- .ksnd_backoff_max = &backoff_max,
-#endif
-#if SOCKNAL_VERSION_DEBUG
- .ksnd_protocol = &protocol,
-#endif
-};
-
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
-wirecheck
+++ /dev/null
-MODULES := kviblnd
-kviblnd-objs := viblnd.o viblnd_cb.o viblnd_modparams.o
-
-EXTRA_POST_CFLAGS := @VIBCPPFLAGS@
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_VIBLND
-modulenet_DATA = kviblnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kviblnd-objs:%.o=%.c) viblnd.h viblnd_wire.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Frank Zago <fzago@systemfabricworks.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "viblnd.h"
-
-lnd_t the_kiblnd = {
- .lnd_type = VIBLND,
- .lnd_startup = kibnal_startup,
- .lnd_shutdown = kibnal_shutdown,
- .lnd_ctl = kibnal_ctl,
- .lnd_send = kibnal_send,
- .lnd_recv = kibnal_recv,
- .lnd_eager_recv = kibnal_eager_recv,
-};
-
-kib_data_t kibnal_data;
-
-void vibnal_assert_wire_constants (void)
-{
- /* Wire protocol assertions generated by 'wirecheck'
- * running on Linux robert 2.6.11-1.27_FC3 #1 Tue May 17 20:27:37 EDT 2005 i686 athlon i386 G
- * with gcc version 3.4.3 20050227 (Red Hat 3.4.3-22.fc3) */
-
-
- /* Constants... */
- CLASSERT (IBNAL_MSG_MAGIC == 0x0be91b91);
- CLASSERT (IBNAL_MSG_VERSION == 0x11);
- CLASSERT (IBNAL_MSG_CONNREQ == 0xc0);
- CLASSERT (IBNAL_MSG_CONNACK == 0xc1);
- CLASSERT (IBNAL_MSG_NOOP == 0xd0);
- CLASSERT (IBNAL_MSG_IMMEDIATE == 0xd1);
- CLASSERT (IBNAL_MSG_PUT_REQ == 0xd2);
- CLASSERT (IBNAL_MSG_PUT_NAK == 0xd3);
- CLASSERT (IBNAL_MSG_PUT_ACK == 0xd4);
- CLASSERT (IBNAL_MSG_PUT_DONE == 0xd5);
- CLASSERT (IBNAL_MSG_GET_REQ == 0xd6);
- CLASSERT (IBNAL_MSG_GET_DONE == 0xd7);
-
- /* Checks for struct kib_connparams_t */
- CLASSERT ((int)sizeof(kib_connparams_t) == 12);
- CLASSERT ((int)offsetof(kib_connparams_t, ibcp_queue_depth) == 0);
- CLASSERT ((int)sizeof(((kib_connparams_t *)0)->ibcp_queue_depth) == 4);
- CLASSERT ((int)offsetof(kib_connparams_t, ibcp_max_msg_size) == 4);
- CLASSERT ((int)sizeof(((kib_connparams_t *)0)->ibcp_max_msg_size) == 4);
- CLASSERT ((int)offsetof(kib_connparams_t, ibcp_max_frags) == 8);
- CLASSERT ((int)sizeof(((kib_connparams_t *)0)->ibcp_max_frags) == 4);
-
- /* Checks for struct kib_immediate_msg_t */
- CLASSERT ((int)sizeof(kib_immediate_msg_t) == 72);
- CLASSERT ((int)offsetof(kib_immediate_msg_t, ibim_hdr) == 0);
- CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_hdr) == 72);
- CLASSERT ((int)offsetof(kib_immediate_msg_t, ibim_payload[13]) == 85);
- CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_payload[13]) == 1);
- CLASSERT (IBNAL_USE_FMR == 1);
-
- /* Checks for struct kib_rdma_desc_t */
- CLASSERT ((int)sizeof(kib_rdma_desc_t) == 16);
- CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_addr) == 0);
- CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_addr) == 8);
- CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_nob) == 8);
- CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_nob) == 4);
- CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_key) == 12);
- CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_key) == 4);
-
- /* Checks for struct kib_putreq_msg_t */
- CLASSERT ((int)sizeof(kib_putreq_msg_t) == 80);
- CLASSERT ((int)offsetof(kib_putreq_msg_t, ibprm_hdr) == 0);
- CLASSERT ((int)sizeof(((kib_putreq_msg_t *)0)->ibprm_hdr) == 72);
- CLASSERT ((int)offsetof(kib_putreq_msg_t, ibprm_cookie) == 72);
- CLASSERT ((int)sizeof(((kib_putreq_msg_t *)0)->ibprm_cookie) == 8);
-
- /* Checks for struct kib_putack_msg_t */
- CLASSERT ((int)sizeof(kib_putack_msg_t) == 32);
- CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_src_cookie) == 0);
- CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_src_cookie) == 8);
- CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_dst_cookie) == 8);
- CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_dst_cookie) == 8);
- CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_rd) == 16);
- CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_rd) == 16);
-
- /* Checks for struct kib_get_msg_t */
- CLASSERT ((int)sizeof(kib_get_msg_t) == 96);
- CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_hdr) == 0);
- CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_hdr) == 72);
- CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_cookie) == 72);
- CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_cookie) == 8);
- CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_rd) == 80);
- CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_rd) == 16);
-
- /* Checks for struct kib_completion_msg_t */
- CLASSERT ((int)sizeof(kib_completion_msg_t) == 12);
- CLASSERT ((int)offsetof(kib_completion_msg_t, ibcm_cookie) == 0);
- CLASSERT ((int)sizeof(((kib_completion_msg_t *)0)->ibcm_cookie) == 8);
- CLASSERT ((int)offsetof(kib_completion_msg_t, ibcm_status) == 8);
- CLASSERT ((int)sizeof(((kib_completion_msg_t *)0)->ibcm_status) == 4);
-
- /* Checks for struct kib_msg_t */
- CLASSERT ((int)sizeof(kib_msg_t) == 152);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_magic) == 0);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_magic) == 4);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_version) == 4);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_version) == 2);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_type) == 6);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_type) == 1);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_credits) == 7);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_credits) == 1);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_nob) == 8);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_nob) == 4);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_cksum) == 12);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_cksum) == 4);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_srcnid) == 16);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_srcnid) == 8);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_srcstamp) == 24);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_srcstamp) == 8);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_dstnid) == 32);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_dstnid) == 8);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_dststamp) == 40);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_dststamp) == 8);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_seq) == 48);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_seq) == 8);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_u.connparams) == 56);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.connparams) == 12);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_u.immediate) == 56);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.immediate) == 72);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putreq) == 56);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putreq) == 80);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putack) == 56);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putack) == 32);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_u.get) == 56);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.get) == 96);
- CLASSERT ((int)offsetof(kib_msg_t, ibm_u.completion) == 56);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.completion) == 12);
-}
-
-__u32
-kibnal_cksum (void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return (sum == 0) ? 1 : sum;
-}
-
-void
-kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
-{
- msg->ibm_type = type;
- msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
-}
-
-void
-kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
- lnet_nid_t dstnid, __u64 dststamp, __u64 seq)
-{
- /* CAVEAT EMPTOR! all message fields not set here should have been
- * initialised previously. */
- msg->ibm_magic = IBNAL_MSG_MAGIC;
- msg->ibm_version = version;
- /* ibm_type */
- msg->ibm_credits = credits;
- /* ibm_nob */
- msg->ibm_cksum = 0;
- msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid,
- dstnid);
- msg->ibm_srcstamp = kibnal_data.kib_incarnation;
- msg->ibm_dstnid = dstnid;
- msg->ibm_dststamp = dststamp;
- msg->ibm_seq = seq;
-
- if (*kibnal_tunables.kib_cksum) {
- /* NB ibm_cksum zero while computing cksum */
- msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
- }
-}
-
-int
-kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
-{
- const int hdr_size = offsetof(kib_msg_t, ibm_u);
- __u32 msg_cksum;
- __u32 msg_version;
- int flip;
- int msg_nob;
-#if !IBNAL_USE_FMR
- int i;
- int n;
-#endif
- /* 6 bytes are enough to have received magic + version */
- if (nob < 6) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- /* Future protocol version compatibility support!
- * If the viblnd-specific protocol changes, or when LNET unifies
- * protocols over all LNDs, the initial connection will negotiate a
- * protocol version. If I find this, I avoid any console errors. If
- * my is doing connection establishment, the reject will tell the peer
- * which version I'm running. */
-
- if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
- flip = 1;
- } else {
- if (msg->ibm_magic == LNET_PROTO_MAGIC ||
- msg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
- return -EPROTO;
-
- /* Completely out to lunch */
- CERROR("Bad magic: %08x\n", msg->ibm_magic);
- return -EPROTO;
- }
-
- msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
- if (expected_version == 0) {
- if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
- msg_version != IBNAL_MSG_VERSION)
- return -EPROTO;
- } else if (msg_version != expected_version) {
- CERROR("Bad version: %x(%x expected)\n",
- msg_version, expected_version);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
- if (msg_nob > nob) {
- CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
- return -EPROTO;
- }
-
- /* checksum must be computed with ibm_cksum zero and BEFORE anything
- * gets flipped */
- msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
- msg->ibm_cksum = 0;
- if (msg_cksum != 0 &&
- msg_cksum != kibnal_cksum(msg, msg_nob)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
- msg->ibm_cksum = msg_cksum;
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- msg->ibm_version = msg_version;
- CLASSERT (sizeof(msg->ibm_type) == 1);
- CLASSERT (sizeof(msg->ibm_credits) == 1);
- msg->ibm_nob = msg_nob;
- __swab64s(&msg->ibm_srcnid);
- __swab64s(&msg->ibm_srcstamp);
- __swab64s(&msg->ibm_dstnid);
- __swab64s(&msg->ibm_dststamp);
- __swab64s(&msg->ibm_seq);
- }
-
- if (msg->ibm_srcnid == LNET_NID_ANY) {
- CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
- return -EPROTO;
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Unknown message type %x\n", msg->ibm_type);
- return -EPROTO;
-
- case IBNAL_MSG_NOOP:
- break;
-
- case IBNAL_MSG_IMMEDIATE:
- if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
- CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
- return -EPROTO;
- }
- break;
-
- case IBNAL_MSG_PUT_REQ:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
- CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
- return -EPROTO;
- }
- break;
-
- case IBNAL_MSG_PUT_ACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
- CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.putack)));
- return -EPROTO;
- }
-#if IBNAL_USE_FMR
- if (flip) {
- __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
- }
-#else
- if (flip) {
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
- }
-
- n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
- if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
- n, IBNAL_MAX_RDMA_FRAGS);
- return -EPROTO;
- }
-
- if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
- CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
- return -EPROTO;
- }
-
- if (flip) {
- for (i = 0; i < n; i++) {
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_lo);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_hi);
- }
- }
-#endif
- break;
-
- case IBNAL_MSG_GET_REQ:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
- CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.get)));
- return -EPROTO;
- }
-#if IBNAL_USE_FMR
- if (flip) {
- __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
- }
-#else
- if (flip) {
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
- }
-
- n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
- if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
- n, IBNAL_MAX_RDMA_FRAGS);
- return -EPROTO;
- }
-
- if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
- CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
- return -EPROTO;
- }
-
- if (flip)
- for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) {
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_lo);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_hi);
- }
-#endif
- break;
-
- case IBNAL_MSG_PUT_NAK:
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
- CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.completion)));
- return -EPROTO;
- }
- if (flip)
- __swab32s(&msg->ibm_u.completion.ibcm_status);
- break;
-
- case IBNAL_MSG_CONNREQ:
- case IBNAL_MSG_CONNACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
- CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
- return -EPROTO;
- }
- if (flip) {
- __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
- __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
- __swab32s(&msg->ibm_u.connparams.ibcp_max_frags);
- }
- break;
- }
- return 0;
-}
-
-int
-kibnal_start_listener (lnet_ni_t *ni)
-{
- static cm_listen_data_t info;
-
- cm_return_t cmrc;
-
- LASSERT (kibnal_data.kib_listen_handle == NULL);
-
- kibnal_data.kib_listen_handle =
- cm_create_cep(cm_cep_transp_rc);
- if (kibnal_data.kib_listen_handle == NULL) {
- CERROR ("Can't create listen CEP\n");
- return -ENOMEM;
- }
-
- CDEBUG(D_NET, "Created CEP %p for listening\n",
- kibnal_data.kib_listen_handle);
-
- memset(&info, 0, sizeof(info));
- info.listen_addr.end_pt.sid =
- (__u64)(*kibnal_tunables.kib_service_number);
-
- cmrc = cm_listen(kibnal_data.kib_listen_handle, &info,
- kibnal_listen_callback, NULL);
- if (cmrc == cm_stat_success)
- return 0;
-
- CERROR ("cm_listen error: %d\n", cmrc);
-
- cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
- LASSERT (cmrc == cm_stat_success);
-
- kibnal_data.kib_listen_handle = NULL;
- return -EINVAL;
-}
-
-void
-kibnal_stop_listener(lnet_ni_t *ni)
-{
- cm_return_t cmrc;
-
- LASSERT (kibnal_data.kib_listen_handle != NULL);
-
- cmrc = cm_cancel(kibnal_data.kib_listen_handle);
- if (cmrc != cm_stat_success)
- CERROR ("Error %d stopping listener\n", cmrc);
-
- cfs_pause(cfs_time_seconds(1)/10); /* ensure no more callbacks */
-
- cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
- if (cmrc != vv_return_ok)
- CERROR ("Error %d destroying CEP\n", cmrc);
-
- kibnal_data.kib_listen_handle = NULL;
-}
-
-int
-kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- unsigned long flags;
- int rc;
-
- LASSERT (nid != LNET_NID_ANY);
-
- LIBCFS_ALLOC(peer, sizeof (*peer));
- if (peer == NULL) {
- CERROR("Cannot allocate peer\n");
- return -ENOMEM;
- }
-
- memset(peer, 0, sizeof(*peer)); /* zero flags etc */
-
- peer->ibp_nid = nid;
- atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */
-
- INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */
- INIT_LIST_HEAD (&peer->ibp_conns);
- INIT_LIST_HEAD (&peer->ibp_tx_queue);
-
- peer->ibp_error = 0;
- peer->ibp_last_alive = cfs_time_current();
- peer->ibp_reconnect_interval = 0; /* OK to connect at any time */
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (atomic_read(&kibnal_data.kib_npeers) >=
- *kibnal_tunables.kib_concurrent_peers) {
- rc = -EOVERFLOW; /* !! but at least it distinguishes */
- } else if (kibnal_data.kib_listen_handle == NULL) {
- rc = -ESHUTDOWN; /* shutdown has started */
- } else {
- rc = 0;
- /* npeers only grows with the global lock held */
- atomic_inc(&kibnal_data.kib_npeers);
- }
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (rc != 0) {
- CERROR("Can't create peer: %s\n",
- (rc == -ESHUTDOWN) ? "shutting down" :
- "too many peers");
- LIBCFS_FREE(peer, sizeof(*peer));
- } else {
- *peerp = peer;
- }
-
- return rc;
-}
-
-void
-kibnal_destroy_peer (kib_peer_t *peer)
-{
- LASSERT (atomic_read (&peer->ibp_refcount) == 0);
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (!kibnal_peer_active(peer));
- LASSERT (peer->ibp_connecting == 0);
- LASSERT (peer->ibp_accepting == 0);
- LASSERT (list_empty (&peer->ibp_conns));
- LASSERT (list_empty (&peer->ibp_tx_queue));
-
- LIBCFS_FREE (peer, sizeof (*peer));
-
- /* NB a peer's connections keep a reference on their peer until
- * they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
- * zero. */
- atomic_dec(&kibnal_data.kib_npeers);
-}
-
-kib_peer_t *
-kibnal_find_peer_locked (lnet_nid_t nid)
-{
- /* the caller is responsible for accounting the additional reference
- * that this creates */
- struct list_head *peer_list = kibnal_nid2peerlist (nid);
- struct list_head *tmp;
- kib_peer_t *peer;
-
- list_for_each (tmp, peer_list) {
-
- peer = list_entry (tmp, kib_peer_t, ibp_list);
-
- LASSERT (peer->ibp_persistence != 0 || /* persistent peer */
- peer->ibp_connecting != 0 || /* creating conns */
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns)); /* active conn */
-
- if (peer->ibp_nid != nid)
- continue;
-
- CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
- peer, libcfs_nid2str(nid),
- atomic_read (&peer->ibp_refcount));
- return (peer);
- }
- return (NULL);
-}
-
-void
-kibnal_unlink_peer_locked (kib_peer_t *peer)
-{
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (list_empty(&peer->ibp_conns));
-
- LASSERT (kibnal_peer_active(peer));
- list_del_init (&peer->ibp_list);
- /* lose peerlist's ref */
- kibnal_peer_decref(peer);
-}
-
-int
-kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp,
- int *persistencep)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- int i;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
-
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- if (index-- > 0)
- continue;
-
- *nidp = peer->ibp_nid;
- *ipp = peer->ibp_ip;
- *persistencep = peer->ibp_persistence;
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (0);
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (-ENOENT);
-}
-
-int
-kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip)
-{
- kib_peer_t *peer;
- kib_peer_t *peer2;
- unsigned long flags;
- int rc;
-
- CDEBUG(D_NET, "%s at %u.%u.%u.%u\n",
- libcfs_nid2str(nid), HIPQUAD(ip));
-
- if (nid == LNET_NID_ANY)
- return (-EINVAL);
-
- rc = kibnal_create_peer(&peer, nid);
- if (rc != 0)
- return rc;
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- /* I'm always called with a reference on kibnal_data.kib_ni
- * so shutdown can't have started */
- LASSERT (kibnal_data.kib_listen_handle != NULL);
-
- peer2 = kibnal_find_peer_locked (nid);
- if (peer2 != NULL) {
- kibnal_peer_decref (peer);
- peer = peer2;
- } else {
- /* peer table takes existing ref on peer */
- list_add_tail (&peer->ibp_list,
- kibnal_nid2peerlist (nid));
- }
-
- peer->ibp_ip = ip;
- peer->ibp_persistence++;
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (0);
-}
-
-void
-kibnal_del_peer_locked (kib_peer_t *peer)
-{
- struct list_head *ctmp;
- struct list_head *cnxt;
- kib_conn_t *conn;
-
- peer->ibp_persistence = 0;
-
- if (list_empty(&peer->ibp_conns)) {
- kibnal_unlink_peer_locked(peer);
- } else {
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, kib_conn_t, ibc_list);
-
- kibnal_close_conn_locked (conn, 0);
- }
- /* NB peer is no longer persistent; closing its last conn
- * unlinked it. */
- }
- /* NB peer now unlinked; might even be freed if the peer table had the
- * last ref on it. */
-}
-
-int
-kibnal_del_peer (lnet_nid_t nid)
-{
- CFS_LIST_HEAD (zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- kib_peer_t *peer;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int rc = -ENOENT;
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
- continue;
-
- if (!list_empty(&peer->ibp_tx_queue)) {
- LASSERT (list_empty(&peer->ibp_conns));
-
- list_splice_init(&peer->ibp_tx_queue, &zombies);
- }
-
- kibnal_del_peer_locked (peer);
- rc = 0; /* matched something */
- }
- }
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_txlist_done(&zombies, -EIO);
-
- return (rc);
-}
-
-kib_conn_t *
-kibnal_get_conn_by_idx (int index)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- kib_conn_t *conn;
- struct list_head *ctmp;
- int i;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence > 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- list_for_each (ctmp, &peer->ibp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
- kibnal_conn_addref(conn);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (conn);
- }
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (NULL);
-}
-
-void
-kibnal_debug_rx (kib_rx_t *rx)
-{
- CDEBUG(D_CONSOLE, " %p nob %d msg_type %x "
- "cred %d seq "LPD64"\n",
- rx, rx->rx_nob, rx->rx_msg->ibm_type,
- rx->rx_msg->ibm_credits, rx->rx_msg->ibm_seq);
-}
-
-void
-kibnal_debug_tx (kib_tx_t *tx)
-{
- CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lx "
- "cookie "LPX64" msg %s%s type %x cred %d seq "LPD64"\n",
- tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting,
- tx->tx_status, tx->tx_deadline, tx->tx_cookie,
- tx->tx_lntmsg[0] == NULL ? "-" : "!",
- tx->tx_lntmsg[1] == NULL ? "-" : "!",
- tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits,
- tx->tx_msg->ibm_seq);
-}
-
-void
-kibnal_debug_conn (kib_conn_t *conn)
-{
- struct list_head *tmp;
- int i;
-
- spin_lock(&conn->ibc_lock);
-
- CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n",
- atomic_read(&conn->ibc_refcount), conn,
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- CDEBUG(D_CONSOLE, " txseq "LPD64" rxseq "LPD64" state %d \n",
- conn->ibc_txseq, conn->ibc_rxseq, conn->ibc_state);
- CDEBUG(D_CONSOLE, " nposted %d cred %d o_cred %d r_cred %d\n",
- conn->ibc_nsends_posted, conn->ibc_credits,
- conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
- CDEBUG(D_CONSOLE, " disc %d comms_err %d\n",
- conn->ibc_disconnect, conn->ibc_comms_error);
-
- CDEBUG(D_CONSOLE, " early_rxs:\n");
- list_for_each(tmp, &conn->ibc_early_rxs)
- kibnal_debug_rx(list_entry(tmp, kib_rx_t, rx_list));
-
- CDEBUG(D_CONSOLE, " tx_queue_nocred:\n");
- list_for_each(tmp, &conn->ibc_tx_queue_nocred)
- kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
- CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n");
- list_for_each(tmp, &conn->ibc_tx_queue_rsrvd)
- kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
- CDEBUG(D_CONSOLE, " tx_queue:\n");
- list_for_each(tmp, &conn->ibc_tx_queue)
- kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
- CDEBUG(D_CONSOLE, " active_txs:\n");
- list_for_each(tmp, &conn->ibc_active_txs)
- kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
- CDEBUG(D_CONSOLE, " rxs:\n");
- for (i = 0; i < IBNAL_RX_MSGS; i++)
- kibnal_debug_rx(&conn->ibc_rxs[i]);
-
- spin_unlock(&conn->ibc_lock);
-}
-
-int
-kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
-{
- static vv_qp_attr_t attr;
-
- kib_connvars_t *cv = conn->ibc_connvars;
- vv_return_t vvrc;
-
- /* Only called by connd => static OK */
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
-
- memset(&attr, 0, sizeof(attr));
-
- switch (new_state) {
- default:
- LBUG();
-
- case vv_qp_state_init: {
- struct vv_qp_modify_init_st *init = &attr.modify.params.init;
-
- init->p_key_indx = cv->cv_pkey_index;
- init->phy_port_num = cv->cv_port;
- init->q_key = IBNAL_QKEY; /* XXX but VV_QP_AT_Q_KEY not set! */
- init->access_control = vv_acc_r_mem_read |
- vv_acc_r_mem_write; /* XXX vv_acc_l_mem_write ? */
-
- attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX |
- VV_QP_AT_PHY_PORT_NUM |
- VV_QP_AT_ACCESS_CON_F;
- break;
- }
- case vv_qp_state_rtr: {
- struct vv_qp_modify_rtr_st *rtr = &attr.modify.params.rtr;
- vv_add_vec_t *av = &rtr->remote_add_vec;
-
- av->dlid = cv->cv_path.dlid;
- av->grh_flag = (!IBNAL_LOCAL_SUB);
- av->max_static_rate = IBNAL_R_2_STATIC_RATE(cv->cv_path.rate);
- av->service_level = cv->cv_path.sl;
- av->source_path_bit = IBNAL_SOURCE_PATH_BIT;
- av->pmtu = cv->cv_path.mtu;
- av->rnr_retry_count = cv->cv_rnr_count;
- av->global_dest.traffic_class = cv->cv_path.traffic_class;
- av->global_dest.hope_limit = cv->cv_path.hop_limut;
- av->global_dest.flow_lable = cv->cv_path.flow_label;
- av->global_dest.s_gid_index = cv->cv_sgid_index;
- // XXX other av fields zero?
-
- rtr->destanation_qp = cv->cv_remote_qpn;
- rtr->receive_psn = cv->cv_rxpsn;
- rtr->responder_rdma_r_atom_num = IBNAL_OUS_DST_RD;
- rtr->opt_min_rnr_nak_timer = *kibnal_tunables.kib_rnr_nak_timer;
-
-
- // XXX sdp sets VV_QP_AT_OP_F but no actual optional options
- attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC |
- VV_QP_AT_DEST_QP |
- VV_QP_AT_R_PSN |
- VV_QP_AT_MIN_RNR_NAK_T |
- VV_QP_AT_RESP_RDMA_ATOM_OUT_NUM |
- VV_QP_AT_OP_F;
- break;
- }
- case vv_qp_state_rts: {
- struct vv_qp_modify_rts_st *rts = &attr.modify.params.rts;
-
- rts->send_psn = cv->cv_txpsn;
- rts->local_ack_timeout = *kibnal_tunables.kib_local_ack_timeout;
- rts->retry_num = *kibnal_tunables.kib_retry_cnt;
- rts->rnr_num = *kibnal_tunables.kib_rnr_cnt;
- rts->dest_out_rdma_r_atom_num = IBNAL_OUS_DST_RD;
-
- attr.modify.vv_qp_attr_mask = VV_QP_AT_S_PSN |
- VV_QP_AT_L_ACK_T |
- VV_QP_AT_RETRY_NUM |
- VV_QP_AT_RNR_NUM |
- VV_QP_AT_DEST_RDMA_ATOM_OUT_NUM;
- break;
- }
- case vv_qp_state_error:
- case vv_qp_state_reset:
- attr.modify.vv_qp_attr_mask = 0;
- break;
- }
-
- attr.modify.qp_modify_into_state = new_state;
- attr.modify.vv_qp_attr_mask |= VV_QP_AT_STATE;
-
- vvrc = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &attr, NULL);
- if (vvrc != vv_return_ok) {
- CERROR("Can't modify qp -> %s state to %d: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- new_state, vvrc);
- return -EIO;
- }
-
- return 0;
-}
-
-kib_conn_t *
-kibnal_create_conn (cm_cep_handle_t cep)
-{
- kib_conn_t *conn;
- int i;
- int page_offset;
- int ipage;
- vv_return_t vvrc;
- int rc;
-
- static vv_qp_attr_t reqattr;
- static vv_qp_attr_t rspattr;
-
- /* Only the connd creates conns => single threaded */
- LASSERT(!in_interrupt());
- LASSERT(current == kibnal_data.kib_connd);
-
- LIBCFS_ALLOC(conn, sizeof (*conn));
- if (conn == NULL) {
- CERROR ("Can't allocate connection\n");
- return (NULL);
- }
-
- /* zero flags, NULL pointers etc... */
- memset (conn, 0, sizeof (*conn));
-
- conn->ibc_version = IBNAL_MSG_VERSION; /* Use latest version at first */
-
- INIT_LIST_HEAD (&conn->ibc_early_rxs);
- INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
- INIT_LIST_HEAD (&conn->ibc_tx_queue);
- INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
- INIT_LIST_HEAD (&conn->ibc_active_txs);
- spin_lock_init (&conn->ibc_lock);
-
- atomic_inc (&kibnal_data.kib_nconns);
- /* well not really, but I call destroy() on failure, which decrements */
-
- conn->ibc_cep = cep;
-
- LIBCFS_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
- if (conn->ibc_connvars == NULL) {
- CERROR("Can't allocate in-progress connection state\n");
- goto failed;
- }
- memset (conn->ibc_connvars, 0, sizeof(*conn->ibc_connvars));
- /* Random seed for QP sequence number */
- get_random_bytes(&conn->ibc_connvars->cv_rxpsn,
- sizeof(conn->ibc_connvars->cv_rxpsn));
-
- LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
- if (conn->ibc_rxs == NULL) {
- CERROR("Cannot allocate RX buffers\n");
- goto failed;
- }
- memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES, 1);
- if (rc != 0)
- goto failed;
-
- for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
- struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
- kib_rx_t *rx = &conn->ibc_rxs[i];
- vv_mem_reg_h_t mem_h;
- vv_r_key_t r_key;
-
- rx->rx_conn = conn;
- rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
- page_offset);
-
- vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
- rx->rx_msg,
- IBNAL_MSG_SIZE,
- &mem_h,
- &rx->rx_lkey,
- &r_key);
- LASSERT (vvrc == vv_return_ok);
-
- CDEBUG(D_NET, "Rx[%d] %p->%p[%x]\n", i, rx,
- rx->rx_msg, rx->rx_lkey);
-
- page_offset += IBNAL_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBNAL_RX_MSG_PAGES);
- }
- }
-
- memset(&reqattr, 0, sizeof(reqattr));
-
- reqattr.create.qp_type = vv_qp_type_r_conn;
- reqattr.create.cq_send_h = kibnal_data.kib_cq;
- reqattr.create.cq_receive_h = kibnal_data.kib_cq;
- reqattr.create.send_max_outstand_wr = (1 + IBNAL_MAX_RDMA_FRAGS) *
- (*kibnal_tunables.kib_concurrent_sends);
- reqattr.create.receive_max_outstand_wr = IBNAL_RX_MSGS;
- reqattr.create.max_scatgat_per_send_wr = 1;
- reqattr.create.max_scatgat_per_receive_wr = 1;
- reqattr.create.signaling_type = vv_selectable_signaling;
- reqattr.create.pd_h = kibnal_data.kib_pd;
- reqattr.create.recv_solicited_events = vv_selectable_signaling; // vv_signal_all;
-
- vvrc = vv_qp_create(kibnal_data.kib_hca, &reqattr, NULL,
- &conn->ibc_qp, &rspattr);
- if (vvrc != vv_return_ok) {
- CERROR ("Failed to create queue pair: %d\n", vvrc);
- goto failed;
- }
-
- /* Mark QP created */
- conn->ibc_state = IBNAL_CONN_INIT_QP;
- conn->ibc_connvars->cv_local_qpn = rspattr.create_return.qp_num;
-
- if (rspattr.create_return.receive_max_outstand_wr <
- IBNAL_RX_MSGS ||
- rspattr.create_return.send_max_outstand_wr <
- (1 + IBNAL_MAX_RDMA_FRAGS) * (*kibnal_tunables.kib_concurrent_sends)) {
- CERROR("Insufficient rx/tx work items: wanted %d/%d got %d/%d\n",
- IBNAL_RX_MSGS,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- (*kibnal_tunables.kib_concurrent_sends),
- rspattr.create_return.receive_max_outstand_wr,
- rspattr.create_return.send_max_outstand_wr);
- goto failed;
- }
-
- /* Mark init complete */
- conn->ibc_state = IBNAL_CONN_INIT;
-
- /* 1 ref for caller */
- atomic_set (&conn->ibc_refcount, 1);
- return (conn);
-
- failed:
- kibnal_destroy_conn (conn);
- return (NULL);
-}
-
-void
-kibnal_destroy_conn (kib_conn_t *conn)
-{
- vv_return_t vvrc;
-
- /* Only the connd does this (i.e. single threaded) */
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
-
- CDEBUG (D_NET, "connection %p\n", conn);
-
- LASSERT (atomic_read (&conn->ibc_refcount) == 0);
- LASSERT (list_empty(&conn->ibc_early_rxs));
- LASSERT (list_empty(&conn->ibc_tx_queue));
- LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
- LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
- LASSERT (list_empty(&conn->ibc_active_txs));
- LASSERT (conn->ibc_nsends_posted == 0);
-
- switch (conn->ibc_state) {
- default:
- /* conn must be completely disengaged from the network */
- LBUG();
-
- case IBNAL_CONN_DISCONNECTED:
- /* connvars should have been freed already */
- LASSERT (conn->ibc_connvars == NULL);
- /* fall through */
-
- case IBNAL_CONN_INIT:
- vvrc = cm_destroy_cep(conn->ibc_cep);
- LASSERT (vvrc == vv_return_ok);
- /* fall through */
-
- case IBNAL_CONN_INIT_QP:
- kibnal_set_qp_state(conn, vv_qp_state_reset);
- vvrc = vv_qp_destroy(kibnal_data.kib_hca, conn->ibc_qp);
- if (vvrc != vv_return_ok)
- CERROR("Can't destroy QP: %d\n", vvrc);
- /* fall through */
-
- case IBNAL_CONN_INIT_NOTHING:
- break;
- }
-
- if (conn->ibc_rx_pages != NULL)
- kibnal_free_pages(conn->ibc_rx_pages);
-
- if (conn->ibc_rxs != NULL)
- LIBCFS_FREE(conn->ibc_rxs,
- IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- if (conn->ibc_connvars != NULL)
- LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
-
- if (conn->ibc_peer != NULL)
- kibnal_peer_decref(conn->ibc_peer);
-
- LIBCFS_FREE(conn, sizeof (*conn));
-
- atomic_dec(&kibnal_data.kib_nconns);
-}
-
-int
-kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- count++;
- kibnal_close_conn_locked (conn, why);
- }
-
- return (count);
-}
-
-int
-kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- if (conn->ibc_incarnation == incarnation)
- continue;
-
- CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_incarnation, incarnation);
-
- count++;
- kibnal_close_conn_locked (conn, -ESTALE);
- }
-
- return (count);
-}
-
-int
-kibnal_close_matching_conns (lnet_nid_t nid)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int count = 0;
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
- continue;
-
- count += kibnal_close_peer_conns_locked (peer, 0);
- }
- }
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- /* wildcards always succeed */
- if (nid == LNET_NID_ANY)
- return (0);
-
- return (count == 0 ? -ENOENT : 0);
-}
-
-int
-kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
-
- LASSERT (ni == kibnal_data.kib_ni);
-
- switch(cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- __u32 ip = 0;
- int share_count = 0;
-
- rc = kibnal_get_peer_info(data->ioc_count,
- &nid, &ip, &share_count);
- data->ioc_nid = nid;
- data->ioc_count = share_count;
- data->ioc_u32[0] = ip;
- data->ioc_u32[1] = *kibnal_tunables.kib_service_number; /* port */
- break;
- }
- case IOC_LIBCFS_ADD_PEER: {
- rc = kibnal_add_persistent_peer (data->ioc_nid,
- data->ioc_u32[0]); /* IP */
- break;
- }
- case IOC_LIBCFS_DEL_PEER: {
- rc = kibnal_del_peer (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
-
- if (conn == NULL)
- rc = -ENOENT;
- else {
- // kibnal_debug_conn(conn);
- rc = 0;
- data->ioc_nid = conn->ibc_peer->ibp_nid;
- kibnal_conn_decref(conn);
- }
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- rc = kibnal_close_matching_conns (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_REGISTER_MYNID: {
- if (ni->ni_nid == data->ioc_nid) {
- rc = 0;
- } else {
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- rc = -EINVAL;
- }
- break;
- }
- }
-
- return rc;
-}
-
-void
-kibnal_free_pages (kib_pages_t *p)
-{
- int npages = p->ibp_npages;
- int i;
-
- for (i = 0; i < npages; i++)
- if (p->ibp_pages[i] != NULL)
- __free_page(p->ibp_pages[i]);
-
- LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
-}
-
-int
-kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
-{
- kib_pages_t *p;
- int i;
-
- LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
- if (p == NULL) {
- CERROR ("Can't allocate buffer %d\n", npages);
- return (-ENOMEM);
- }
-
- memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
- p->ibp_npages = npages;
-
- for (i = 0; i < npages; i++) {
- p->ibp_pages[i] = alloc_page (GFP_KERNEL);
- if (p->ibp_pages[i] == NULL) {
- CERROR ("Can't allocate page %d of %d\n", i, npages);
- kibnal_free_pages(p);
- return (-ENOMEM);
- }
- }
-
- *pp = p;
- return (0);
-}
-
-int
-kibnal_alloc_tx_descs (void)
-{
- int i;
-
- LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
- if (kibnal_data.kib_tx_descs == NULL)
- return -ENOMEM;
-
- memset(kibnal_data.kib_tx_descs, 0,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
-
-#if IBNAL_USE_FMR
- LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV *
- sizeof(*tx->tx_pages));
- if (tx->tx_pages == NULL)
- return -ENOMEM;
-#else
- LIBCFS_ALLOC(tx->tx_wrq,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq));
- if (tx->tx_wrq == NULL)
- return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_gl,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_gl));
- if (tx->tx_gl == NULL)
- return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
- rd_frags[IBNAL_MAX_RDMA_FRAGS]));
- if (tx->tx_rd == NULL)
- return -ENOMEM;
-#endif
- }
-
- return 0;
-}
-
-void
-kibnal_free_tx_descs (void)
-{
- int i;
-
- if (kibnal_data.kib_tx_descs == NULL)
- return;
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
-
-#if IBNAL_USE_FMR
- if (tx->tx_pages != NULL)
- LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV *
- sizeof(*tx->tx_pages));
-#else
- if (tx->tx_wrq != NULL)
- LIBCFS_FREE(tx->tx_wrq,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq));
-
- if (tx->tx_gl != NULL)
- LIBCFS_FREE(tx->tx_gl,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_gl));
-
- if (tx->tx_rd != NULL)
- LIBCFS_FREE(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
- rd_frags[IBNAL_MAX_RDMA_FRAGS]));
-#endif
- }
-
- LIBCFS_FREE(kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
-}
-
-#if IBNAL_USE_FMR
-void
-kibnal_free_fmrs (int n)
-{
- int i;
- vv_return_t vvrc;
- kib_tx_t *tx;
-
- for (i = 0; i < n; i++) {
- tx = &kibnal_data.kib_tx_descs[i];
-
- vvrc = vv_free_fmr(kibnal_data.kib_hca,
- tx->tx_md.md_fmrhandle);
- if (vvrc != vv_return_ok)
- CWARN("vv_free_fmr[%d]: %d\n", i, vvrc);
- }
-}
-#endif
-
-int
-kibnal_setup_tx_descs (void)
-{
- int ipage = 0;
- int page_offset = 0;
- struct page *page;
- kib_tx_t *tx;
- vv_mem_reg_h_t mem_h;
- vv_r_key_t rkey;
- vv_return_t vvrc;
- int i;
- int rc;
-#if IBNAL_USE_FMR
- vv_fmr_t fmr_props;
-#endif
-
- /* pre-mapped messages are not bigger than 1 page */
- CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
-
- /* No fancy arithmetic when we do the buffer calculations */
- CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
-
- rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
- IBNAL_TX_MSG_PAGES(), 0);
- if (rc != 0)
- return (rc);
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
- tx = &kibnal_data.kib_tx_descs[i];
-
-#if IBNAL_USE_FMR
- memset(&fmr_props, 0, sizeof(fmr_props));
- fmr_props.pd_hndl = kibnal_data.kib_pd;
- fmr_props.acl = (vv_acc_r_mem_write |
- vv_acc_l_mem_write);
- fmr_props.max_pages = LNET_MAX_IOV;
- fmr_props.log2_page_sz = PAGE_SHIFT;
- fmr_props.max_outstanding_maps = *kibnal_tunables.kib_fmr_remaps;
-
- vvrc = vv_alloc_fmr(kibnal_data.kib_hca,
- &fmr_props,
- &tx->tx_md.md_fmrhandle);
- if (vvrc != vv_return_ok) {
- CERROR("Can't allocate fmr %d: %d\n", i, vvrc);
- kibnal_free_fmrs(i);
- kibnal_free_pages (kibnal_data.kib_tx_pages);
- return -ENOMEM;
- }
-
- tx->tx_md.md_fmrcount = *kibnal_tunables.kib_fmr_remaps;
- tx->tx_md.md_active = 0;
-#endif
- tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
- page_offset);
-
- vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
- tx->tx_msg,
- IBNAL_MSG_SIZE,
- &mem_h,
- &tx->tx_lkey,
- &rkey);
- LASSERT (vvrc == vv_return_ok);
-
- CDEBUG(D_NET, "Tx[%d] %p->%p[%x]\n", i, tx,
- tx->tx_msg, tx->tx_lkey);
-
- list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
-
- page_offset += IBNAL_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
- }
- }
-
- return (0);
-}
-
-void
-kibnal_shutdown (lnet_ni_t *ni)
-{
- int i;
- vv_return_t vvrc;
-
- LASSERT (ni == kibnal_data.kib_ni);
- LASSERT (ni->ni_data == &kibnal_data);
-
- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- switch (kibnal_data.kib_init) {
-
- case IBNAL_INIT_ALL:
- /* stop accepting connections and prevent new peers */
- kibnal_stop_listener(ni);
-
- /* nuke all existing peers */
- kibnal_del_peer(LNET_NID_ANY);
-
- /* Wait for all peer state to clean up */
- i = 2;
- while (atomic_read(&kibnal_data.kib_npeers) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
- "waiting for %d peers to disconnect\n",
- atomic_read(&kibnal_data.kib_npeers));
- cfs_pause(cfs_time_seconds(1));
- }
- /* fall through */
-
- case IBNAL_INIT_CQ:
- vvrc = vv_cq_destroy(kibnal_data.kib_hca, kibnal_data.kib_cq);
- if (vvrc != vv_return_ok)
- CERROR ("Destroy CQ error: %d\n", vvrc);
- /* fall through */
-
- case IBNAL_INIT_TXD:
- kibnal_free_pages (kibnal_data.kib_tx_pages);
-#if IBNAL_USE_FMR
- kibnal_free_fmrs(IBNAL_TX_MSGS());
-#endif
- /* fall through */
-
- case IBNAL_INIT_PD:
-#if 0
- /* Only deallocate a PD if we actually allocated one */
- vvrc = vv_pd_deallocate(kibnal_data.kib_hca,
- kibnal_data.kib_pd);
- if (vvrc != vv_return_ok)
- CERROR ("Destroy PD error: %d\n", vvrc);
-#endif
- /* fall through */
-
- case IBNAL_INIT_ASYNC:
- vvrc = vv_dell_async_event_cb (kibnal_data.kib_hca,
- kibnal_async_callback);
- if (vvrc != vv_return_ok)
- CERROR("vv_dell_async_event_cb error: %d\n", vvrc);
-
- /* fall through */
-
- case IBNAL_INIT_HCA:
- vvrc = vv_hca_close(kibnal_data.kib_hca);
- if (vvrc != vv_return_ok)
- CERROR ("Close HCA error: %d\n", vvrc);
- /* fall through */
-
- case IBNAL_INIT_DATA:
- LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0);
- LASSERT (kibnal_data.kib_peers != NULL);
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
- LASSERT (list_empty (&kibnal_data.kib_peers[i]));
- }
- LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
- LASSERT (list_empty (&kibnal_data.kib_connd_zombies));
- LASSERT (list_empty (&kibnal_data.kib_connd_conns));
- LASSERT (list_empty (&kibnal_data.kib_connd_pcreqs));
- LASSERT (list_empty (&kibnal_data.kib_connd_peers));
-
- /* flag threads to terminate; wake and wait for them to die */
- kibnal_data.kib_shutdown = 1;
- wake_up_all (&kibnal_data.kib_sched_waitq);
- wake_up_all (&kibnal_data.kib_connd_waitq);
-
- i = 2;
- while (atomic_read (&kibnal_data.kib_nthreads) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d threads to terminate\n",
- atomic_read (&kibnal_data.kib_nthreads));
- cfs_pause(cfs_time_seconds(1));
- }
- /* fall through */
-
- case IBNAL_INIT_NOTHING:
- break;
- }
-
- kibnal_free_tx_descs();
-
- if (kibnal_data.kib_peers != NULL)
- LIBCFS_FREE (kibnal_data.kib_peers,
- sizeof (struct list_head) *
- kibnal_data.kib_peer_hash_size);
-
- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- kibnal_data.kib_init = IBNAL_INIT_NOTHING;
- PORTAL_MODULE_UNUSE;
-}
-
-int
-kibnal_startup (lnet_ni_t *ni)
-{
- char scratch[32];
- char ipif_name[32];
- char *hca_name;
- __u32 ip;
- __u32 netmask;
- int up;
- int nob;
- int devno;
- struct timeval tv;
- int rc;
- int i;
- vv_request_event_record_t req_er;
- vv_return_t vvrc;
-
- LASSERT (ni->ni_lnd == &the_kiblnd);
-
- /* Only 1 instance supported */
- if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
- CERROR ("Only 1 instance supported\n");
- return -EPERM;
- }
-
- if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
- CERROR ("Can't set credits(%d) > ntx(%d)\n",
- *kibnal_tunables.kib_credits,
- *kibnal_tunables.kib_ntx);
- return -EINVAL;
- }
-
- ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
- ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
-
- CLASSERT (LNET_MAX_INTERFACES > 1);
-
- if (ni->ni_interfaces[0] != NULL) {
- /* Use the HCA specified in 'networks=' */
-
- if (ni->ni_interfaces[1] != NULL) {
- CERROR("Multiple interfaces not supported\n");
- return -EPERM;
- }
-
- /* Parse <hca base name><number> */
- hca_name = ni->ni_interfaces[0];
- nob = strlen(*kibnal_tunables.kib_hca_basename);
-
- if (strncmp(hca_name, *kibnal_tunables.kib_hca_basename, nob) ||
- sscanf(hca_name + nob, "%d%n", &devno, &nob) < 1) {
- CERROR("Unrecognised HCA %s\n", hca_name);
- return -EINVAL;
- }
-
- } else {
- /* Use <hca base name>0 */
- devno = 0;
-
- hca_name = scratch;
- snprintf(hca_name, sizeof(scratch), "%s%d",
- *kibnal_tunables.kib_hca_basename, devno);
- if (strlen(hca_name) == sizeof(scratch) - 1) {
- CERROR("HCA name %s truncated\n", hca_name);
- return -EINVAL;
- }
- }
-
- /* Find IP address from <ipif base name><hca number> */
- snprintf(ipif_name, sizeof(ipif_name), "%s%d",
- *kibnal_tunables.kib_ipif_basename, devno);
- if (strlen(ipif_name) == sizeof(ipif_name) - 1) {
- CERROR("IPoIB interface name %s truncated\n", ipif_name);
- return -EINVAL;
- }
-
- rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
- if (rc != 0) {
- CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
- return -ENETDOWN;
- }
-
- if (!up) {
- CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
- return -ENETDOWN;
- }
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
-
- PORTAL_MODULE_USE;
- memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
-
- kibnal_data.kib_ni = ni;
- ni->ni_data = &kibnal_data;
-
- do_gettimeofday(&tv);
- kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
- rwlock_init(&kibnal_data.kib_global_lock);
-
- kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
- LIBCFS_ALLOC (kibnal_data.kib_peers,
- sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
- if (kibnal_data.kib_peers == NULL) {
- goto failed;
- }
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++)
- INIT_LIST_HEAD(&kibnal_data.kib_peers[i]);
-
- spin_lock_init (&kibnal_data.kib_connd_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_pcreqs);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_conns);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies);
- init_waitqueue_head (&kibnal_data.kib_connd_waitq);
-
- spin_lock_init (&kibnal_data.kib_sched_lock);
- init_waitqueue_head (&kibnal_data.kib_sched_waitq);
-
- spin_lock_init (&kibnal_data.kib_tx_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
-
- rc = kibnal_alloc_tx_descs();
- if (rc != 0) {
- CERROR("Can't allocate tx descs\n");
- goto failed;
- }
-
- /* lists/ptrs/locks initialised */
- kibnal_data.kib_init = IBNAL_INIT_DATA;
- /*****************************************************/
-
- for (i = 0; i < IBNAL_N_SCHED; i++) {
- rc = kibnal_thread_start (kibnal_scheduler, (void *)((long)i));
- if (rc != 0) {
- CERROR("Can't spawn vibnal scheduler[%d]: %d\n",
- i, rc);
- goto failed;
- }
- }
-
- rc = kibnal_thread_start (kibnal_connd, NULL);
- if (rc != 0) {
- CERROR ("Can't spawn vibnal connd: %d\n", rc);
- goto failed;
- }
-
- vvrc = vv_hca_open(hca_name, NULL, &kibnal_data.kib_hca);
- if (vvrc != vv_return_ok) {
- CERROR ("Can't open HCA %s: %d\n", hca_name, vvrc);
- goto failed;
- }
-
- /* Channel Adapter opened */
- kibnal_data.kib_init = IBNAL_INIT_HCA;
-
- /* register to get HCA's asynchronous events. */
- req_er.req_event_type = VV_ASYNC_EVENT_ALL_MASK;
- vvrc = vv_set_async_event_cb (kibnal_data.kib_hca, req_er,
- kibnal_async_callback);
- if (vvrc != vv_return_ok) {
- CERROR ("Can't set HCA %s callback: %d\n", hca_name, vvrc);
- goto failed;
- }
-
- kibnal_data.kib_init = IBNAL_INIT_ASYNC;
-
- /*****************************************************/
-
- vvrc = vv_hca_query(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs);
- if (vvrc != vv_return_ok) {
- CERROR ("Can't size port attrs for %s: %d\n", hca_name, vvrc);
- goto failed;
- }
-
- kibnal_data.kib_port = -1;
-
- for (i = 0; i<kibnal_data.kib_hca_attrs.port_num; i++) {
-
- int port_num = i+1;
- u_int32_t tbl_count;
- vv_port_attrib_t *pattr = &kibnal_data.kib_port_attr;
-
- vvrc = vv_port_query(kibnal_data.kib_hca, port_num, pattr);
- if (vvrc != vv_return_ok) {
- CERROR("vv_port_query failed for %s port %d: %d\n",
- hca_name, port_num, vvrc);
- continue;
- }
-
- switch (pattr->port_state) {
- case vv_state_linkDoun:
- CDEBUG(D_NET, "port[%d] Down\n", port_num);
- continue;
- case vv_state_linkInit:
- CDEBUG(D_NET, "port[%d] Init\n", port_num);
- continue;
- case vv_state_linkArm:
- CDEBUG(D_NET, "port[%d] Armed\n", port_num);
- continue;
- case vv_state_linkActive:
- CDEBUG(D_NET, "port[%d] Active\n", port_num);
-
- /* Found a suitable port. Get its GUID and PKEY. */
- tbl_count = 1;
- vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca,
- port_num, &tbl_count,
- &kibnal_data.kib_port_gid);
- if (vvrc != vv_return_ok) {
- CERROR("vv_get_port_gid_tbl failed "
- "for %s port %d: %d\n",
- hca_name, port_num, vvrc);
- continue;
- }
-
- tbl_count = 1;
- vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca,
- port_num, &tbl_count,
- &kibnal_data.kib_port_pkey);
- if (vvrc != vv_return_ok) {
- CERROR("vv_get_port_partition_tbl failed "
- "for %s port %d: %d\n",
- hca_name, port_num, vvrc);
- continue;
- }
-
- kibnal_data.kib_port = port_num;
-
- break;
- case vv_state_linkActDefer: /* TODO: correct? */
- case vv_state_linkNoChange:
- CERROR("Unexpected %s port[%d] state %d\n",
- hca_name, i, pattr->port_state);
- continue;
- }
- break;
- }
-
- if (kibnal_data.kib_port == -1) {
- CERROR ("Can't find an active port on %s\n", hca_name);
- goto failed;
- }
-
- CDEBUG(D_NET, "Using %s port %d - GID="LPX64":"LPX64"\n",
- hca_name, kibnal_data.kib_port,
- kibnal_data.kib_port_gid.scope.g.subnet,
- kibnal_data.kib_port_gid.scope.g.eui64);
-
- /*****************************************************/
-
-#if 1
- /* We use a pre-allocated PD */
- vvrc = vv_get_gen_pd_h(kibnal_data.kib_hca, &kibnal_data.kib_pd);
-#else
- vvrc = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd);
-#endif
- if (vvrc != vv_return_ok) {
- CERROR ("Can't init PD: %d\n", vvrc);
- goto failed;
- }
-
- /* flag PD initialised */
- kibnal_data.kib_init = IBNAL_INIT_PD;
- /*****************************************************/
-
- rc = kibnal_setup_tx_descs();
- if (rc != 0) {
- CERROR ("Can't register tx descs: %d\n", rc);
- goto failed;
- }
-
- /* flag TX descs initialised */
- kibnal_data.kib_init = IBNAL_INIT_TXD;
- /*****************************************************/
-
- {
- __u32 nentries;
-
- vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(),
- kibnal_cq_callback,
- NULL, /* context */
- &kibnal_data.kib_cq, &nentries);
- if (vvrc != 0) {
- CERROR ("Can't create RX CQ: %d\n", vvrc);
- goto failed;
- }
-
- /* flag CQ initialised */
- kibnal_data.kib_init = IBNAL_INIT_CQ;
-
- if (nentries < IBNAL_CQ_ENTRIES()) {
- CERROR ("CQ only has %d entries, need %d\n",
- nentries, IBNAL_CQ_ENTRIES());
- goto failed;
- }
-
- vvrc = vv_request_completion_notification(kibnal_data.kib_hca,
- kibnal_data.kib_cq,
- vv_next_solicit_unsolicit_event);
- if (vvrc != 0) {
- CERROR ("Failed to re-arm completion queue: %d\n", rc);
- goto failed;
- }
- }
-
- rc = kibnal_start_listener(ni);
- if (rc != 0) {
- CERROR("Can't start listener: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- kibnal_data.kib_init = IBNAL_INIT_ALL;
- /*****************************************************/
-
- return (0);
-
- failed:
- CDEBUG(D_NET, "kibnal_startup failed\n");
- kibnal_shutdown (ni);
- return (-ENETDOWN);
-}
-
-void __exit
-kibnal_module_fini (void)
-{
- lnet_unregister_lnd(&the_kiblnd);
- kibnal_tunables_fini();
-}
-
-int __init
-kibnal_module_init (void)
-{
- int rc;
-
- vibnal_assert_wire_constants();
-
- CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
- <= cm_REQ_priv_data_len);
- CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
- <= cm_REP_priv_data_len);
- CLASSERT (sizeof(kib_msg_t) <= IBNAL_MSG_SIZE);
-#if !IBNAL_USE_FMR
- CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
- <= IBNAL_MSG_SIZE);
- CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
- <= IBNAL_MSG_SIZE);
-#endif
- rc = kibnal_tunables_init();
- if (rc != 0)
- return rc;
-
- lnet_register_lnd(&the_kiblnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel Voltaire IB LND v1.00");
-MODULE_LICENSE("GPL");
-
-module_init(kibnal_module_init);
-module_exit(kibnal_module_fini);
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Frank Zago <fzago@systemfabricworks.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-
-/* CPU_{L,B}E #defines needed by Voltaire headers */
-#include <asm/byteorder.h>
-#ifdef __BIG_ENDIAN__
-#define CPU_BE 1
-#define CPU_LE 0
-#endif
-#ifdef __LITTLE_ENDIAN__
-#define CPU_BE 0
-#define CPU_LE 1
-#endif
-
-#include <vverbs.h>
-#include <ib-cm.h>
-#include <ibat.h>
-
-/* GCC 3.2.2, miscompiles this driver.
- * See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9853. */
-#define GCC_VERSION ((__GNUC__*100 + __GNUC_MINOR__)*100 + __GNUC_PATCHLEVEL__)
-#if (GCC_VERSION >= 30000) && (GCC_VERSION < 30203)
-# error Invalid GCC version. Must use GCC < 3.0.0 || GCC >= 3.2.3
-#endif
-
-#ifdef CONFIG_SMP
-# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */
-#else
-# define IBNAL_N_SCHED 1 /* # schedulers */
-#endif
-
-#define IBNAL_USE_FMR 1
-
-/* tunables fixed at compile time */
-#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */
-#define IBNAL_CREDIT_HIGHWATER 7 /* when eagerly to return credits */
-#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */
-
-/* constants derived from sdp-connection.c */
-#define IBNAL_QKEY 0
-#define IBNAL_PKEY 0xffff
-#define IBNAL_PKEY_IDX 0
-#define IBNAL_SGID_IDX 0
-#define IBNAL_SERVICE_LEVEL 0
-#define IBNAL_STATIC_RATE 0
-#define IBNAL_EE_FLOW_CNT 1
-#define IBNAL_LOCAL_SUB 1
-#define IBNAL_TRAFFIC_CLASS 0
-#define IBNAL_SOURCE_PATH_BIT 0
-#define IBNAL_OUS_DST_RD 1
-#define IBNAL_IB_MTU vv_mtu_1024
-
-/* constants derived from sdp-hca-params.h */
-#define PATH_RATE_2_5GB 2
-#define MLX_IPD_1x 1
-#define MLX_IPD_4x 0
-#define IBNAL_R_2_STATIC_RATE(r) ((r) == PATH_RATE_2_5GB ? MLX_IPD_1x : MLX_IPD_4x)
-
-/* other low-level IB constants */
-#define IBNAL_PKT_LIFETIME 5
-#define IBNAL_ARB_INITIATOR_DEPTH 0
-#define IBNAL_ARB_RESP_RES 0
-#define IBNAL_FAILOVER_ACCEPTED 0
-
-/************************/
-/* derived constants... */
-
-/* TX messages (shared by all connections) */
-#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx)
-#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE)
-#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)
-
-#if IBNAL_USE_FMR
-# define IBNAL_MAX_RDMA_FRAGS 1
-# define IBNAL_CONCURRENT_SENDS IBNAL_RX_MSGS
-#else
-# define IBNAL_MAX_RDMA_FRAGS LNET_MAX_IOV
-# define IBNAL_CONCURRENT_SENDS IBNAL_MSG_QUEUE_SIZE
-#endif
-
-/* RX messages (per connection) */
-#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE*2)
-#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
-
-#define IBNAL_CQ_ENTRIES() (IBNAL_TX_MSGS() * (1 + IBNAL_MAX_RDMA_FRAGS) + \
- IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers)
-
-typedef struct
-{
- unsigned int *kib_service_number; /* IB service number */
- int *kib_min_reconnect_interval; /* first failed connection retry... */
- int *kib_max_reconnect_interval; /* ...exponentially increasing to this */
- int *kib_concurrent_peers; /* max # nodes all talking to me */
- int *kib_cksum; /* checksum kib_msg_t? */
- int *kib_timeout; /* comms timeout (seconds) */
- int *kib_ntx; /* # tx descs */
- int *kib_credits; /* # concurrent sends */
- int *kib_peercredits; /* # concurrent sends to 1 peer */
- int *kib_arp_retries; /* # times to retry ARP */
- char **kib_hca_basename; /* HCA base name */
- char **kib_ipif_basename; /* IPoIB interface base name */
- int *kib_local_ack_timeout; /* IB RC QP ack timeout... */
- int *kib_retry_cnt; /* ...and retry */
- int *kib_rnr_cnt; /* RNR retries... */
- int *kib_rnr_nak_timer; /* ...and interval */
- int *kib_keepalive; /* keepalive interval */
- int *kib_concurrent_sends; /* send work queue sizing */
-#if IBNAL_USE_FMR
- int *kib_fmr_remaps; /* # FMR maps before unmap required */
-#endif
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */
-#endif
-} kib_tunables_t;
-
-typedef struct
-{
- int ibp_npages; /* # pages */
- struct page *ibp_pages[0];
-} kib_pages_t;
-
-#if IBNAL_USE_FMR
-typedef struct
-{
- vv_fmr_h_t md_fmrhandle; /* FMR handle */
- int md_fmrcount; /* # mappings left */
- int md_active; /* mapping in use? */
- __u32 md_lkey; /* local key */
- __u32 md_rkey; /* remote key */
- __u64 md_addr; /* IO VM address */
-} kib_md_t;
-#endif
-
-typedef struct
-{
- int kib_init; /* initialisation state */
- __u64 kib_incarnation; /* which one am I */
- int kib_shutdown; /* shut down? */
- atomic_t kib_nthreads; /* # live threads */
- lnet_ni_t *kib_ni; /* _the_ nal instance */
-
- vv_gid_t kib_port_gid; /* device/port GID */
- vv_p_key_t kib_port_pkey; /* device/port pkey */
-
- cm_cep_handle_t kib_listen_handle; /* IB listen handle */
-
- rwlock_t kib_global_lock; /* stabilize peer/conn ops */
- int kib_ready; /* CQ callback fired */
- int kib_checking_cq; /* a scheduler is checking the CQ */
-
- struct list_head *kib_peers; /* hash table of all my known peers */
- int kib_peer_hash_size; /* size of kib_peers */
- atomic_t kib_npeers; /* # peers extant */
- atomic_t kib_nconns; /* # connections extant */
-
- void *kib_connd; /* the connd task (serialisation assertions) */
- struct list_head kib_connd_peers; /* peers wanting to get connected */
- struct list_head kib_connd_pcreqs; /* passive connection requests */
- struct list_head kib_connd_conns; /* connections to setup/teardown */
- struct list_head kib_connd_zombies; /* connections with zero refcount */
- wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */
- spinlock_t kib_connd_lock; /* serialise */
-
- wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */
- spinlock_t kib_sched_lock; /* serialise */
-
- struct kib_tx *kib_tx_descs; /* all the tx descriptors */
- kib_pages_t *kib_tx_pages; /* premapped tx msg pages */
-
- struct list_head kib_idle_txs; /* idle tx descriptors */
- __u64 kib_next_tx_cookie; /* RDMA completion cookie */
- spinlock_t kib_tx_lock; /* serialise */
-
- vv_hca_h_t kib_hca; /* The HCA */
- vv_hca_attrib_t kib_hca_attrs; /* its properties */
- int kib_port; /* port on the device */
- vv_port_attrib_t kib_port_attr; /* its properties */
-
- vv_pd_h_t kib_pd; /* protection domain */
- vv_cq_h_t kib_cq; /* completion queue */
-
-} kib_data_t;
-
-#define IBNAL_INIT_NOTHING 0
-#define IBNAL_INIT_DATA 1
-#define IBNAL_INIT_LIB 2
-#define IBNAL_INIT_HCA 3
-#define IBNAL_INIT_ASYNC 4
-#define IBNAL_INIT_PD 5
-#define IBNAL_INIT_TXD 6
-#define IBNAL_INIT_CQ 7
-#define IBNAL_INIT_ALL 8
-
-#include "viblnd_wire.h"
-
-/***********************************************************************/
-
-typedef struct kib_rx /* receive message */
-{
- struct list_head rx_list; /* queue for attention */
- struct kib_conn *rx_conn; /* owning conn */
- int rx_nob; /* # bytes received (-1 while posted) */
- vv_l_key_t rx_lkey; /* local key */
- kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */
- vv_wr_t rx_wrq; /* receive work item */
- vv_scatgat_t rx_gl; /* and its memory */
-} kib_rx_t;
-
-typedef struct kib_tx /* transmit message */
-{
- struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
- struct kib_conn *tx_conn; /* owning conn */
- int tx_sending; /* # tx callbacks outstanding */
- int tx_queued; /* queued for sending */
- int tx_waiting; /* waiting for peer */
- int tx_status; /* completion status */
- unsigned long tx_deadline; /* completion deadline */
- __u64 tx_cookie; /* completion cookie */
- lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */
- vv_l_key_t tx_lkey; /* local key for message buffer */
- kib_msg_t *tx_msg; /* message buffer (host vaddr) */
- int tx_nwrq; /* # send work items */
-#if IBNAL_USE_FMR
- vv_wr_t tx_wrq[2]; /* send work items... */
- vv_scatgat_t tx_gl[2]; /* ...and their memory */
- kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */
- kib_md_t tx_md; /* FMR mapping descriptor */
- __u64 *tx_pages; /* page phys addrs */
-#else
- vv_wr_t *tx_wrq; /* send work items... */
- vv_scatgat_t *tx_gl; /* ...and their memory */
- kib_rdma_desc_t *tx_rd; /* rdma descriptor (src buffers) */
-#endif
-} kib_tx_t;
-
-/* Passive connection request (listener callback) queued for handling by connd */
-typedef struct kib_pcreq
-{
- struct list_head pcr_list; /* queue for handling by connd */
- cm_cep_handle_t pcr_cep; /* listening handle */
- cm_request_data_t pcr_cmreq; /* request data */
-} kib_pcreq_t;
-
-typedef struct kib_connvars
-{
- /* connection-in-progress variables */
- __u32 cv_port;
- __u32 cv_pkey_index;
- __u32 cv_rnr_count;
- __u32 cv_sgid_index;
- __u32 cv_remote_qpn;
- __u32 cv_local_qpn;
- __u32 cv_rxpsn;
- __u32 cv_txpsn;
- ib_path_record_v2_t cv_path;
- ibat_arp_data_t cv_arp;
- ibat_stat_t cv_arprc;
- cm_conn_data_t cv_conndata;
-} kib_connvars_t;
-
-typedef struct kib_conn
-{
- struct kib_peer *ibc_peer; /* owning peer */
- struct list_head ibc_list; /* stash on peer's conn list */
- __u64 ibc_incarnation; /* which instance of the peer */
- __u64 ibc_txseq; /* tx sequence number */
- __u64 ibc_rxseq; /* rx sequence number */
- __u32 ibc_version; /* peer protocol version */
- atomic_t ibc_refcount; /* # users */
- int ibc_state; /* what's happening */
- int ibc_nsends_posted; /* # uncompleted sends */
- int ibc_credits; /* # credits I have */
- int ibc_outstanding_credits; /* # credits to return */
- int ibc_reserved_credits; /* # credits for ACK/DONE msgs */
- int ibc_disconnect; /* some disconnect callback fired */
- int ibc_comms_error; /* set on comms error */
- unsigned long ibc_last_send; /* time of last send */
- struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */
- struct list_head ibc_tx_queue_nocred; /* sends that don't need a cred */
- struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */
- struct list_head ibc_tx_queue; /* send queue */
- struct list_head ibc_active_txs; /* active tx awaiting completion */
- spinlock_t ibc_lock; /* serialise */
- kib_rx_t *ibc_rxs; /* the rx descs */
- kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */
- vv_qp_h_t ibc_qp; /* queue pair */
- cm_cep_handle_t ibc_cep; /* connection endpoint */
- kib_connvars_t *ibc_connvars; /* in-progress connection state */
-} kib_conn_t;
-
-#define IBNAL_CONN_INIT_NOTHING 0 /* incomplete init */
-#define IBNAL_CONN_INIT_QP 1 /* QP allocated */
-#define IBNAL_CONN_INIT 2 /* completed init */
-#define IBNAL_CONN_ACTIVE_ARP 3 /* active arping */
-#define IBNAL_CONN_ACTIVE_CONNECT 4 /* active sending req */
-#define IBNAL_CONN_ACTIVE_CHECK_REPLY 5 /* active checking reply */
-#define IBNAL_CONN_ACTIVE_RTU 6 /* active sending rtu */
-#define IBNAL_CONN_PASSIVE_WAIT 7 /* passive waiting for rtu */
-#define IBNAL_CONN_ESTABLISHED 8 /* connection established */
-#define IBNAL_CONN_DISCONNECT1 9 /* disconnect phase 1 */
-#define IBNAL_CONN_DISCONNECT2 10 /* disconnect phase 2 */
-#define IBNAL_CONN_DISCONNECTED 11 /* disconnect complete */
-
-typedef struct kib_peer
-{
- struct list_head ibp_list; /* stash on global peer list */
- struct list_head ibp_connd_list; /* schedule on kib_connd_peers */
- lnet_nid_t ibp_nid; /* who's on the other end(s) */
- __u32 ibp_ip; /* IP to query for peer conn params */
- int ibp_port; /* port to qery for peer conn params */
- __u64 ibp_incarnation; /* peer's incarnation */
- atomic_t ibp_refcount; /* # users */
- int ibp_persistence; /* "known" peer refs */
- struct list_head ibp_conns; /* all active connections */
- struct list_head ibp_tx_queue; /* msgs waiting for a conn */
- int ibp_connecting; /* current active connection attempts */
- int ibp_accepting; /* current passive connection attempts */
- int ibp_arp_count; /* # arp attempts */
- unsigned long ibp_reconnect_time; /* when reconnect may be attempted */
- unsigned long ibp_reconnect_interval; /* exponential backoff */
- int ibp_error; /* errno on closing this peer */
- cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */
-} kib_peer_t;
-
-
-extern kib_data_t kibnal_data;
-extern kib_tunables_t kibnal_tunables;
-
-int kibnal_startup (lnet_ni_t *ni);
-void kibnal_shutdown (lnet_ni_t *ni);
-int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-extern int kibnal_eager_recv (lnet_ni_t *ni, void *private,
- lnet_msg_t *lntmsg, void **new_private);
-int kibnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-extern void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob);
-extern void kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
- lnet_nid_t dstnid, __u64 dststamp, __u64 seq);
-extern int kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob);
-extern int kibnal_create_peer(kib_peer_t **peerp, lnet_nid_t nid);
-extern void kibnal_destroy_peer(kib_peer_t *peer);
-extern int kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip);
-extern int kibnal_del_peer(lnet_nid_t nid);
-extern kib_peer_t *kibnal_find_peer_locked(lnet_nid_t nid);
-extern void kibnal_unlink_peer_locked(kib_peer_t *peer);
-extern void kibnal_peer_alive(kib_peer_t *peer);
-extern int kibnal_close_stale_conns_locked(kib_peer_t *peer,
- __u64 incarnation);
-extern kib_conn_t *kibnal_create_conn(cm_cep_handle_t cep);
-extern void kibnal_listen_callback(cm_cep_handle_t cep, cm_conn_data_t *info, void *arg);
-
-extern int kibnal_alloc_pages(kib_pages_t **pp, int npages, int access);
-extern void kibnal_free_pages(kib_pages_t *p);
-
-extern void kibnal_check_sends(kib_conn_t *conn);
-extern void kibnal_close_conn_locked(kib_conn_t *conn, int error);
-extern void kibnal_destroy_conn(kib_conn_t *conn);
-extern int kibnal_thread_start(int (*fn)(void *arg), void *arg);
-extern int kibnal_scheduler(void *arg);
-extern int kibnal_connd(void *arg);
-extern void kibnal_init_tx_msg(kib_tx_t *tx, int type, int body_nob);
-extern void kibnal_close_conn(kib_conn_t *conn, int why);
-extern int kibnal_set_qp_state(kib_conn_t *conn, vv_qp_state_t new_state);
-extern void kibnal_async_callback(vv_event_record_t ev);
-extern void kibnal_cq_callback(unsigned long context);
-extern void kibnal_passive_connreq(kib_pcreq_t *pcr, int reject);
-extern void kibnal_txlist_done (struct list_head *txlist, int status);
-extern void kibnal_queue_tx(kib_tx_t *tx, kib_conn_t *conn);
-extern int kibnal_init_rdma(kib_tx_t *tx, int type, int nob,
- kib_rdma_desc_t *dstrd, __u64 dstcookie);
-extern int kibnal_tunables_init(void);
-extern void kibnal_tunables_fini(void);
-
-#define kibnal_conn_addref(conn) \
-do { \
- CDEBUG(D_NET, "conn[%p] (%d)++\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- atomic_inc(&(conn)->ibc_refcount); \
-} while (0)
-
-#define kibnal_conn_decref(conn) \
-do { \
- unsigned long flags; \
- \
- CDEBUG(D_NET, "conn[%p] (%d)--\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); \
- list_add_tail(&(conn)->ibc_list, \
- &kibnal_data.kib_connd_zombies); \
- wake_up(&kibnal_data.kib_connd_waitq); \
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); \
- } \
-} while (0)
-
-#define kibnal_peer_addref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- atomic_inc(&(peer)->ibp_refcount); \
-} while (0)
-
-#define kibnal_peer_decref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
- kibnal_destroy_peer(peer); \
-} while (0)
-
-static inline struct list_head *
-kibnal_nid2peerlist (lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size;
-
- return (&kibnal_data.kib_peers [hash]);
-}
-
-static inline int
-kibnal_peer_active (kib_peer_t *peer)
-{
- /* Am I in the peer hash table? */
- return (!list_empty(&peer->ibp_list));
-}
-
-static inline void
-kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
-{
- struct list_head *q;
-
- LASSERT (tx->tx_nwrq > 0); /* work items set up */
- LASSERT (!tx->tx_queued); /* not queued for sending already */
-
- tx->tx_queued = 1;
- tx->tx_deadline = jiffies + (*kibnal_tunables.kib_timeout * HZ);
-
- if (tx->tx_conn == NULL) {
- kibnal_conn_addref(conn);
- tx->tx_conn = conn;
- LASSERT (tx->tx_msg->ibm_type != IBNAL_MSG_PUT_DONE);
- } else {
- LASSERT (tx->tx_conn == conn);
- LASSERT (tx->tx_msg->ibm_type == IBNAL_MSG_PUT_DONE);
- }
-
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- /* All messages have simple credit control */
- q = &conn->ibc_tx_queue;
- } else {
- LASSERT (conn->ibc_version == IBNAL_MSG_VERSION);
-
- switch (tx->tx_msg->ibm_type) {
- case IBNAL_MSG_PUT_REQ:
- case IBNAL_MSG_GET_REQ:
- /* RDMA request: reserve a buffer for the RDMA reply
- * before sending */
- q = &conn->ibc_tx_queue_rsrvd;
- break;
-
- case IBNAL_MSG_PUT_NAK:
- case IBNAL_MSG_PUT_ACK:
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- /* RDMA reply/completion: no credits; peer has reserved
- * a reply buffer */
- q = &conn->ibc_tx_queue_nocred;
- break;
-
- case IBNAL_MSG_NOOP:
- case IBNAL_MSG_IMMEDIATE:
- /* Otherwise: consume a credit before sending */
- q = &conn->ibc_tx_queue;
- break;
-
- default:
- LBUG();
- q = NULL;
- }
- }
-
- list_add_tail(&tx->tx_list, q);
-}
-
-static inline int
-kibnal_send_keepalive(kib_conn_t *conn)
-{
- return (*kibnal_tunables.kib_keepalive > 0) &&
- time_after(jiffies, conn->ibc_last_send +
- *kibnal_tunables.kib_keepalive*HZ);
-}
-
-#ifndef IBNAL_VOIDSTAR_SGADDR
-# define IBNAL_VOIDSTAR_SGADDR 0
-#endif
-
-#if IBNAL_VOIDSTAR_SGADDR
-# if defined(CONFIG_HIGHMEM)
-# if defined(CONFIG_X86) && defined(CONFIG_HIGHMEM4G)
- /* truncation to void* doesn't matter if 0 <= physmem < 4G
- * so allow x86 with 32 bit phys addrs */
-# elif defined(CONFIG_IA64)
- /* OK anyway on 64-bit arch */
-# else
-# error "Can't support HIGHMEM when vv_scatgat_t::v_address is void *"
-# endif
-# endif
-# define KIBNAL_ADDR2SG(a) ((void *)((unsigned long)(a)))
-# define KIBNAL_SG2ADDR(a) ((__u64)((unsigned long)(a)))
-static inline __u64 kibnal_addr2net (__u64 addr)
-{
- void *netaddr;
- vv_return_t vvrc = vv_va2advertise_addr(kibnal_data.kib_hca,
- KIBNAL_ADDR2SG(addr),
- &netaddr);
- LASSERT (vvrc == vv_return_ok);
- return KIBNAL_SG2ADDR(netaddr);
-}
-#else
-# define KIBNAL_ADDR2SG(a) a
-# define KIBNAL_SG2ADDR(a) a
-static inline __u64 kibnal_addr2net (__u64 addr)
-{
- __u64 netaddr;
- vv_return_t vvrc = vv_va2advertise_addr(kibnal_data.kib_hca,
- addr,
- &netaddr);
- LASSERT (vvrc == vv_return_ok);
- return netaddr;
-}
-#endif
-
-/* CAVEAT EMPTOR: We rely on tx/rx descriptor alignment to allow us to use the
- * lowest 2 bits of the work request id to stash the work item type (the op
- * field is not valid when the wc completes in error). */
-
-#define IBNAL_WID_TX 0
-#define IBNAL_WID_RX 1
-#define IBNAL_WID_RDMA 2
-#define IBNAL_WID_MASK 3UL
-
-static inline vv_wr_id_t
-kibnal_ptr2wreqid (void *ptr, int type)
-{
- unsigned long lptr = (unsigned long)ptr;
-
- LASSERT ((lptr & IBNAL_WID_MASK) == 0);
- LASSERT ((type & ~IBNAL_WID_MASK) == 0);
- return (vv_wr_id_t)(lptr | type);
-}
-
-static inline void *
-kibnal_wreqid2ptr (vv_wr_id_t wreqid)
-{
- return (void *)(((unsigned long)wreqid) & ~IBNAL_WID_MASK);
-}
-
-static inline int
-kibnal_wreqid2type (vv_wr_id_t wreqid)
-{
- return (wreqid & IBNAL_WID_MASK);
-}
-
-static inline void
-kibnal_set_conn_state (kib_conn_t *conn, int state)
-{
- conn->ibc_state = state;
- mb();
-}
-
-#if IBNAL_USE_FMR
-
-static inline int
-kibnal_rd_size (kib_rdma_desc_t *rd)
-{
- return rd->rd_nob;
-}
-
-#else
-static inline __u64
-kibnal_rf_addr (kib_rdma_frag_t *rf)
-{
- return (((__u64)rf->rf_addr_hi)<<32) | ((__u64)rf->rf_addr_lo);
-}
-
-static inline void
-kibnal_rf_set (kib_rdma_frag_t *rf, __u64 addr, int nob)
-{
- rf->rf_addr_lo = addr & 0xffffffff;
- rf->rf_addr_hi = (addr >> 32) & 0xffffffff;
- rf->rf_nob = nob;
-}
-
-static inline int
-kibnal_rd_size (kib_rdma_desc_t *rd)
-{
- int i;
- int size;
-
- for (i = size = 0; i < rd->rd_nfrag; i++)
- size += rd->rd_frags[i].rf_nob;
-
- return size;
-}
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Frank Zago <fzago@systemfabricworks.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "viblnd.h"
-
-void
-kibnal_tx_done (kib_tx_t *tx)
-{
- lnet_msg_t *lntmsg[2];
- int rc = tx->tx_status;
- int i;
-
- LASSERT (!in_interrupt());
- LASSERT (!tx->tx_queued); /* mustn't be queued for sending */
- LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */
- LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */
-
-#if IBNAL_USE_FMR
- if (tx->tx_md.md_fmrcount == 0 ||
- (rc != 0 && tx->tx_md.md_active)) {
- vv_return_t vvrc;
-
- /* mapping must be active (it dropped fmrcount to 0) */
- LASSERT (tx->tx_md.md_active);
-
- vvrc = vv_unmap_fmr(kibnal_data.kib_hca,
- 1, &tx->tx_md.md_fmrhandle);
- LASSERT (vvrc == vv_return_ok);
-
- tx->tx_md.md_fmrcount = *kibnal_tunables.kib_fmr_remaps;
- }
- tx->tx_md.md_active = 0;
-#endif
-
- /* tx may have up to 2 lnet msgs to finalise */
- lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
- lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
-
- if (tx->tx_conn != NULL) {
- kibnal_conn_decref(tx->tx_conn);
- tx->tx_conn = NULL;
- }
-
- tx->tx_nwrq = 0;
- tx->tx_status = 0;
-
- spin_lock(&kibnal_data.kib_tx_lock);
-
- list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
-
- spin_unlock(&kibnal_data.kib_tx_lock);
-
- /* delay finalize until my descs have been freed */
- for (i = 0; i < 2; i++) {
- if (lntmsg[i] == NULL)
- continue;
-
- lnet_finalize (kibnal_data.kib_ni, lntmsg[i], rc);
- }
-}
-
-void
-kibnal_txlist_done (struct list_head *txlist, int status)
-{
- kib_tx_t *tx;
-
- while (!list_empty (txlist)) {
- tx = list_entry (txlist->next, kib_tx_t, tx_list);
-
- list_del (&tx->tx_list);
- /* complete now */
- tx->tx_waiting = 0;
- tx->tx_status = status;
- kibnal_tx_done (tx);
- }
-}
-
-kib_tx_t *
-kibnal_get_idle_tx (void)
-{
- kib_tx_t *tx;
-
- spin_lock(&kibnal_data.kib_tx_lock);
-
- if (list_empty (&kibnal_data.kib_idle_txs)) {
- spin_unlock(&kibnal_data.kib_tx_lock);
- return NULL;
- }
-
- tx = list_entry (kibnal_data.kib_idle_txs.next, kib_tx_t, tx_list);
- list_del (&tx->tx_list);
-
- /* Allocate a new completion cookie. It might not be needed,
- * but we've got a lock right now and we're unlikely to
- * wrap... */
- tx->tx_cookie = kibnal_data.kib_next_tx_cookie++;
-
- spin_unlock(&kibnal_data.kib_tx_lock);
-
- LASSERT (tx->tx_nwrq == 0);
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_sending == 0);
- LASSERT (!tx->tx_waiting);
- LASSERT (tx->tx_status == 0);
- LASSERT (tx->tx_conn == NULL);
- LASSERT (tx->tx_lntmsg[0] == NULL);
- LASSERT (tx->tx_lntmsg[1] == NULL);
-
- return tx;
-}
-
-int
-kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit)
-{
- kib_conn_t *conn = rx->rx_conn;
- int rc = 0;
- __u64 addr = (__u64)((unsigned long)((rx)->rx_msg));
- vv_return_t vvrc;
-
- LASSERT (!in_interrupt());
- /* old peers don't reserve rxs for RDMA replies */
- LASSERT (!rsrvd_credit ||
- conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
-
- rx->rx_gl = (vv_scatgat_t) {
- .v_address = KIBNAL_ADDR2SG(addr),
- .l_key = rx->rx_lkey,
- .length = IBNAL_MSG_SIZE,
- };
-
- rx->rx_wrq = (vv_wr_t) {
- .wr_id = kibnal_ptr2wreqid(rx, IBNAL_WID_RX),
- .completion_notification = 1,
- .scatgat_list = &rx->rx_gl,
- .num_of_data_segments = 1,
- .wr_type = vv_wr_receive,
- };
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_INIT);
- LASSERT (rx->rx_nob >= 0); /* not posted */
-
- CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n",
- rx->rx_wrq.scatgat_list->length,
- rx->rx_wrq.scatgat_list->l_key,
- KIBNAL_SG2ADDR(rx->rx_wrq.scatgat_list->v_address));
-
- if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) {
- /* No more posts for this rx; so lose its ref */
- kibnal_conn_decref(conn);
- return 0;
- }
-
- rx->rx_nob = -1; /* flag posted */
-
- spin_lock(&conn->ibc_lock);
- /* Serialise vv_post_receive; it's not re-entrant on the same QP */
- vvrc = vv_post_receive(kibnal_data.kib_hca,
- conn->ibc_qp, &rx->rx_wrq);
-
- if (vvrc == vv_return_ok) {
- if (credit)
- conn->ibc_outstanding_credits++;
- if (rsrvd_credit)
- conn->ibc_reserved_credits++;
-
- spin_unlock(&conn->ibc_lock);
-
- if (credit || rsrvd_credit)
- kibnal_check_sends(conn);
-
- return 0;
- }
-
- spin_unlock(&conn->ibc_lock);
-
- CERROR ("post rx -> %s failed %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), vvrc);
- rc = -EIO;
- kibnal_close_conn(rx->rx_conn, rc);
- /* No more posts for this rx; so lose its ref */
- kibnal_conn_decref(conn);
- return rc;
-}
-
-int
-kibnal_post_receives (kib_conn_t *conn)
-{
- int i;
- int rc;
-
- LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED);
- LASSERT (conn->ibc_comms_error == 0);
-
- for (i = 0; i < IBNAL_RX_MSGS; i++) {
- /* +1 ref for rx desc. This ref remains until kibnal_post_rx
- * fails (i.e. actual failure or we're disconnecting) */
- kibnal_conn_addref(conn);
- rc = kibnal_post_rx (&conn->ibc_rxs[i], 0, 0);
- if (rc != 0)
- return rc;
- }
-
- return 0;
-}
-
-kib_tx_t *
-kibnal_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie)
-{
- struct list_head *tmp;
-
- list_for_each(tmp, &conn->ibc_active_txs) {
- kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list);
-
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_sending != 0 || tx->tx_waiting);
-
- if (tx->tx_cookie != cookie)
- continue;
-
- if (tx->tx_waiting &&
- tx->tx_msg->ibm_type == txtype)
- return tx;
-
- CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
- tx->tx_waiting ? "" : "NOT ",
- tx->tx_msg->ibm_type, txtype);
- }
- return NULL;
-}
-
-void
-kibnal_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie)
-{
- kib_tx_t *tx;
- int idle;
-
- spin_lock(&conn->ibc_lock);
-
- tx = kibnal_find_waiting_tx_locked(conn, txtype, cookie);
- if (tx == NULL) {
- spin_unlock(&conn->ibc_lock);
-
- CWARN("Unmatched completion type %x cookie "LPX64" from %s\n",
- txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_close_conn (conn, -EPROTO);
- return;
- }
-
- if (tx->tx_status == 0) { /* success so far */
- if (status < 0) { /* failed? */
- tx->tx_status = status;
- } else if (txtype == IBNAL_MSG_GET_REQ) {
- lnet_set_reply_msg_len(kibnal_data.kib_ni,
- tx->tx_lntmsg[1], status);
- }
- }
-
- tx->tx_waiting = 0;
-
- idle = !tx->tx_queued && (tx->tx_sending == 0);
- if (idle)
- list_del(&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kibnal_tx_done(tx);
-}
-
-void
-kibnal_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie)
-{
- kib_tx_t *tx = kibnal_get_idle_tx();
-
- if (tx == NULL) {
- CERROR("Can't get tx for completion %x for %s\n",
- type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
- }
-
- tx->tx_msg->ibm_u.completion.ibcm_status = status;
- tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
- kibnal_init_tx_msg(tx, type, sizeof(kib_completion_msg_t));
-
- kibnal_queue_tx(tx, conn);
-}
-
-void
-kibnal_handle_rx (kib_rx_t *rx)
-{
- kib_msg_t *msg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- int credits = msg->ibm_credits;
- kib_tx_t *tx;
- int rc = 0;
- int repost = 1;
- int rsrvd_credit = 0;
- int rc2;
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- CDEBUG (D_NET, "Received %x[%d] from %s\n",
- msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- if (credits != 0) {
- /* Have I received credits that will let me send? */
- spin_lock(&conn->ibc_lock);
- conn->ibc_credits += credits;
- spin_unlock(&conn->ibc_lock);
-
- kibnal_check_sends(conn);
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Bad IBNAL message type %x from %s\n",
- msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- rc = -EPROTO;
- break;
-
- case IBNAL_MSG_NOOP:
- break;
-
- case IBNAL_MSG_IMMEDIATE:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr,
- msg->ibm_srcnid, rx, 0);
- repost = rc < 0; /* repost on error */
- break;
-
- case IBNAL_MSG_PUT_REQ:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.putreq.ibprm_hdr,
- msg->ibm_srcnid, rx, 1);
- repost = rc < 0; /* repost on error */
- break;
-
- case IBNAL_MSG_PUT_NAK:
- rsrvd_credit = 1; /* rdma reply (was pre-reserved) */
-
- CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_handle_completion(conn, IBNAL_MSG_PUT_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBNAL_MSG_PUT_ACK:
- rsrvd_credit = 1; /* rdma reply (was pre-reserved) */
-
- spin_lock(&conn->ibc_lock);
- tx = kibnal_find_waiting_tx_locked(conn, IBNAL_MSG_PUT_REQ,
- msg->ibm_u.putack.ibpam_src_cookie);
- if (tx != NULL)
- list_del(&tx->tx_list);
- spin_unlock(&conn->ibc_lock);
-
- if (tx == NULL) {
- CERROR("Unmatched PUT_ACK from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- rc = -EPROTO;
- break;
- }
-
- LASSERT (tx->tx_waiting);
- /* CAVEAT EMPTOR: I could be racing with tx_complete, but...
- * (a) I can overwrite tx_msg since my peer has received it!
- * (b) tx_waiting set tells tx_complete() it's not done. */
-
- tx->tx_nwrq = 0; /* overwrite PUT_REQ */
-
- rc2 = kibnal_init_rdma(tx, IBNAL_MSG_PUT_DONE,
- kibnal_rd_size(&msg->ibm_u.putack.ibpam_rd),
- &msg->ibm_u.putack.ibpam_rd,
- msg->ibm_u.putack.ibpam_dst_cookie);
- if (rc2 < 0)
- CERROR("Can't setup rdma for PUT to %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
-
- spin_lock(&conn->ibc_lock);
- if (tx->tx_status == 0 && rc2 < 0)
- tx->tx_status = rc2;
- tx->tx_waiting = 0; /* clear waiting and queue atomically */
- kibnal_queue_tx_locked(tx, conn);
- spin_unlock(&conn->ibc_lock);
- break;
-
- case IBNAL_MSG_PUT_DONE:
- /* This buffer was pre-reserved by not returning the credit
- * when the PUT_REQ's buffer was reposted, so I just return it
- * now */
- kibnal_handle_completion(conn, IBNAL_MSG_PUT_ACK,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBNAL_MSG_GET_REQ:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.get.ibgm_hdr,
- msg->ibm_srcnid, rx, 1);
- repost = rc < 0; /* repost on error */
- break;
-
- case IBNAL_MSG_GET_DONE:
- rsrvd_credit = 1; /* rdma reply (was pre-reserved) */
-
- kibnal_handle_completion(conn, IBNAL_MSG_GET_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
- }
-
- if (rc < 0) /* protocol error */
- kibnal_close_conn(conn, rc);
-
- if (repost) {
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD)
- rsrvd_credit = 0; /* peer isn't pre-reserving */
-
- kibnal_post_rx(rx, !rsrvd_credit, rsrvd_credit);
- }
-}
-
-void
-kibnal_rx_complete (kib_rx_t *rx, vv_comp_status_t vvrc, int nob, __u64 rxseq)
-{
- kib_msg_t *msg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- unsigned long flags;
- int rc;
-
- CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
- LASSERT (rx->rx_nob < 0); /* was posted */
- rx->rx_nob = 0; /* isn't now */
-
- if (conn->ibc_state > IBNAL_CONN_ESTABLISHED)
- goto ignore;
-
- if (vvrc != vv_comp_status_success) {
- CERROR("Rx from %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), vvrc);
- goto failed;
- }
-
- rc = kibnal_unpack_msg(msg, conn->ibc_version, nob);
- if (rc != 0) {
- CERROR ("Error %d unpacking rx from %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- goto failed;
- }
-
- rx->rx_nob = nob; /* Can trust 'nob' now */
-
- if (!lnet_ptlcompat_matchnid(conn->ibc_peer->ibp_nid,
- msg->ibm_srcnid) ||
- !lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
- msg->ibm_dstnid) ||
- msg->ibm_srcstamp != conn->ibc_incarnation ||
- msg->ibm_dststamp != kibnal_data.kib_incarnation) {
- CERROR ("Stale rx from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- goto failed;
- }
-
- if (msg->ibm_seq != rxseq) {
- CERROR ("Out-of-sequence rx from %s"
- ": got "LPD64" but expected "LPD64"\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- msg->ibm_seq, rxseq);
- goto failed;
- }
-
- /* set time last known alive */
- kibnal_peer_alive(conn->ibc_peer);
-
- /* racing with connection establishment/teardown! */
-
- if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) {
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- /* must check holding global lock to eliminate race */
- if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) {
- list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return;
- }
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- }
- kibnal_handle_rx(rx);
- return;
-
- failed:
- CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
- kibnal_close_conn(conn, -EIO);
- ignore:
- /* Don't re-post rx & drop its ref on conn */
- kibnal_conn_decref(conn);
-}
-
-struct page *
-kibnal_kvaddr_to_page (unsigned long vaddr)
-{
- struct page *page;
-
- if (vaddr >= VMALLOC_START &&
- vaddr < VMALLOC_END) {
- page = vmalloc_to_page ((void *)vaddr);
- LASSERT (page != NULL);
- return page;
- }
-#ifdef CONFIG_HIGHMEM
- if (vaddr >= PKMAP_BASE &&
- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
- /* No highmem pages only used for bulk (kiov) I/O */
- CERROR("find page for address in highmem\n");
- LBUG();
- }
-#endif
- page = virt_to_page (vaddr);
- LASSERT (page != NULL);
- return page;
-}
-
-#if !IBNAL_USE_FMR
-int
-kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page,
- unsigned long page_offset, unsigned long len)
-{
- kib_rdma_frag_t *frag = &rd->rd_frags[rd->rd_nfrag];
- vv_l_key_t l_key;
- vv_r_key_t r_key;
- __u64 addr;
- __u64 frag_addr;
- vv_mem_reg_h_t mem_h;
- vv_return_t vvrc;
-
- if (rd->rd_nfrag >= IBNAL_MAX_RDMA_FRAGS) {
- CERROR ("Too many RDMA fragments\n");
- return -EMSGSIZE;
- }
-
- /* Try to create an address that adaptor-tavor will munge into a valid
- * network address, given how it maps all phys mem into 1 region */
- addr = lnet_page2phys(page) + page_offset + PAGE_OFFSET;
-
- /* NB this relies entirely on there being a single region for the whole
- * of memory, since "high" memory will wrap in the (void *) cast! */
- vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
- (void *)((unsigned long)addr),
- len, &mem_h, &l_key, &r_key);
- LASSERT (vvrc == vv_return_ok);
-
- if (active) {
- if (rd->rd_nfrag == 0) {
- rd->rd_key = l_key;
- } else if (l_key != rd->rd_key) {
- CERROR ("> 1 key for single RDMA desc\n");
- return -EINVAL;
- }
- frag_addr = addr;
- } else {
- if (rd->rd_nfrag == 0) {
- rd->rd_key = r_key;
- } else if (r_key != rd->rd_key) {
- CERROR ("> 1 key for single RDMA desc\n");
- return -EINVAL;
- }
-
- frag_addr = kibnal_addr2net(addr);
- }
-
- kibnal_rf_set(frag, frag_addr, len);
-
- CDEBUG(D_NET,"map frag [%d][%d %x %08x%08x] "LPX64"\n",
- rd->rd_nfrag, frag->rf_nob, rd->rd_key,
- frag->rf_addr_hi, frag->rf_addr_lo, frag_addr);
-
- rd->rd_nfrag++;
- return 0;
-}
-
-int
-kibnal_setup_rd_iov(kib_tx_t *tx, kib_rdma_desc_t *rd,
- vv_access_con_bit_mask_t access,
- unsigned int niov, struct iovec *iov, int offset, int nob)
-{
- /* active if I'm sending */
- int active = ((access & vv_acc_r_mem_write) == 0);
- int fragnob;
- int rc;
- unsigned long vaddr;
- struct page *page;
- int page_offset;
-
- LASSERT (nob > 0);
- LASSERT (niov > 0);
- LASSERT ((rd != tx->tx_rd) == !active);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT (niov > 0);
- }
-
- rd->rd_nfrag = 0;
- do {
- LASSERT (niov > 0);
-
- vaddr = ((unsigned long)iov->iov_base) + offset;
- page_offset = vaddr & (PAGE_SIZE - 1);
- page = kibnal_kvaddr_to_page(vaddr);
- if (page == NULL) {
- CERROR ("Can't find page\n");
- return -EFAULT;
- }
-
- fragnob = min((int)(iov->iov_len - offset), nob);
- fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
- rc = kibnal_append_rdfrag(rd, active, page,
- page_offset, fragnob);
- if (rc != 0)
- return rc;
-
- if (offset + fragnob < iov->iov_len) {
- offset += fragnob;
- } else {
- offset = 0;
- iov++;
- niov--;
- }
- nob -= fragnob;
- } while (nob > 0);
-
- return 0;
-}
-
-int
-kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd,
- vv_access_con_bit_mask_t access,
- int nkiov, lnet_kiov_t *kiov, int offset, int nob)
-{
- /* active if I'm sending */
- int active = ((access & vv_acc_r_mem_write) == 0);
- int fragnob;
- int rc;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT (nob > 0);
- LASSERT (nkiov > 0);
- LASSERT ((rd != tx->tx_rd) == !active);
-
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- nkiov--;
- kiov++;
- LASSERT (nkiov > 0);
- }
-
- rd->rd_nfrag = 0;
- do {
- LASSERT (nkiov > 0);
- fragnob = min((int)(kiov->kiov_len - offset), nob);
-
- rc = kibnal_append_rdfrag(rd, active, kiov->kiov_page,
- kiov->kiov_offset + offset,
- fragnob);
- if (rc != 0)
- return rc;
-
- offset = 0;
- kiov++;
- nkiov--;
- nob -= fragnob;
- } while (nob > 0);
-
- return 0;
-}
-#else
-int
-kibnal_map_tx (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
- int npages, unsigned long page_offset, int nob)
-{
- vv_return_t vvrc;
- vv_fmr_map_t map_props;
-
- LASSERT ((rd != tx->tx_rd) == !active);
- LASSERT (!tx->tx_md.md_active);
- LASSERT (tx->tx_md.md_fmrcount > 0);
- LASSERT (page_offset < PAGE_SIZE);
- LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT)));
- LASSERT (npages <= LNET_MAX_IOV);
-
- memset(&map_props, 0, sizeof(map_props));
-
- map_props.start = (void *)page_offset;
- map_props.size = nob;
- map_props.page_array_len = npages;
- map_props.page_array = tx->tx_pages;
-
- vvrc = vv_map_fmr(kibnal_data.kib_hca, tx->tx_md.md_fmrhandle,
- &map_props, &tx->tx_md.md_lkey, &tx->tx_md.md_rkey);
- if (vvrc != vv_return_ok) {
- CERROR ("Can't map vaddr %p for %d in %d pages: %d\n",
- map_props.start, nob, npages, vvrc);
- return -EFAULT;
- }
-
- tx->tx_md.md_addr = (unsigned long)map_props.start;
- tx->tx_md.md_active = 1;
- tx->tx_md.md_fmrcount--;
-
- rd->rd_key = active ? tx->tx_md.md_lkey : tx->tx_md.md_rkey;
- rd->rd_nob = nob;
- rd->rd_addr = tx->tx_md.md_addr;
-
- /* Compensate for adaptor-tavor's munging of gatherlist addresses */
- if (active)
- rd->rd_addr += PAGE_OFFSET;
-
- return 0;
-}
-
-int
-kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd,
- vv_access_con_bit_mask_t access,
- unsigned int niov, struct iovec *iov, int offset, int nob)
-{
- /* active if I'm sending */
- int active = ((access & vv_acc_r_mem_write) == 0);
- int resid;
- int fragnob;
- struct page *page;
- int npages;
- unsigned long page_offset;
- unsigned long vaddr;
-
- LASSERT (nob > 0);
- LASSERT (niov > 0);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT (niov > 0);
- }
-
- if (nob > iov->iov_len - offset) {
- CERROR ("Can't map multiple vaddr fragments\n");
- return (-EMSGSIZE);
- }
-
- vaddr = ((unsigned long)iov->iov_base) + offset;
-
- page_offset = vaddr & (PAGE_SIZE - 1);
- resid = nob;
- npages = 0;
-
- do {
- LASSERT (npages < LNET_MAX_IOV);
-
- page = kibnal_kvaddr_to_page(vaddr);
- if (page == NULL) {
- CERROR("Can't find page for %lu\n", vaddr);
- return -EFAULT;
- }
-
- tx->tx_pages[npages++] = lnet_page2phys(page);
-
- fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1));
- vaddr += fragnob;
- resid -= fragnob;
-
- } while (resid > 0);
-
- return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
-}
-
-int
-kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd,
- vv_access_con_bit_mask_t access,
- int nkiov, lnet_kiov_t *kiov, int offset, int nob)
-{
- /* active if I'm sending */
- int active = ((access & vv_acc_r_mem_write) == 0);
- int resid;
- int npages;
- unsigned long page_offset;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT (nob > 0);
- LASSERT (nkiov > 0);
- LASSERT (nkiov <= LNET_MAX_IOV);
- LASSERT (!tx->tx_md.md_active);
- LASSERT ((rd != tx->tx_rd) == !active);
-
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
- nkiov--;
- kiov++;
- LASSERT (nkiov > 0);
- }
-
- page_offset = kiov->kiov_offset + offset;
-
- resid = offset + nob;
- npages = 0;
-
- do {
- LASSERT (npages < LNET_MAX_IOV);
- LASSERT (nkiov > 0);
-
- if ((npages > 0 && kiov->kiov_offset != 0) ||
- (resid > kiov->kiov_len &&
- (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) {
- /* Can't have gaps */
- CERROR ("Can't make payload contiguous in I/O VM:"
- "page %d, offset %d, len %d \n",
- npages, kiov->kiov_offset, kiov->kiov_len);
-
- return -EINVAL;
- }
-
- tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page);
- resid -= kiov->kiov_len;
- kiov++;
- nkiov--;
- } while (resid > 0);
-
- return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
-}
-#endif
-
-kib_conn_t *
-kibnal_find_conn_locked (kib_peer_t *peer)
-{
- struct list_head *tmp;
-
- /* just return the first connection */
- list_for_each (tmp, &peer->ibp_conns) {
- return (list_entry(tmp, kib_conn_t, ibc_list));
- }
-
- return (NULL);
-}
-
-void
-kibnal_check_sends (kib_conn_t *conn)
-{
- kib_tx_t *tx;
- vv_return_t vvrc;
- int rc;
- int consume_cred;
- int done;
-
- /* Don't send anything until after the connection is established */
- if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) {
- CDEBUG(D_NET, "%s too soon\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
- }
-
- spin_lock(&conn->ibc_lock);
-
- LASSERT (conn->ibc_nsends_posted <=
- *kibnal_tunables.kib_concurrent_sends);
- LASSERT (conn->ibc_reserved_credits >= 0);
-
- while (conn->ibc_reserved_credits > 0 &&
- !list_empty(&conn->ibc_tx_queue_rsrvd)) {
- LASSERT (conn->ibc_version !=
- IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
- tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
- kib_tx_t, tx_list);
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
- conn->ibc_reserved_credits--;
- }
-
- if (list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER ||
- kibnal_send_keepalive(conn))) {
- spin_unlock(&conn->ibc_lock);
-
- tx = kibnal_get_idle_tx();
- if (tx != NULL)
- kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0);
-
- spin_lock(&conn->ibc_lock);
-
- if (tx != NULL)
- kibnal_queue_tx_locked(tx, conn);
- }
-
- for (;;) {
- if (!list_empty(&conn->ibc_tx_queue_nocred)) {
- LASSERT (conn->ibc_version !=
- IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
- tx = list_entry (conn->ibc_tx_queue_nocred.next,
- kib_tx_t, tx_list);
- consume_cred = 0;
- } else if (!list_empty (&conn->ibc_tx_queue)) {
- tx = list_entry (conn->ibc_tx_queue.next,
- kib_tx_t, tx_list);
- consume_cred = 1;
- } else {
- /* nothing waiting */
- break;
- }
-
- LASSERT (tx->tx_queued);
- /* We rely on this for QP sizing */
- LASSERT (tx->tx_nwrq > 0 && tx->tx_nwrq <= 1 + IBNAL_MAX_RDMA_FRAGS);
-
- LASSERT (conn->ibc_outstanding_credits >= 0);
- LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE);
- LASSERT (conn->ibc_credits >= 0);
- LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE);
-
- if (conn->ibc_nsends_posted ==
- *kibnal_tunables.kib_concurrent_sends) {
- /* We've got some tx completions outstanding... */
- CDEBUG(D_NET, "%s: posted enough\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
-
- if (consume_cred) {
- if (conn->ibc_credits == 0) { /* no credits */
- CDEBUG(D_NET, "%s: no credits\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
-
- if (conn->ibc_credits == 1 && /* last credit reserved for */
- conn->ibc_outstanding_credits == 0) { /* giving back credits */
- CDEBUG(D_NET, "%s: not using last credit\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
- }
-
- list_del (&tx->tx_list);
- tx->tx_queued = 0;
-
- /* NB don't drop ibc_lock before bumping tx_sending */
-
- if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP &&
- (!list_empty(&conn->ibc_tx_queue) ||
- !list_empty(&conn->ibc_tx_queue_nocred) ||
- (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER &&
- !kibnal_send_keepalive(conn)))) {
- /* redundant NOOP */
- spin_unlock(&conn->ibc_lock);
- kibnal_tx_done(tx);
- spin_lock(&conn->ibc_lock);
- CDEBUG(D_NET, "%s: redundant noop\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- continue;
- }
-
- kibnal_pack_msg(tx->tx_msg, conn->ibc_version,
- conn->ibc_outstanding_credits,
- conn->ibc_peer->ibp_nid, conn->ibc_incarnation,
- conn->ibc_txseq);
-
- conn->ibc_txseq++;
- conn->ibc_outstanding_credits = 0;
- conn->ibc_nsends_posted++;
- if (consume_cred)
- conn->ibc_credits--;
-
- /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA
- * PUT. If so, it was first queued here as a PUT_REQ, sent and
- * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
- * and then re-queued here. It's (just) possible that
- * tx_sending is non-zero if we've not done the tx_complete() from
- * the first send; hence the ++ rather than = below. */
- tx->tx_sending++;
-
- list_add (&tx->tx_list, &conn->ibc_active_txs);
-
- /* Keep holding ibc_lock while posting sends on this
- * connection; vv_post_send() isn't re-entrant on the same
- * QP!! */
-
- LASSERT (tx->tx_nwrq > 0);
-#if 0
- if (tx->tx_wrq[0].wr_type == vv_wr_rdma_write)
- CDEBUG(D_NET, "WORK[0]: RDMA gl %p for %d k %x -> "LPX64" k %x\n",
- tx->tx_wrq[0].scatgat_list->v_address,
- tx->tx_wrq[0].scatgat_list->length,
- tx->tx_wrq[0].scatgat_list->l_key,
- tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_addr,
- tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_r_key);
- else
- CDEBUG(D_NET, "WORK[0]: %s gl %p for %d k %x\n",
- tx->tx_wrq[0].wr_type == vv_wr_send ? "SEND" : "????",
- tx->tx_wrq[0].scatgat_list->v_address,
- tx->tx_wrq[0].scatgat_list->length,
- tx->tx_wrq[0].scatgat_list->l_key);
-
- if (tx->tx_nwrq > 1) {
- if (tx->tx_wrq[1].wr_type == vv_wr_rdma_write)
- CDEBUG(D_NET, "WORK[1]: RDMA gl %p for %d k %x -> "LPX64" k %x\n",
- tx->tx_wrq[1].scatgat_list->v_address,
- tx->tx_wrq[1].scatgat_list->length,
- tx->tx_wrq[1].scatgat_list->l_key,
- tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_addr,
- tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_r_key);
- else
- CDEBUG(D_NET, "WORK[1]: %s gl %p for %d k %x\n",
- tx->tx_wrq[1].wr_type == vv_wr_send ? "SEND" : "????",
- tx->tx_wrq[1].scatgat_list->v_address,
- tx->tx_wrq[1].scatgat_list->length,
- tx->tx_wrq[1].scatgat_list->l_key);
- }
-#endif
- rc = -ECONNABORTED;
- vvrc = vv_return_ok;
- if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
- tx->tx_status = 0;
- vvrc = vv_post_send_list(kibnal_data.kib_hca,
- conn->ibc_qp,
- tx->tx_nwrq,
- tx->tx_wrq,
- vv_operation_type_send_rc);
- rc = (vvrc == vv_return_ok) ? 0 : -EIO;
- }
-
- conn->ibc_last_send = jiffies;
-
- if (rc != 0) {
- /* NB credits are transferred in the actual
- * message, which can only be the last work item */
- conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits;
- if (consume_cred)
- conn->ibc_credits++;
- conn->ibc_nsends_posted--;
-
- tx->tx_status = rc;
- tx->tx_waiting = 0;
- tx->tx_sending--;
-
- done = (tx->tx_sending == 0);
- if (done)
- list_del (&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (conn->ibc_state == IBNAL_CONN_ESTABLISHED)
- CERROR ("Error %d posting transmit to %s\n",
- vvrc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- else
- CDEBUG (D_NET, "Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- kibnal_close_conn (conn, rc);
-
- if (done)
- kibnal_tx_done (tx);
- return;
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-}
-
-void
-kibnal_tx_complete (kib_tx_t *tx, vv_comp_status_t vvrc)
-{
- kib_conn_t *conn = tx->tx_conn;
- int failed = (vvrc != vv_comp_status_success);
- int idle;
-
- CDEBUG(D_NET, "tx %p conn %p sending %d nwrq %d vvrc %d\n",
- tx, conn, tx->tx_sending, tx->tx_nwrq, vvrc);
-
- LASSERT (tx->tx_sending > 0);
-
- if (failed &&
- tx->tx_status == 0 &&
- conn->ibc_state == IBNAL_CONN_ESTABLISHED)
- CDEBUG(D_NETERROR, "tx -> %s type %x cookie "LPX64
- "sending %d waiting %d: failed %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- tx->tx_msg->ibm_type, tx->tx_cookie,
- tx->tx_sending, tx->tx_waiting, vvrc);
-
- spin_lock(&conn->ibc_lock);
-
- /* I could be racing with rdma completion. Whoever makes 'tx' idle
- * gets to free it, which also drops its ref on 'conn'. */
-
- tx->tx_sending--;
- conn->ibc_nsends_posted--;
-
- if (failed) {
- tx->tx_waiting = 0;
- tx->tx_status = -EIO;
- }
-
- idle = (tx->tx_sending == 0) && /* This is the final callback */
- !tx->tx_waiting && /* Not waiting for peer */
- !tx->tx_queued; /* Not re-queued (PUT_DONE) */
- if (idle)
- list_del(&tx->tx_list);
-
- kibnal_conn_addref(conn); /* 1 ref for me.... */
-
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kibnal_tx_done (tx);
-
- if (failed) {
- kibnal_close_conn (conn, -EIO);
- } else {
- kibnal_peer_alive(conn->ibc_peer);
- kibnal_check_sends(conn);
- }
-
- kibnal_conn_decref(conn); /* ...until here */
-}
-
-void
-kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob)
-{
- vv_scatgat_t *gl = &tx->tx_gl[tx->tx_nwrq];
- vv_wr_t *wrq = &tx->tx_wrq[tx->tx_nwrq];
- int nob = offsetof (kib_msg_t, ibm_u) + body_nob;
- __u64 addr = (__u64)((unsigned long)((tx)->tx_msg));
-
- LASSERT (tx->tx_nwrq >= 0 &&
- tx->tx_nwrq < (1 + IBNAL_MAX_RDMA_FRAGS));
- LASSERT (nob <= IBNAL_MSG_SIZE);
-
- kibnal_init_msg(tx->tx_msg, type, body_nob);
-
- *gl = (vv_scatgat_t) {
- .v_address = KIBNAL_ADDR2SG(addr),
- .l_key = tx->tx_lkey,
- .length = nob,
- };
-
- memset(wrq, 0, sizeof(*wrq));
-
- wrq->wr_id = kibnal_ptr2wreqid(tx, IBNAL_WID_TX);
- wrq->wr_type = vv_wr_send;
- wrq->scatgat_list = gl;
- wrq->num_of_data_segments = 1;
- wrq->completion_notification = 1;
- wrq->type.send.solicited_event = 1;
- wrq->type.send.immidiate_data_indicator = 0;
- wrq->type.send.send_qp_type.rc_type.fance_indicator = 0;
-
- tx->tx_nwrq++;
-}
-
-int
-kibnal_init_rdma (kib_tx_t *tx, int type, int nob,
- kib_rdma_desc_t *dstrd, __u64 dstcookie)
-{
- kib_msg_t *ibmsg = tx->tx_msg;
- kib_rdma_desc_t *srcrd = tx->tx_rd;
- vv_scatgat_t *gl;
- vv_wr_t *wrq;
- int rc;
-
-#if IBNAL_USE_FMR
- LASSERT (tx->tx_nwrq == 0);
-
- gl = &tx->tx_gl[0];
- gl->length = nob;
- gl->v_address = KIBNAL_ADDR2SG(srcrd->rd_addr);
- gl->l_key = srcrd->rd_key;
-
- wrq = &tx->tx_wrq[0];
-
- wrq->wr_id = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA);
- wrq->completion_notification = 0;
- wrq->scatgat_list = gl;
- wrq->num_of_data_segments = 1;
- wrq->wr_type = vv_wr_rdma_write;
- wrq->type.send.solicited_event = 0;
- wrq->type.send.send_qp_type.rc_type.fance_indicator = 0;
- wrq->type.send.send_qp_type.rc_type.r_addr = dstrd->rd_addr;
- wrq->type.send.send_qp_type.rc_type.r_r_key = dstrd->rd_key;
-
- tx->tx_nwrq = 1;
- rc = nob;
-#else
- /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */
- int resid = nob;
- kib_rdma_frag_t *srcfrag;
- int srcidx;
- kib_rdma_frag_t *dstfrag;
- int dstidx;
- int wrknob;
-
- /* Called by scheduler */
- LASSERT (!in_interrupt());
-
- LASSERT (type == IBNAL_MSG_GET_DONE ||
- type == IBNAL_MSG_PUT_DONE);
-
- srcidx = dstidx = 0;
- srcfrag = &srcrd->rd_frags[0];
- dstfrag = &dstrd->rd_frags[0];
- rc = resid;
-
- while (resid > 0) {
- if (srcidx >= srcrd->rd_nfrag) {
- CERROR("Src buffer exhausted: %d frags\n", srcidx);
- rc = -EPROTO;
- break;
- }
-
- if (dstidx == dstrd->rd_nfrag) {
- CERROR("Dst buffer exhausted: %d frags\n", dstidx);
- rc = -EPROTO;
- break;
- }
-
- if (tx->tx_nwrq == IBNAL_MAX_RDMA_FRAGS) {
- CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n",
- srcidx, srcrd->rd_nfrag,
- dstidx, dstrd->rd_nfrag);
- rc = -EMSGSIZE;
- break;
- }
-
- wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid);
-
- gl = &tx->tx_gl[tx->tx_nwrq];
- gl->v_address = KIBNAL_ADDR2SG(kibnal_rf_addr(srcfrag));
- gl->length = wrknob;
- gl->l_key = srcrd->rd_key;
-
- wrq = &tx->tx_wrq[tx->tx_nwrq];
-
- wrq->wr_id = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA);
- wrq->completion_notification = 0;
- wrq->scatgat_list = gl;
- wrq->num_of_data_segments = 1;
- wrq->wr_type = vv_wr_rdma_write;
- wrq->type.send.solicited_event = 0;
- wrq->type.send.send_qp_type.rc_type.fance_indicator = 0;
- wrq->type.send.send_qp_type.rc_type.r_addr = kibnal_rf_addr(dstfrag);
- wrq->type.send.send_qp_type.rc_type.r_r_key = dstrd->rd_key;
-
- resid -= wrknob;
- if (wrknob < srcfrag->rf_nob) {
- kibnal_rf_set(srcfrag,
- kibnal_rf_addr(srcfrag) + wrknob,
- srcfrag->rf_nob - wrknob);
- } else {
- srcfrag++;
- srcidx++;
- }
-
- if (wrknob < dstfrag->rf_nob) {
- kibnal_rf_set(dstfrag,
- kibnal_rf_addr(dstfrag) + wrknob,
- dstfrag->rf_nob - wrknob);
- } else {
- dstfrag++;
- dstidx++;
- }
-
- tx->tx_nwrq++;
- }
-
- if (rc < 0) /* no RDMA if completing with failure */
- tx->tx_nwrq = 0;
-#endif
-
- ibmsg->ibm_u.completion.ibcm_status = rc;
- ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
- kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t));
-
- return rc;
-}
-
-void
-kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
-{
- spin_lock(&conn->ibc_lock);
- kibnal_queue_tx_locked (tx, conn);
- spin_unlock(&conn->ibc_lock);
-
- kibnal_check_sends(conn);
-}
-
-void
-kibnal_schedule_peer_arp (kib_peer_t *peer)
-{
- unsigned long flags;
-
- LASSERT (peer->ibp_connecting != 0);
- LASSERT (peer->ibp_arp_count > 0);
-
- kibnal_peer_addref(peer); /* extra ref for connd */
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- list_add_tail (&peer->ibp_connd_list, &kibnal_data.kib_connd_peers);
- wake_up (&kibnal_data.kib_connd_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
-}
-
-void
-kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- kib_conn_t *conn;
- unsigned long flags;
- rwlock_t *g_lock = &kibnal_data.kib_global_lock;
- int retry;
- int rc;
-
- /* If I get here, I've committed to send, so I complete the tx with
- * failure on any problems */
-
- LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */
- LASSERT (tx->tx_nwrq > 0); /* work items have been set up */
-
- for (retry = 0; ; retry = 1) {
- read_lock_irqsave(g_lock, flags);
-
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL) {
- conn = kibnal_find_conn_locked (peer);
- if (conn != NULL) {
- kibnal_conn_addref(conn); /* 1 ref for me... */
- read_unlock_irqrestore(g_lock, flags);
-
- kibnal_queue_tx (tx, conn);
- kibnal_conn_decref(conn); /* ...to here */
- return;
- }
- }
-
- /* Making one or more connections; I'll need a write lock... */
- read_unlock(g_lock);
- write_lock(g_lock);
-
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL)
- break;
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (retry) {
- CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
-
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kibnal_tx_done (tx);
- return;
- }
-
- rc = kibnal_add_persistent_peer(nid, LNET_NIDADDR(nid));
- if (rc != 0) {
- CERROR("Can't add peer %s: %d\n",
- libcfs_nid2str(nid), rc);
-
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kibnal_tx_done (tx);
- return;
- }
- }
-
- conn = kibnal_find_conn_locked (peer);
- if (conn != NULL) {
- /* Connection exists; queue message on it */
- kibnal_conn_addref(conn); /* 1 ref for me... */
- write_unlock_irqrestore(g_lock, flags);
-
- kibnal_queue_tx (tx, conn);
- kibnal_conn_decref(conn); /* ...until here */
- return;
- }
-
- if (peer->ibp_connecting == 0 &&
- peer->ibp_accepting == 0) {
- if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */
- time_after_eq(jiffies, peer->ibp_reconnect_time))) {
- write_unlock_irqrestore(g_lock, flags);
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kibnal_tx_done (tx);
- return;
- }
-
- peer->ibp_connecting = 1;
- peer->ibp_arp_count = 1 + *kibnal_tunables.kib_arp_retries;
- kibnal_schedule_peer_arp(peer);
- }
-
- /* A connection is being established; queue the message... */
- list_add_tail (&tx->tx_list, &peer->ibp_tx_queue);
-
- write_unlock_irqrestore(g_lock, flags);
-}
-
-int
-kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- lnet_hdr_t *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- kib_msg_t *ibmsg;
- kib_tx_t *tx;
- int nob;
- int rc;
-
- /* NB 'private' is different depending on what we're sending.... */
-
- CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT (payload_nob == 0 || payload_niov > 0);
- LASSERT (payload_niov <= LNET_MAX_IOV);
-
- /* Thread context */
- LASSERT (!in_interrupt());
- /* payload is either all vaddrs or all pages */
- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
- switch (type) {
- default:
- LBUG();
- return (-EIO);
-
- case LNET_MSG_ACK:
- LASSERT (payload_nob == 0);
- break;
-
- case LNET_MSG_GET:
- if (routing || target_is_router)
- break; /* send IMMEDIATE */
-
- /* is the REPLY message too small for RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
- if (nob <= IBNAL_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can allocate txd for GET to %s: \n",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.get.ibgm_hdr = *hdr;
- ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
-
- if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
- rc = kibnal_setup_rd_iov(tx, &ibmsg->ibm_u.get.ibgm_rd,
- vv_acc_r_mem_write,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.iov,
- 0, lntmsg->msg_md->md_length);
- else
- rc = kibnal_setup_rd_kiov(tx, &ibmsg->ibm_u.get.ibgm_rd,
- vv_acc_r_mem_write,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.kiov,
- 0, lntmsg->msg_md->md_length);
- if (rc != 0) {
- CERROR("Can't setup GET sink for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kibnal_tx_done(tx);
- return -EIO;
- }
-
-#if IBNAL_USE_FMR
- nob = sizeof(kib_get_msg_t);
-#else
- {
- int n = ibmsg->ibm_u.get.ibgm_rd.rd_nfrag;
-
- nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]);
- }
-#endif
- kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob);
-
- tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni,
- lntmsg);
- if (tx->tx_lntmsg[1] == NULL) {
- CERROR("Can't create reply for GET -> %s\n",
- libcfs_nid2str(target.nid));
- kibnal_tx_done(tx);
- return -EIO;
- }
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */
- tx->tx_waiting = 1; /* waiting for GET_DONE */
- kibnal_launch_tx(tx, target.nid);
- return 0;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- /* Is the payload small enough not to need RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob <= IBNAL_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can't allocate %s txd for %s\n",
- type == LNET_MSG_PUT ? "PUT" : "REPLY",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- if (payload_kiov == NULL)
- rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 0,
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- else
- rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 0,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- if (rc != 0) {
- CERROR("Can't setup PUT src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kibnal_tx_done(tx);
- return -EIO;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
- ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
- kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_REQ, sizeof(kib_putreq_msg_t));
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */
- kibnal_launch_tx(tx, target.nid);
- return 0;
- }
-
- /* send IMMEDIATE */
-
- LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob])
- <= IBNAL_MSG_SIZE);
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR ("Can't send %d to %s: tx descs exhausted\n",
- type, libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
- if (payload_kiov != NULL)
- lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-
- nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]);
- kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- kibnal_launch_tx(tx, target.nid);
- return 0;
-}
-
-void
-kibnal_reply (lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg)
-{
- lnet_process_id_t target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- kib_tx_t *tx;
- int rc;
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can't get tx for REPLY to %s\n",
- libcfs_nid2str(target.nid));
- goto failed_0;
- }
-
- if (nob == 0)
- rc = 0;
- else if (kiov == NULL)
- rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 0,
- niov, iov, offset, nob);
- else
- rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 0,
- niov, kiov, offset, nob);
-
- if (rc != 0) {
- CERROR("Can't setup GET src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- rc = kibnal_init_rdma(tx, IBNAL_MSG_GET_DONE, nob,
- &rx->rx_msg->ibm_u.get.ibgm_rd,
- rx->rx_msg->ibm_u.get.ibgm_cookie);
- if (rc < 0) {
- CERROR("Can't setup rdma for GET from %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- if (rc == 0) {
- /* No RDMA: local completion may happen now! */
- lnet_finalize(ni, lntmsg, 0);
- } else {
- /* RDMA: lnet_finalize(lntmsg) when it
- * completes */
- tx->tx_lntmsg[0] = lntmsg;
- }
-
- kibnal_queue_tx(tx, rx->rx_conn);
- return;
-
- failed_1:
- kibnal_tx_done(tx);
- failed_0:
- lnet_finalize(ni, lntmsg, -EIO);
-}
-
-int
-kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- void **new_private)
-{
- kib_rx_t *rx = private;
- kib_conn_t *conn = rx->rx_conn;
-
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- /* Can't block if RDMA completions need normal credits */
- LCONSOLE_ERROR_MSG(0x129, "Dropping message from %s: no buffers"
- " free. %s is running an old version of LNET "
- "that may deadlock if messages wait for"
- "buffers) \n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return -EDEADLK;
- }
-
- *new_private = private;
- return 0;
-}
-
-int
-kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- kib_rx_t *rx = private;
- kib_msg_t *rxmsg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- kib_tx_t *tx;
- kib_msg_t *txmsg;
- int nob;
- int post_cred = 1;
- int rc = 0;
-
- LASSERT (mlen <= rlen);
- LASSERT (!in_interrupt());
- /* Either all pages or all vaddrs */
- LASSERT (!(kiov != NULL && iov != NULL));
-
- switch (rxmsg->ibm_type) {
- default:
- LBUG();
-
- case IBNAL_MSG_IMMEDIATE:
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]);
- if (nob > rx->rx_nob) {
- CERROR ("Immediate message from %s too big: %d(%d)\n",
- libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
- nob, rx->rx_nob);
- rc = -EPROTO;
- break;
- }
-
- if (kiov != NULL)
- lnet_copy_flat2kiov(niov, kiov, offset,
- IBNAL_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
- else
- lnet_copy_flat2iov(niov, iov, offset,
- IBNAL_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
- lnet_finalize (ni, lntmsg, 0);
- break;
-
- case IBNAL_MSG_PUT_REQ:
- if (mlen == 0) {
- lnet_finalize(ni, lntmsg, 0);
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can't allocate tx for %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /* Not replying will break the connection */
- rc = -ENOMEM;
- break;
- }
-
- txmsg = tx->tx_msg;
- if (kiov == NULL)
- rc = kibnal_setup_rd_iov(tx,
- &txmsg->ibm_u.putack.ibpam_rd,
- vv_acc_r_mem_write,
- niov, iov, offset, mlen);
- else
- rc = kibnal_setup_rd_kiov(tx,
- &txmsg->ibm_u.putack.ibpam_rd,
- vv_acc_r_mem_write,
- niov, kiov, offset, mlen);
- if (rc != 0) {
- CERROR("Can't setup PUT sink for %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kibnal_tx_done(tx);
- /* tell peer it's over */
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
- txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
-#if IBNAL_USE_FMR
- nob = sizeof(kib_putack_msg_t);
-#else
- {
- int n = tx->tx_msg->ibm_u.putack.ibpam_rd.rd_nfrag;
-
- nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]);
- }
-#endif
- kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_ACK, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_DONE */
- kibnal_queue_tx(tx, conn);
-
- if (conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD)
- post_cred = 0; /* peer still owns 'rx' for sending PUT_DONE */
- break;
-
- case IBNAL_MSG_GET_REQ:
- if (lntmsg != NULL) {
- /* Optimized GET; RDMA lntmsg's payload */
- kibnal_reply(ni, rx, lntmsg);
- } else {
- /* GET didn't match anything */
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE,
- -ENODATA,
- rxmsg->ibm_u.get.ibgm_cookie);
- }
- break;
- }
-
- kibnal_post_rx(rx, post_cred, 0);
- return rc;
-}
-
-int
-kibnal_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid = kernel_thread (fn, arg, 0);
-
- if (pid < 0)
- return ((int)pid);
-
- atomic_inc (&kibnal_data.kib_nthreads);
- return (0);
-}
-
-void
-kibnal_thread_fini (void)
-{
- atomic_dec (&kibnal_data.kib_nthreads);
-}
-
-void
-kibnal_peer_alive (kib_peer_t *peer)
-{
- /* This is racy, but everyone's only writing cfs_time_current() */
- peer->ibp_last_alive = cfs_time_current();
- mb();
-}
-
-void
-kibnal_peer_notify (kib_peer_t *peer)
-{
- time_t last_alive = 0;
- int error = 0;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (list_empty(&peer->ibp_conns) &&
- peer->ibp_accepting == 0 &&
- peer->ibp_connecting == 0 &&
- peer->ibp_error != 0) {
- error = peer->ibp_error;
- peer->ibp_error = 0;
-
- last_alive = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() -
- peer->ibp_last_alive);
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (error != 0)
- lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive);
-}
-
-void
-kibnal_schedule_conn (kib_conn_t *conn)
-{
- unsigned long flags;
-
- kibnal_conn_addref(conn); /* ++ref for connd */
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns);
- wake_up (&kibnal_data.kib_connd_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
-}
-
-void
-kibnal_close_conn_locked (kib_conn_t *conn, int error)
-{
- /* This just does the immediate housekeeping. 'error' is zero for a
- * normal shutdown which can happen only after the connection has been
- * established. If the connection is established, schedule the
- * connection to be finished off by the connd. Otherwise the connd is
- * already dealing with it (either to set it up or tear it down).
- * Caller holds kib_global_lock exclusively in irq context */
- kib_peer_t *peer = conn->ibc_peer;
-
- LASSERT (error != 0 || conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- if (error != 0 && conn->ibc_comms_error == 0)
- conn->ibc_comms_error = error;
-
- if (conn->ibc_state != IBNAL_CONN_ESTABLISHED)
- return; /* already being handled */
-
- /* NB Can't take ibc_lock here (could be in IRQ context), without
- * risking deadlock, so access to ibc_{tx_queue,active_txs} is racey */
-
- if (error == 0 &&
- list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_rsrvd) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- list_empty(&conn->ibc_active_txs)) {
- CDEBUG(D_NET, "closing conn to %s"
- " rx# "LPD64" tx# "LPD64"\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_txseq, conn->ibc_rxseq);
- } else {
- CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s"
- " rx# "LPD64" tx# "LPD64"\n",
- libcfs_nid2str(peer->ibp_nid), error,
- list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
- list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
- list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
- list_empty(&conn->ibc_active_txs) ? "" : "(waiting)",
- conn->ibc_txseq, conn->ibc_rxseq);
- }
-
- list_del (&conn->ibc_list);
-
- if (list_empty (&peer->ibp_conns)) { /* no more conns */
- if (peer->ibp_persistence == 0 && /* non-persistent peer */
- kibnal_peer_active(peer)) /* still in peer table */
- kibnal_unlink_peer_locked (peer);
-
- /* set/clear error on last conn */
- peer->ibp_error = conn->ibc_comms_error;
- }
-
- kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECT1);
-
- kibnal_schedule_conn(conn);
- kibnal_conn_decref(conn); /* lose ibc_list's ref */
-}
-
-void
-kibnal_close_conn (kib_conn_t *conn, int error)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- kibnal_close_conn_locked (conn, error);
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_handle_early_rxs(kib_conn_t *conn)
-{
- unsigned long flags;
- kib_rx_t *rx;
-
- LASSERT (!in_interrupt());
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- while (!list_empty(&conn->ibc_early_rxs)) {
- rx = list_entry(conn->ibc_early_rxs.next,
- kib_rx_t, rx_list);
- list_del(&rx->rx_list);
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_handle_rx(rx);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- }
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_abort_txs(kib_conn_t *conn, struct list_head *txs)
-{
- LIST_HEAD (zombies);
- struct list_head *tmp;
- struct list_head *nxt;
- kib_tx_t *tx;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each_safe (tmp, nxt, txs) {
- tx = list_entry (tmp, kib_tx_t, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_waiting || tx->tx_sending != 0);
- } else {
- LASSERT (tx->tx_queued);
- }
-
- tx->tx_status = -ECONNABORTED;
- tx->tx_queued = 0;
- tx->tx_waiting = 0;
-
- if (tx->tx_sending == 0) {
- list_del (&tx->tx_list);
- list_add (&tx->tx_list, &zombies);
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-
- kibnal_txlist_done(&zombies, -ECONNABORTED);
-}
-
-void
-kibnal_conn_disconnected(kib_conn_t *conn)
-{
- /* I'm the connd */
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
- LASSERT (conn->ibc_state >= IBNAL_CONN_INIT);
-
- kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTED);
-
- /* move QP to error state to make posted work items complete */
- kibnal_set_qp_state(conn, vv_qp_state_error);
-
- /* Complete all tx descs not waiting for sends to complete.
- * NB we should be safe from RDMA now that the QP has changed state */
-
- kibnal_abort_txs(conn, &conn->ibc_tx_queue);
- kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
- kibnal_abort_txs(conn, &conn->ibc_tx_queue_nocred);
- kibnal_abort_txs(conn, &conn->ibc_active_txs);
-
- kibnal_handle_early_rxs(conn);
-
- kibnal_peer_notify(conn->ibc_peer);
-}
-
-void
-kibnal_peer_connect_failed (kib_peer_t *peer, int active, int error)
-{
- LIST_HEAD (zombies);
- unsigned long flags;
-
- /* Only the connd creates conns => single threaded */
- LASSERT (error != 0);
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (active) {
- LASSERT (peer->ibp_connecting != 0);
- peer->ibp_connecting--;
- } else {
- LASSERT (peer->ibp_accepting != 0);
- peer->ibp_accepting--;
- }
-
- if (peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0) {
- /* another connection attempt under way (loopback?)... */
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return;
- }
-
- if (list_empty(&peer->ibp_conns)) {
- /* Say when active connection can be re-attempted */
- peer->ibp_reconnect_interval *= 2;
- peer->ibp_reconnect_interval =
- MAX(peer->ibp_reconnect_interval,
- *kibnal_tunables.kib_min_reconnect_interval);
- peer->ibp_reconnect_interval =
- MIN(peer->ibp_reconnect_interval,
- *kibnal_tunables.kib_max_reconnect_interval);
-
- peer->ibp_reconnect_time = jiffies +
- peer->ibp_reconnect_interval * HZ;
-
- /* Take peer's blocked transmits to complete with error */
- list_add(&zombies, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (kibnal_peer_active(peer) &&
- (peer->ibp_persistence == 0)) {
- /* failed connection attempt on non-persistent peer */
- kibnal_unlink_peer_locked (peer);
- }
-
- peer->ibp_error = error;
- } else {
- /* Can't have blocked transmits if there are connections */
- LASSERT (list_empty(&peer->ibp_tx_queue));
- }
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_peer_notify(peer);
-
- if (list_empty (&zombies))
- return;
-
- CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_txlist_done(&zombies, -EHOSTUNREACH);
-}
-
-void
-kibnal_reject(cm_cep_handle_t cep, int why)
-{
- static cm_reject_data_t rejs[3];
- cm_reject_data_t *rej = &rejs[why];
-
- LASSERT (why >= 0 && why < sizeof(rejs)/sizeof(rejs[0]));
-
- /* If I wasn't so lazy, I'd initialise this only once; it's effective
- * read-only */
- rej->reason = cm_rej_code_usr_rej;
- rej->priv_data[0] = (IBNAL_MSG_MAGIC) & 0xff;
- rej->priv_data[1] = (IBNAL_MSG_MAGIC >> 8) & 0xff;
- rej->priv_data[2] = (IBNAL_MSG_MAGIC >> 16) & 0xff;
- rej->priv_data[3] = (IBNAL_MSG_MAGIC >> 24) & 0xff;
- rej->priv_data[4] = (IBNAL_MSG_VERSION) & 0xff;
- rej->priv_data[5] = (IBNAL_MSG_VERSION >> 8) & 0xff;
- rej->priv_data[6] = why;
-
- cm_reject(cep, rej);
-}
-
-void
-kibnal_connreq_done(kib_conn_t *conn, int active, int status)
-{
- struct list_head txs;
- kib_peer_t *peer = conn->ibc_peer;
- unsigned long flags;
- kib_tx_t *tx;
-
- CDEBUG(D_NET,"%d\n", status);
-
- /* Only the connd creates conns => single threaded */
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
- LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED);
-
- if (active) {
- LASSERT (peer->ibp_connecting > 0);
- } else {
- LASSERT (peer->ibp_accepting > 0);
- }
-
- LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
- conn->ibc_connvars = NULL;
-
- if (status != 0) {
- /* failed to establish connection */
- switch (conn->ibc_state) {
- default:
- LBUG();
-
- case IBNAL_CONN_ACTIVE_CHECK_REPLY:
- /* got a connection reply but failed checks */
- LASSERT (active);
- kibnal_reject(conn->ibc_cep, IBNAL_REJECT_FATAL);
- break;
-
- case IBNAL_CONN_ACTIVE_CONNECT:
- LASSERT (active);
- cm_cancel(conn->ibc_cep);
- cfs_pause(cfs_time_seconds(1)/10);
- /* cm_connect() failed immediately or
- * callback returned failure */
- break;
-
- case IBNAL_CONN_ACTIVE_ARP:
- LASSERT (active);
- /* ibat_get_ib_data() failed immediately
- * or callback returned failure */
- break;
-
- case IBNAL_CONN_INIT:
- break;
-
- case IBNAL_CONN_PASSIVE_WAIT:
- LASSERT (!active);
- /* cm_accept callback returned failure */
- break;
- }
-
- kibnal_peer_connect_failed(peer, active, status);
- kibnal_conn_disconnected(conn);
- return;
- }
-
- /* connection established */
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (active) {
- LASSERT(conn->ibc_state == IBNAL_CONN_ACTIVE_RTU);
- } else {
- LASSERT(conn->ibc_state == IBNAL_CONN_PASSIVE_WAIT);
- }
-
- conn->ibc_last_send = jiffies;
- kibnal_set_conn_state(conn, IBNAL_CONN_ESTABLISHED);
- kibnal_peer_alive(peer);
-
- /* Add conn to peer's list and nuke any dangling conns from a different
- * peer instance... */
- kibnal_conn_addref(conn); /* +1 ref for ibc_list */
- list_add(&conn->ibc_list, &peer->ibp_conns);
- kibnal_close_stale_conns_locked (peer, conn->ibc_incarnation);
-
- if (!kibnal_peer_active(peer) || /* peer has been deleted */
- conn->ibc_comms_error != 0 || /* comms error */
- conn->ibc_disconnect) { /* need to disconnect */
-
- /* start to shut down connection */
- kibnal_close_conn_locked(conn, -ECONNABORTED);
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- kibnal_peer_connect_failed(peer, active, -ECONNABORTED);
- return;
- }
-
- if (active)
- peer->ibp_connecting--;
- else
- peer->ibp_accepting--;
-
- /* grab pending txs while I have the lock */
- list_add(&txs, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- /* Schedule blocked txs */
- spin_lock (&conn->ibc_lock);
- while (!list_empty (&txs)) {
- tx = list_entry (txs.next, kib_tx_t, tx_list);
- list_del (&tx->tx_list);
-
- kibnal_queue_tx_locked (tx, conn);
- }
- spin_unlock (&conn->ibc_lock);
- kibnal_check_sends (conn);
-
- /* schedule blocked rxs */
- kibnal_handle_early_rxs(conn);
-}
-
-void
-kibnal_cm_callback(cm_cep_handle_t cep, cm_conn_data_t *cmdata, void *arg)
-{
- static cm_dreply_data_t drep; /* just zeroed space */
-
- kib_conn_t *conn = (kib_conn_t *)arg;
- unsigned long flags;
-
- /* CAVEAT EMPTOR: tasklet context */
-
- switch (cmdata->status) {
- default:
- LBUG();
-
- case cm_event_disconn_request:
- /* IBNAL_CONN_ACTIVE_RTU: gets closed in kibnal_connreq_done
- * IBNAL_CONN_ESTABLISHED: I start it closing
- * otherwise: it's closing anyway */
- cm_disconnect(conn->ibc_cep, NULL, &drep);
- cm_cancel(conn->ibc_cep);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- LASSERT (!conn->ibc_disconnect);
- conn->ibc_disconnect = 1;
-
- switch (conn->ibc_state) {
- default:
- LBUG();
-
- case IBNAL_CONN_ACTIVE_RTU:
- /* kibnal_connreq_done is getting there; It'll see
- * ibc_disconnect set... */
- break;
-
- case IBNAL_CONN_ESTABLISHED:
- /* kibnal_connreq_done got there already; get
- * disconnect going... */
- kibnal_close_conn_locked(conn, 0);
- break;
-
- case IBNAL_CONN_DISCONNECT1:
- /* kibnal_disconnect_conn is getting there; It'll see
- * ibc_disconnect set... */
- break;
-
- case IBNAL_CONN_DISCONNECT2:
- /* kibnal_disconnect_conn got there already; complete
- * the disconnect. */
- kibnal_schedule_conn(conn);
- break;
- }
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- break;
-
- case cm_event_disconn_timeout:
- case cm_event_disconn_reply:
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECT2);
- LASSERT (!conn->ibc_disconnect);
- conn->ibc_disconnect = 1;
-
- /* kibnal_disconnect_conn sent the disconnect request. */
- kibnal_schedule_conn(conn);
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- break;
-
- case cm_event_connected:
- case cm_event_conn_timeout:
- case cm_event_conn_reject:
- LASSERT (conn->ibc_state == IBNAL_CONN_PASSIVE_WAIT);
- conn->ibc_connvars->cv_conndata = *cmdata;
-
- kibnal_schedule_conn(conn);
- break;
- }
-
- kibnal_conn_decref(conn); /* lose my ref */
-}
-
-void
-kibnal_check_passive_wait(kib_conn_t *conn)
-{
- int rc;
-
- switch (conn->ibc_connvars->cv_conndata.status) {
- default:
- LBUG();
-
- case cm_event_connected:
- kibnal_conn_addref(conn); /* ++ ref for CM callback */
- rc = kibnal_set_qp_state(conn, vv_qp_state_rts);
- if (rc != 0)
- conn->ibc_comms_error = rc;
- /* connection _has_ been established; it's just that we've had
- * an error immediately... */
- kibnal_connreq_done(conn, 0, 0);
- break;
-
- case cm_event_conn_timeout:
- kibnal_connreq_done(conn, 0, -ETIMEDOUT);
- break;
-
- case cm_event_conn_reject:
- kibnal_connreq_done(conn, 0, -ECONNRESET);
- break;
- }
-}
-
-void
-kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq)
-{
- static kib_msg_t txmsg;
- static kib_msg_t rxmsg;
- static cm_reply_data_t reply;
-
- kib_conn_t *conn = NULL;
- int rc = 0;
- int reason;
- int rxmsgnob;
- rwlock_t *g_lock = &kibnal_data.kib_global_lock;
- kib_peer_t *peer;
- kib_peer_t *peer2;
- unsigned long flags;
- kib_connvars_t *cv;
- cm_return_t cmrc;
- vv_return_t vvrc;
-
- /* I'm the connd executing in thread context
- * No concurrency problems with static data! */
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
-
- if (cmreq->sid != (__u64)(*kibnal_tunables.kib_service_number)) {
- CERROR(LPX64" != IBNAL_SERVICE_NUMBER("LPX64")\n",
- cmreq->sid, (__u64)(*kibnal_tunables.kib_service_number));
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- /* copy into rxmsg to avoid alignment issues */
- rxmsgnob = MIN(cm_REQ_priv_data_len, sizeof(rxmsg));
- memcpy(&rxmsg, cmreq->priv_data, rxmsgnob);
-
- rc = kibnal_unpack_msg(&rxmsg, 0, rxmsgnob);
- if (rc != 0) {
- /* SILENT! kibnal_unpack_msg() complains if required */
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- if (rxmsg.ibm_version != IBNAL_MSG_VERSION)
- CWARN("Connection from %s: old protocol version 0x%x\n",
- libcfs_nid2str(rxmsg.ibm_srcnid), rxmsg.ibm_version);
-
- if (rxmsg.ibm_type != IBNAL_MSG_CONNREQ) {
- CERROR("Unexpected connreq msg type: %x from %s\n",
- rxmsg.ibm_type, libcfs_nid2str(rxmsg.ibm_srcnid));
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
- rxmsg.ibm_dstnid)) {
- CERROR("Can't accept %s: bad dst nid %s\n",
- libcfs_nid2str(rxmsg.ibm_srcnid),
- libcfs_nid2str(rxmsg.ibm_dstnid));
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- if (rxmsg.ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) {
- CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n",
- libcfs_nid2str(rxmsg.ibm_srcnid),
- rxmsg.ibm_u.connparams.ibcp_queue_depth,
- IBNAL_MSG_QUEUE_SIZE);
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- if (rxmsg.ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE) {
- CERROR("Can't accept %s: message size %d too big (%d max)\n",
- libcfs_nid2str(rxmsg.ibm_srcnid),
- rxmsg.ibm_u.connparams.ibcp_max_msg_size,
- IBNAL_MSG_SIZE);
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- if (rxmsg.ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Can't accept %s: max frags %d too big (%d max)\n",
- libcfs_nid2str(rxmsg.ibm_srcnid),
- rxmsg.ibm_u.connparams.ibcp_max_frags,
- IBNAL_MAX_RDMA_FRAGS);
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- /* assume 'rxmsg.ibm_srcnid' is a new peer; create */
- rc = kibnal_create_peer (&peer, rxmsg.ibm_srcnid);
- if (rc != 0) {
- CERROR("Can't create peer for %s\n",
- libcfs_nid2str(rxmsg.ibm_srcnid));
- reason = IBNAL_REJECT_NO_RESOURCES;
- goto reject;
- }
-
- write_lock_irqsave(g_lock, flags);
-
- if (kibnal_data.kib_listen_handle == NULL) {
- write_unlock_irqrestore(g_lock, flags);
-
- CWARN ("Shutdown has started, rejecting connreq from %s\n",
- libcfs_nid2str(rxmsg.ibm_srcnid));
- kibnal_peer_decref(peer);
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- peer2 = kibnal_find_peer_locked(rxmsg.ibm_srcnid);
- if (peer2 != NULL) {
- /* tie-break connection race in favour of the higher NID */
- if (peer2->ibp_connecting != 0 &&
- rxmsg.ibm_srcnid < kibnal_data.kib_ni->ni_nid) {
- write_unlock_irqrestore(g_lock, flags);
-
- CWARN("Conn race %s\n",
- libcfs_nid2str(peer2->ibp_nid));
-
- kibnal_peer_decref(peer);
- reason = IBNAL_REJECT_CONN_RACE;
- goto reject;
- }
-
- peer2->ibp_accepting++;
- kibnal_peer_addref(peer2);
-
- write_unlock_irqrestore(g_lock, flags);
- kibnal_peer_decref(peer);
- peer = peer2;
- } else {
- /* Brand new peer */
- LASSERT (peer->ibp_accepting == 0);
- peer->ibp_accepting = 1;
-
- kibnal_peer_addref(peer);
- list_add_tail(&peer->ibp_list, kibnal_nid2peerlist(rxmsg.ibm_srcnid));
-
- write_unlock_irqrestore(g_lock, flags);
- }
-
- conn = kibnal_create_conn(cep);
- if (conn == NULL) {
- CERROR("Can't create conn for %s\n",
- libcfs_nid2str(rxmsg.ibm_srcnid));
- kibnal_peer_connect_failed(peer, 0, -ENOMEM);
- kibnal_peer_decref(peer);
- reason = IBNAL_REJECT_NO_RESOURCES;
- goto reject;
- }
-
- conn->ibc_version = rxmsg.ibm_version;
-
- conn->ibc_peer = peer; /* conn takes over my ref */
- conn->ibc_incarnation = rxmsg.ibm_srcstamp;
- conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE;
- LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
- <= IBNAL_RX_MSGS);
-
- cv = conn->ibc_connvars;
-
- cv->cv_txpsn = cmreq->cep_data.start_psn;
- cv->cv_remote_qpn = cmreq->cep_data.qpn;
- cv->cv_path = cmreq->path_data.path;
- cv->cv_rnr_count = cmreq->cep_data.rtr_retry_cnt;
- // XXX cmreq->cep_data.retry_cnt;
- cv->cv_port = cmreq->cep_data.local_port_num;
-
- vvrc = gid2gid_index(kibnal_data.kib_hca, cv->cv_port,
- &cv->cv_path.sgid, &cv->cv_sgid_index);
- if (vvrc != vv_return_ok) {
- CERROR("gid2gid_index failed for %s: %d\n",
- libcfs_nid2str(rxmsg.ibm_srcnid), vvrc);
- rc = -EIO;
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- vvrc = pkey2pkey_index(kibnal_data.kib_hca, cv->cv_port,
- cv->cv_path.pkey, &cv->cv_pkey_index);
- if (vvrc != vv_return_ok) {
- CERROR("pkey2pkey_index failed for %s: %d\n",
- libcfs_nid2str(rxmsg.ibm_srcnid), vvrc);
- rc = -EIO;
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- rc = kibnal_set_qp_state(conn, vv_qp_state_init);
- if (rc != 0) {
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- rc = kibnal_post_receives(conn);
- if (rc != 0) {
- CERROR("Can't post receives for %s\n",
- libcfs_nid2str(rxmsg.ibm_srcnid));
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- rc = kibnal_set_qp_state(conn, vv_qp_state_rtr);
- if (rc != 0) {
- reason = IBNAL_REJECT_FATAL;
- goto reject;
- }
-
- memset(&reply, 0, sizeof(reply));
- reply.qpn = cv->cv_local_qpn;
- reply.qkey = IBNAL_QKEY;
- reply.start_psn = cv->cv_rxpsn;
- reply.arb_initiator_depth = IBNAL_ARB_INITIATOR_DEPTH;
- reply.arb_resp_res = IBNAL_ARB_RESP_RES;
- reply.failover_accepted = IBNAL_FAILOVER_ACCEPTED;
- reply.rnr_retry_count = cv->cv_rnr_count;
- reply.targ_ack_delay = kibnal_data.kib_hca_attrs.ack_delay;
-
- /* setup txmsg... */
- memset(&txmsg, 0, sizeof(txmsg));
- kibnal_init_msg(&txmsg, IBNAL_MSG_CONNACK,
- sizeof(txmsg.ibm_u.connparams));
- LASSERT (txmsg.ibm_nob <= cm_REP_priv_data_len);
- txmsg.ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE;
- txmsg.ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE;
- txmsg.ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS;
- kibnal_pack_msg(&txmsg, conn->ibc_version,
- 0, rxmsg.ibm_srcnid, rxmsg.ibm_srcstamp, 0);
-
- /* ...and copy into reply to avoid alignment issues */
- memcpy(&reply.priv_data, &txmsg, txmsg.ibm_nob);
-
- kibnal_set_conn_state(conn, IBNAL_CONN_PASSIVE_WAIT);
-
- cmrc = cm_accept(conn->ibc_cep, &reply, NULL,
- kibnal_cm_callback, conn);
-
- if (cmrc == cm_stat_success)
- return; /* callback has got my ref on conn */
-
- /* back out state change (no callback happening) */
- kibnal_set_conn_state(conn, IBNAL_CONN_INIT);
- rc = -EIO;
- reason = IBNAL_REJECT_FATAL;
-
- reject:
- CDEBUG(D_NET, "Rejecting connreq from %s\n",
- libcfs_nid2str(rxmsg.ibm_srcnid));
-
- kibnal_reject(cep, reason);
-
- if (conn != NULL) {
- LASSERT (rc != 0);
- kibnal_connreq_done(conn, 0, rc);
- } else {
- cm_destroy_cep(cep);
- }
-}
-
-void
-kibnal_listen_callback(cm_cep_handle_t cep, cm_conn_data_t *data, void *arg)
-{
- cm_request_data_t *cmreq = &data->data.request;
- kib_pcreq_t *pcr;
- unsigned long flags;
-
- LASSERT (arg == NULL);
-
- if (data->status != cm_event_conn_request) {
- CERROR("status %d is not cm_event_conn_request\n",
- data->status);
- return;
- }
-
- LIBCFS_ALLOC_ATOMIC(pcr, sizeof(*pcr));
- if (pcr == NULL) {
- CERROR("Can't allocate passive connreq\n");
-
- kibnal_reject(cep, IBNAL_REJECT_NO_RESOURCES);
- cm_destroy_cep(cep);
- return;
- }
-
- pcr->pcr_cep = cep;
- pcr->pcr_cmreq = *cmreq;
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- list_add_tail(&pcr->pcr_list, &kibnal_data.kib_connd_pcreqs);
- wake_up(&kibnal_data.kib_connd_waitq);
-spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
-}
-
-
-void
-kibnal_active_connect_callback (cm_cep_handle_t cep, cm_conn_data_t *cd,
- void *arg)
-{
- /* CAVEAT EMPTOR: tasklet context */
- kib_conn_t *conn = (kib_conn_t *)arg;
- kib_connvars_t *cv = conn->ibc_connvars;
-
- LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT);
- cv->cv_conndata = *cd;
-
- kibnal_schedule_conn(conn);
- kibnal_conn_decref(conn);
-}
-
-void
-kibnal_connect_conn (kib_conn_t *conn)
-{
- static cm_request_data_t cmreq;
- static kib_msg_t msg;
-
- kib_connvars_t *cv = conn->ibc_connvars;
- kib_peer_t *peer = conn->ibc_peer;
- cm_return_t cmrc;
-
- /* Only called by connd => statics OK */
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
- LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP);
-
- memset(&cmreq, 0, sizeof(cmreq));
-
- cmreq.sid = (__u64)(*kibnal_tunables.kib_service_number);
-
- cmreq.cep_data.ca_guid = kibnal_data.kib_hca_attrs.guid;
- cmreq.cep_data.qpn = cv->cv_local_qpn;
- cmreq.cep_data.retry_cnt = *kibnal_tunables.kib_retry_cnt;
- cmreq.cep_data.rtr_retry_cnt = *kibnal_tunables.kib_rnr_cnt;
- cmreq.cep_data.start_psn = cv->cv_rxpsn;
- cmreq.cep_data.end_to_end_flow_ctrl = IBNAL_EE_FLOW_CNT;
- // XXX ack_timeout?
- // offered_resp_res
- // offered_initiator_depth
-
- cmreq.path_data.subn_local = IBNAL_LOCAL_SUB;
- cmreq.path_data.path = cv->cv_path;
-
- /* setup msg... */
- memset(&msg, 0, sizeof(msg));
- kibnal_init_msg(&msg, IBNAL_MSG_CONNREQ, sizeof(msg.ibm_u.connparams));
- LASSERT(msg.ibm_nob <= cm_REQ_priv_data_len);
- msg.ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE;
- msg.ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE;
- msg.ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS;
- kibnal_pack_msg(&msg, conn->ibc_version, 0, peer->ibp_nid, 0, 0);
-
- if (the_lnet.ln_testprotocompat != 0) {
- /* single-shot proto check */
- LNET_LOCK();
- if ((the_lnet.ln_testprotocompat & 1) != 0) {
- msg.ibm_version++;
- the_lnet.ln_testprotocompat &= ~1;
- }
- if ((the_lnet.ln_testprotocompat & 2) != 0) {
- msg.ibm_magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~2;
- }
- LNET_UNLOCK();
- }
-
- /* ...and copy into cmreq to avoid alignment issues */
- memcpy(&cmreq.priv_data, &msg, msg.ibm_nob);
-
- CDEBUG(D_NET, "Connecting %p to %s\n", conn,
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_conn_addref(conn); /* ++ref for CM callback */
- kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_CONNECT);
-
- cmrc = cm_connect(conn->ibc_cep, &cmreq,
- kibnal_active_connect_callback, conn);
- if (cmrc == cm_stat_success) {
- CDEBUG(D_NET, "connection REQ sent to %s\n",
- libcfs_nid2str(peer->ibp_nid));
- return;
- }
-
- CERROR ("Connect %s failed: %d\n", libcfs_nid2str(peer->ibp_nid), cmrc);
- kibnal_conn_decref(conn); /* drop callback's ref */
- kibnal_connreq_done(conn, 1, -EHOSTUNREACH);
-}
-
-void
-kibnal_reconnect (kib_conn_t *conn, int why)
-{
- kib_peer_t *peer = conn->ibc_peer;
- int retry;
- unsigned long flags;
- cm_return_t cmrc;
- cm_cep_handle_t cep;
-
- LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT);
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- LASSERT (peer->ibp_connecting > 0); /* 'conn' at least */
-
- /* retry connection if it's still needed and no other connection
- * attempts (active or passive) are in progress.
- * Immediate reconnect is required, so I don't even look at the
- * reconnection timeout etc */
-
- retry = (!list_empty(&peer->ibp_tx_queue) &&
- peer->ibp_connecting == 1 &&
- peer->ibp_accepting == 0);
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (!retry) {
- kibnal_connreq_done(conn, 1, why);
- return;
- }
-
- cep = cm_create_cep(cm_cep_transp_rc);
- if (cep == NULL) {
- CERROR("Can't create new CEP\n");
- kibnal_connreq_done(conn, 1, -ENOMEM);
- return;
- }
-
- cmrc = cm_cancel(conn->ibc_cep);
- LASSERT (cmrc == cm_stat_success);
- cmrc = cm_destroy_cep(conn->ibc_cep);
- LASSERT (cmrc == cm_stat_success);
-
- conn->ibc_cep = cep;
-
- /* reuse conn; no need to peer->ibp_connecting++ */
- kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_ARP);
- kibnal_connect_conn(conn);
-}
-
-void
-kibnal_check_connreply (kib_conn_t *conn)
-{
- static cm_rtu_data_t rtu;
- static kib_msg_t msg;
-
- kib_connvars_t *cv = conn->ibc_connvars;
- cm_reply_data_t *reply = &cv->cv_conndata.data.reply;
- kib_peer_t *peer = conn->ibc_peer;
- int msgnob;
- cm_return_t cmrc;
- unsigned long flags;
- int rc;
-
- /* Only called by connd => statics OK */
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
- LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT);
-
- if (cv->cv_conndata.status == cm_event_conn_reply) {
- cv->cv_remote_qpn = reply->qpn;
- cv->cv_txpsn = reply->start_psn;
- // XXX reply->targ_ack_delay;
- cv->cv_rnr_count = reply->rnr_retry_count;
-
- kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_CHECK_REPLY);
-
- /* copy into msg to avoid alignment issues */
- msgnob = MIN(cm_REP_priv_data_len, sizeof(msg));
- memcpy(&msg, &reply->priv_data, msgnob);
-
- rc = kibnal_unpack_msg(&msg, conn->ibc_version, msgnob);
- if (rc != 0) {
- CERROR("Can't unpack reply from %s\n",
- libcfs_nid2str(peer->ibp_nid));
- kibnal_connreq_done(conn, 1, rc);
- return;
- }
-
- if (msg.ibm_type != IBNAL_MSG_CONNACK ) {
- CERROR("Unexpected message type %d from %s\n",
- msg.ibm_type, libcfs_nid2str(peer->ibp_nid));
- kibnal_connreq_done(conn, 1, -EPROTO);
- return;
- }
-
- if (msg.ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) {
- CERROR("%s has incompatible queue depth %d(%d wanted)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg.ibm_u.connparams.ibcp_queue_depth,
- IBNAL_MSG_QUEUE_SIZE);
- kibnal_connreq_done(conn, 1, -EPROTO);
- return;
- }
-
- if (msg.ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE) {
- CERROR("%s max message size %d too big (%d max)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg.ibm_u.connparams.ibcp_max_msg_size,
- IBNAL_MSG_SIZE);
- kibnal_connreq_done(conn, 1, -EPROTO);
- return;
- }
-
- if (msg.ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("%s max frags %d too big (%d max)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg.ibm_u.connparams.ibcp_max_frags,
- IBNAL_MAX_RDMA_FRAGS);
- kibnal_connreq_done(conn, 1, -EPROTO);
- return;
- }
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- if (lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
- msg.ibm_dstnid) &&
- msg.ibm_dststamp == kibnal_data.kib_incarnation)
- rc = 0;
- else
- rc = -ESTALE;
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- if (rc != 0) {
- CERROR("Stale connection reply from %s\n",
- libcfs_nid2str(peer->ibp_nid));
- kibnal_connreq_done(conn, 1, rc);
- return;
- }
-
- conn->ibc_incarnation = msg.ibm_srcstamp;
- conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE;
- LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
- <= IBNAL_RX_MSGS);
-
- rc = kibnal_post_receives(conn);
- if (rc != 0) {
- CERROR("Can't post receives for %s\n",
- libcfs_nid2str(peer->ibp_nid));
- kibnal_connreq_done(conn, 1, rc);
- return;
- }
-
- rc = kibnal_set_qp_state(conn, vv_qp_state_rtr);
- if (rc != 0) {
- kibnal_connreq_done(conn, 1, rc);
- return;
- }
-
- rc = kibnal_set_qp_state(conn, vv_qp_state_rts);
- if (rc != 0) {
- kibnal_connreq_done(conn, 1, rc);
- return;
- }
-
- kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_RTU);
- kibnal_conn_addref(conn); /* ++for CM callback */
-
- memset(&rtu, 0, sizeof(rtu));
- cmrc = cm_accept(conn->ibc_cep, NULL, &rtu,
- kibnal_cm_callback, conn);
- if (cmrc == cm_stat_success) {
- /* Now I'm racing with disconnect signalled by
- * kibnal_cm_callback */
- kibnal_connreq_done(conn, 1, 0);
- return;
- }
-
- CERROR("cm_accept %s failed: %d\n",
- libcfs_nid2str(peer->ibp_nid), cmrc);
- /* Back out of RTU: no callback coming */
- kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_CHECK_REPLY);
- kibnal_conn_decref(conn);
- kibnal_connreq_done(conn, 1, -EIO);
- return;
- }
-
- if (cv->cv_conndata.status == cm_event_conn_reject) {
-
- if (cv->cv_conndata.data.reject.reason == cm_rej_code_usr_rej) {
- unsigned char *bytes =
- cv->cv_conndata.data.reject.priv_data;
- int magic = (bytes[0]) |
- (bytes[1] << 8) |
- (bytes[2] << 16) |
- (bytes[3] << 24);
- int version = (bytes[4]) |
- (bytes[5] << 8);
- int why = (bytes[6]);
-
- /* Expected proto/version: she just doesn't like me (or
- * ran out of resources) */
- if (magic == IBNAL_MSG_MAGIC &&
- version == conn->ibc_version) {
- CERROR("conn -> %s rejected: fatal error %d\n",
- libcfs_nid2str(peer->ibp_nid), why);
-
- if (why == IBNAL_REJECT_CONN_RACE)
- kibnal_reconnect(conn, -EALREADY);
- else
- kibnal_connreq_done(conn, 1, -ECONNREFUSED);
- return;
- }
-
- /* Fail unless it's worth retrying with an old proto
- * version */
- if (!(magic == IBNAL_MSG_MAGIC &&
- version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
- conn->ibc_version == IBNAL_MSG_VERSION)) {
- CERROR("conn -> %s rejected: bad protocol "
- "magic/ver %08x/%x why %d\n",
- libcfs_nid2str(peer->ibp_nid),
- magic, version, why);
-
- kibnal_connreq_done(conn, 1, -ECONNREFUSED);
- return;
- }
-
- conn->ibc_version = version;
- CWARN ("Connection to %s refused: "
- "retrying with old protocol version 0x%x\n",
- libcfs_nid2str(peer->ibp_nid), version);
-
- kibnal_reconnect(conn, -ECONNREFUSED);
- return;
- } else if (cv->cv_conndata.data.reject.reason ==
- cm_rej_code_stale_conn) {
-
- CWARN ("conn -> %s stale: retrying\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_reconnect(conn, -ESTALE);
- return;
- } else {
- CDEBUG(D_NETERROR, "conn -> %s rejected: reason %d\n",
- libcfs_nid2str(peer->ibp_nid),
- cv->cv_conndata.data.reject.reason);
- kibnal_connreq_done(conn, 1, -ECONNREFUSED);
- return;
- }
- /* NOT REACHED */
- }
-
- CDEBUG(D_NETERROR, "conn -> %s failed: %d\n",
- libcfs_nid2str(peer->ibp_nid), cv->cv_conndata.status);
- kibnal_connreq_done(conn, 1, -ECONNABORTED);
-}
-
-void
-kibnal_arp_done (kib_conn_t *conn)
-{
- kib_peer_t *peer = conn->ibc_peer;
- kib_connvars_t *cv = conn->ibc_connvars;
- ibat_arp_data_t *arp = &cv->cv_arp;
- ib_path_record_v2_t *path = &cv->cv_path;
- vv_return_t vvrc;
- int rc;
- unsigned long flags;
-
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
- LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP);
- LASSERT (peer->ibp_arp_count > 0);
-
- if (cv->cv_arprc != ibat_stat_ok) {
- CDEBUG(D_NETERROR, "Arp %s @ %u.%u.%u.%u failed: %d\n",
- libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip),
- cv->cv_arprc);
- goto failed;
- }
-
- if ((arp->mask & IBAT_PRI_PATH_VALID) != 0) {
- CDEBUG(D_NET, "Got valid path for %s\n",
- libcfs_nid2str(peer->ibp_nid));
-
- *path = *arp->primary_path;
-
- vvrc = base_gid2port_num(kibnal_data.kib_hca, &path->sgid,
- &cv->cv_port);
- if (vvrc != vv_return_ok) {
- CWARN("base_gid2port_num failed for %s @ %u.%u.%u.%u: %d\n",
- libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), vvrc);
- goto failed;
- }
-
- vvrc = gid2gid_index(kibnal_data.kib_hca, cv->cv_port,
- &path->sgid, &cv->cv_sgid_index);
- if (vvrc != vv_return_ok) {
- CWARN("gid2gid_index failed for %s @ %u.%u.%u.%u: %d\n",
- libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), vvrc);
- goto failed;
- }
-
- vvrc = pkey2pkey_index(kibnal_data.kib_hca, cv->cv_port,
- path->pkey, &cv->cv_pkey_index);
- if (vvrc != vv_return_ok) {
- CWARN("pkey2pkey_index failed for %s @ %u.%u.%u.%u: %d\n",
- libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), vvrc);
- goto failed;
- }
-
- path->mtu = IBNAL_IB_MTU;
-
- } else if ((arp->mask & IBAT_LID_VALID) != 0) {
- CWARN("Creating new path record for %s @ %u.%u.%u.%u\n",
- libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip));
-
- cv->cv_pkey_index = IBNAL_PKEY_IDX;
- cv->cv_sgid_index = IBNAL_SGID_IDX;
- cv->cv_port = arp->local_port_num;
-
- memset(path, 0, sizeof(*path));
-
- vvrc = port_num2base_gid(kibnal_data.kib_hca, cv->cv_port,
- &path->sgid);
- if (vvrc != vv_return_ok) {
- CWARN("port_num2base_gid failed for %s @ %u.%u.%u.%u: %d\n",
- libcfs_nid2str(peer->ibp_ip),
- HIPQUAD(peer->ibp_ip), vvrc);
- goto failed;
- }
-
- vvrc = port_num2base_lid(kibnal_data.kib_hca, cv->cv_port,
- &path->slid);
- if (vvrc != vv_return_ok) {
- CWARN("port_num2base_lid failed for %s @ %u.%u.%u.%u: %d\n",
- libcfs_nid2str(peer->ibp_ip),
- HIPQUAD(peer->ibp_ip), vvrc);
- goto failed;
- }
-
- path->dgid = arp->gid;
- path->sl = IBNAL_SERVICE_LEVEL;
- path->dlid = arp->lid;
- path->mtu = IBNAL_IB_MTU;
- path->rate = IBNAL_STATIC_RATE;
- path->pkt_life_time = IBNAL_PKT_LIFETIME;
- path->pkey = IBNAL_PKEY;
- path->traffic_class = IBNAL_TRAFFIC_CLASS;
- } else {
- CWARN("Arp for %s @ %u.%u.%u.%u returned neither PATH nor LID\n",
- libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip));
- goto failed;
- }
-
- rc = kibnal_set_qp_state(conn, vv_qp_state_init);
- if (rc != 0) {
- kibnal_connreq_done(conn, 1, rc);
- }
-
- /* do the actual connection request */
- kibnal_connect_conn(conn);
- return;
-
- failed:
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- peer->ibp_arp_count--;
- if (peer->ibp_arp_count == 0) {
- /* final ARP attempt failed */
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- CDEBUG(D_NETERROR, "Arp %s @ %u.%u.%u.%u failed (final attempt)\n",
- libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip));
- } else {
- /* Retry ARP: ibp_connecting++ so terminating conn
- * doesn't end peer's connection attempt */
- peer->ibp_connecting++;
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- CDEBUG(D_NETERROR, "Arp %s @ %u.%u.%u.%u failed (%d attempts left)\n",
- libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip),
- peer->ibp_arp_count);
-
- kibnal_schedule_peer_arp(peer);
- }
- kibnal_connreq_done(conn, 1, -ENETUNREACH);
-}
-
-void
-kibnal_arp_callback (ibat_stat_t arprc, ibat_arp_data_t *arp_data, void *arg)
-{
- /* CAVEAT EMPTOR: tasklet context */
- kib_peer_t *peer;
- kib_conn_t *conn = (kib_conn_t *)arg;
-
- LASSERT (conn != NULL);
- LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP);
-
- peer = conn->ibc_peer;
-
- if (arprc != ibat_stat_ok)
- CDEBUG(D_NETERROR, "Arp %s at %u.%u.%u.%u failed: %d\n",
- libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), arprc);
- else
- CDEBUG(D_NET, "Arp %s at %u.%u.%u.%u OK: LID %s PATH %s\n",
- libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip),
- (arp_data->mask & IBAT_LID_VALID) == 0 ? "invalid" : "valid",
- (arp_data->mask & IBAT_PRI_PATH_VALID) == 0 ? "invalid" : "valid");
-
- conn->ibc_connvars->cv_arprc = arprc;
- if (arprc == ibat_stat_ok)
- conn->ibc_connvars->cv_arp = *arp_data;
-
- kibnal_schedule_conn(conn);
- kibnal_conn_decref(conn);
-}
-
-void
-kibnal_arp_peer (kib_peer_t *peer)
-{
- cm_cep_handle_t cep;
- kib_conn_t *conn;
- int ibatrc;
-
- /* Only the connd does this (i.e. single threaded) */
- LASSERT (current == kibnal_data.kib_connd);
- LASSERT (peer->ibp_connecting != 0);
- LASSERT (peer->ibp_arp_count > 0);
-
- cep = cm_create_cep(cm_cep_transp_rc);
- if (cep == NULL) {
- CERROR ("Can't create cep for conn->%s\n",
- libcfs_nid2str(peer->ibp_nid));
- kibnal_peer_connect_failed(peer, 1, -ENOMEM);
- return;
- }
-
- conn = kibnal_create_conn(cep);
- if (conn == NULL) {
- CERROR ("Can't allocate conn->%s\n",
- libcfs_nid2str(peer->ibp_nid));
- cm_destroy_cep(cep);
- kibnal_peer_connect_failed(peer, 1, -ENOMEM);
- return;
- }
-
- conn->ibc_peer = peer;
- kibnal_peer_addref(peer);
-
- kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_ARP);
-
- ibatrc = ibat_get_ib_data(htonl(peer->ibp_ip), INADDR_ANY,
- ibat_paths_primary,
- &conn->ibc_connvars->cv_arp,
- kibnal_arp_callback, conn, 0);
- CDEBUG(D_NET,"ibatrc %d\n", ibatrc);
- switch (ibatrc) {
- default:
- LBUG();
-
- case ibat_stat_pending:
- /* NB callback has my ref on conn */
- break;
-
- case ibat_stat_ok:
- case ibat_stat_error:
- case ibat_stat_timeout:
- case ibat_stat_not_found:
- /* Immediate return (ARP cache hit or failure) == no callback.
- * Do the next stage directly... */
- conn->ibc_connvars->cv_arprc = ibatrc;
- kibnal_arp_done(conn);
- kibnal_conn_decref(conn);
- break;
- }
-}
-
-int
-kibnal_check_txs (kib_conn_t *conn, struct list_head *txs)
-{
- kib_tx_t *tx;
- struct list_head *ttmp;
- int timed_out = 0;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each (ttmp, txs) {
- tx = list_entry (ttmp, kib_tx_t, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_waiting || tx->tx_sending != 0);
- } else {
- LASSERT (tx->tx_queued);
- }
-
- if (time_after_eq (jiffies, tx->tx_deadline)) {
- timed_out = 1;
- break;
- }
- }
-
- spin_unlock(&conn->ibc_lock);
- return timed_out;
-}
-
-int
-kibnal_conn_timed_out (kib_conn_t *conn)
-{
- return kibnal_check_txs(conn, &conn->ibc_tx_queue) ||
- kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) ||
- kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) ||
- kibnal_check_txs(conn, &conn->ibc_active_txs);
-}
-
-void
-kibnal_check_conns (int idx)
-{
- struct list_head *peers = &kibnal_data.kib_peers[idx];
- struct list_head *ptmp;
- kib_peer_t *peer;
- kib_conn_t *conn;
- struct list_head *ctmp;
- unsigned long flags;
-
- again:
- /* NB. We expect to have a look at all the peers and not find any
- * rdmas to time out, so we just use a shared lock while we
- * take a look... */
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- list_for_each (ptmp, peers) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
-
- list_for_each (ctmp, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED);
-
- /* In case we have enough credits to return via a
- * NOOP, but there were no non-blocking tx descs
- * free to do it last time... */
- kibnal_check_sends(conn);
-
- if (!kibnal_conn_timed_out(conn))
- continue;
-
- /* Handle timeout by closing the whole connection. We
- * can only be sure RDMA activity has ceased once the
- * QP has been modified. */
-
- kibnal_conn_addref(conn); /* 1 ref for me... */
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
-
- CERROR("Timed out RDMA with %s\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_close_conn (conn, -ETIMEDOUT);
- kibnal_conn_decref(conn); /* ...until here */
-
- /* start again now I've dropped the lock */
- goto again;
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_disconnect_conn (kib_conn_t *conn)
-{
- static cm_drequest_data_t dreq; /* just for the space */
-
- cm_return_t cmrc;
- unsigned long flags;
-
- LASSERT (!in_interrupt());
- LASSERT (current == kibnal_data.kib_connd);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (conn->ibc_disconnect) {
- /* Had the CM callback already */
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- kibnal_conn_disconnected(conn);
- return;
- }
-
- LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECT1);
-
- /* active disconnect */
- cmrc = cm_disconnect(conn->ibc_cep, &dreq, NULL);
- if (cmrc == cm_stat_success) {
- /* waiting for CM */
- conn->ibc_state = IBNAL_CONN_DISCONNECT2;
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return;
- }
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- cm_cancel(conn->ibc_cep);
- cfs_pause(cfs_time_seconds(1)/10);
-
- if (!conn->ibc_disconnect) /* CM callback will never happen now */
- kibnal_conn_decref(conn);
-
- LASSERT (atomic_read(&conn->ibc_refcount) > 0);
- LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECT1);
-
- kibnal_conn_disconnected(conn);
-}
-
-int
-kibnal_connd (void *arg)
-{
- wait_queue_t wait;
- unsigned long flags;
- kib_pcreq_t *pcr;
- kib_conn_t *conn;
- kib_peer_t *peer;
- int timeout;
- int i;
- int dropped_lock;
- int peer_index = 0;
- unsigned long deadline = jiffies;
-
- cfs_daemonize ("kibnal_connd");
- cfs_block_allsigs ();
-
- init_waitqueue_entry (&wait, current);
- kibnal_data.kib_connd = current;
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- while (!kibnal_data.kib_shutdown) {
-
- dropped_lock = 0;
-
- if (!list_empty (&kibnal_data.kib_connd_zombies)) {
- conn = list_entry (kibnal_data.kib_connd_zombies.next,
- kib_conn_t, ibc_list);
- list_del (&conn->ibc_list);
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
- dropped_lock = 1;
-
- kibnal_destroy_conn(conn);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- if (!list_empty (&kibnal_data.kib_connd_pcreqs)) {
- pcr = list_entry(kibnal_data.kib_connd_pcreqs.next,
- kib_pcreq_t, pcr_list);
- list_del(&pcr->pcr_list);
-
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
- dropped_lock = 1;
-
- kibnal_recv_connreq(pcr->pcr_cep, &pcr->pcr_cmreq);
- LIBCFS_FREE(pcr, sizeof(*pcr));
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
- }
-
- if (!list_empty (&kibnal_data.kib_connd_peers)) {
- peer = list_entry (kibnal_data.kib_connd_peers.next,
- kib_peer_t, ibp_connd_list);
-
- list_del_init (&peer->ibp_connd_list);
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
- dropped_lock = 1;
-
- kibnal_arp_peer (peer);
- kibnal_peer_decref (peer);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- if (!list_empty (&kibnal_data.kib_connd_conns)) {
- conn = list_entry (kibnal_data.kib_connd_conns.next,
- kib_conn_t, ibc_list);
- list_del (&conn->ibc_list);
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
- dropped_lock = 1;
-
- switch (conn->ibc_state) {
- default:
- LBUG();
-
- case IBNAL_CONN_ACTIVE_ARP:
- kibnal_arp_done(conn);
- break;
-
- case IBNAL_CONN_ACTIVE_CONNECT:
- kibnal_check_connreply(conn);
- break;
-
- case IBNAL_CONN_PASSIVE_WAIT:
- kibnal_check_passive_wait(conn);
- break;
-
- case IBNAL_CONN_DISCONNECT1:
- case IBNAL_CONN_DISCONNECT2:
- kibnal_disconnect_conn(conn);
- break;
- }
- kibnal_conn_decref(conn);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- /* careful with the jiffy wrap... */
- timeout = (int)(deadline - jiffies);
- if (timeout <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = kibnal_data.kib_peer_hash_size;
-
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
- dropped_lock = 1;
-
- /* Time to check for RDMA timeouts on a few more
- * peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
- * every connection 'n' times within a timeout
- * interval, to ensure I detect a timeout on any
- * connection within (n+1)/n times the timeout
- * interval. */
-
- if (*kibnal_tunables.kib_timeout > n * p)
- chunk = (chunk * n * p) /
- *kibnal_tunables.kib_timeout;
- if (chunk == 0)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kibnal_check_conns (peer_index);
- peer_index = (peer_index + 1) %
- kibnal_data.kib_peer_hash_size;
- }
-
- deadline += p * HZ;
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
- }
-
- if (dropped_lock)
- continue;
-
- /* Nothing to do for 'timeout' */
- set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- schedule_timeout (timeout);
-
- set_current_state (TASK_RUNNING);
- remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- kibnal_thread_fini ();
- return (0);
-}
-
-void
-kibnal_async_callback(vv_event_record_t ev)
-{
- CERROR("type: %d, port: %d, data: "LPX64"\n",
- ev.event_type, ev.port_num, ev.type.data);
-}
-
-void
-kibnal_cq_callback (unsigned long unused_context)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- kibnal_data.kib_ready = 1;
- wake_up(&kibnal_data.kib_sched_waitq);
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-}
-
-int
-kibnal_scheduler(void *arg)
-{
- long id = (long)arg;
- wait_queue_t wait;
- char name[16];
- vv_wc_t wc;
- vv_return_t vvrc;
- vv_return_t vvrc2;
- unsigned long flags;
- kib_rx_t *rx;
- __u64 rxseq = 0;
- int busy_loops = 0;
-
- snprintf(name, sizeof(name), "kibnal_sd_%02ld", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- init_waitqueue_entry(&wait, current);
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
-
- while (!kibnal_data.kib_shutdown) {
- if (busy_loops++ >= IBNAL_RESCHED) {
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- our_cond_resched();
- busy_loops = 0;
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- }
-
- if (kibnal_data.kib_ready &&
- !kibnal_data.kib_checking_cq) {
- /* take ownership of completion polling */
- kibnal_data.kib_checking_cq = 1;
- /* Assume I'll exhaust the CQ */
- kibnal_data.kib_ready = 0;
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- vvrc = vv_poll_for_completion(kibnal_data.kib_hca,
- kibnal_data.kib_cq, &wc);
- if (vvrc == vv_return_err_cq_empty) {
- vvrc2 = vv_request_completion_notification(
- kibnal_data.kib_hca,
- kibnal_data.kib_cq,
- vv_next_solicit_unsolicit_event);
- LASSERT (vvrc2 == vv_return_ok);
- }
-
- if (vvrc == vv_return_ok &&
- kibnal_wreqid2type(wc.wr_id) == IBNAL_WID_RX) {
- rx = (kib_rx_t *)kibnal_wreqid2ptr(wc.wr_id);
-
- /* Grab the RX sequence number NOW before
- * anyone else can get an RX completion */
- rxseq = rx->rx_conn->ibc_rxseq++;
- }
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- /* give up ownership of completion polling */
- kibnal_data.kib_checking_cq = 0;
-
- if (vvrc == vv_return_err_cq_empty)
- continue;
-
- LASSERT (vvrc == vv_return_ok);
- /* Assume there's more: get another scheduler to check
- * while I handle this completion... */
-
- kibnal_data.kib_ready = 1;
- wake_up(&kibnal_data.kib_sched_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- switch (kibnal_wreqid2type(wc.wr_id)) {
- case IBNAL_WID_RX:
- kibnal_rx_complete(
- (kib_rx_t *)kibnal_wreqid2ptr(wc.wr_id),
- wc.completion_status,
- wc.num_bytes_transfered,
- rxseq);
- break;
-
- case IBNAL_WID_TX:
- kibnal_tx_complete(
- (kib_tx_t *)kibnal_wreqid2ptr(wc.wr_id),
- wc.completion_status);
- break;
-
- case IBNAL_WID_RDMA:
- /* We only get RDMA completion notification if
- * it fails. So we just ignore them completely
- * because...
- *
- * 1) If an RDMA fails, all subsequent work
- * items, including the final SEND will fail
- * too, so I'm still guaranteed to notice that
- * this connection is hosed.
- *
- * 2) It's positively dangerous to look inside
- * the tx descriptor obtained from an RDMA work
- * item. As soon as I drop the kib_sched_lock,
- * I give a scheduler on another CPU a chance
- * to get the final SEND completion, so the tx
- * descriptor can get freed as I inspect it. */
- CDEBUG(D_NETERROR, "RDMA failed: %d\n",
- wc.completion_status);
- break;
-
- default:
- LBUG();
- }
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- continue;
- }
-
- /* Nothing to do; sleep... */
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kibnal_data.kib_sched_waitq, &wait);
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- schedule();
-
- remove_wait_queue(&kibnal_data.kib_sched_waitq, &wait);
- set_current_state(TASK_RUNNING);
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- }
-
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-
- kibnal_thread_fini();
- return (0);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "viblnd.h"
-
-static int service_number = 0x11b9a2;
-CFS_MODULE_PARM(service_number, "i", int, 0444,
- "IB service number");
-
-static int min_reconnect_interval = 1;
-CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644,
- "minimum connection retry interval (seconds)");
-
-static int max_reconnect_interval = 60;
-CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644,
- "maximum connection retry interval (seconds)");
-
-static int concurrent_peers = 1152;
-CFS_MODULE_PARM(concurrent_peers, "i", int, 0444,
- "maximum number of peers that may connect");
-
-static int cksum = 0;
-CFS_MODULE_PARM(cksum, "i", int, 0644,
- "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
- "timeout (seconds)");
-
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# of message descriptors");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int peer_credits = 8;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
- "# concurrent sends to 1 peer");
-
-static int arp_retries = 3;
-CFS_MODULE_PARM(arp_retries, "i", int, 0644,
- "# of times to retry ARP");
-
-static char *hca_basename = "InfiniHost";
-CFS_MODULE_PARM(hca_basename, "s", charp, 0444,
- "HCA base name");
-
-static char *ipif_basename = "ipoib";
-CFS_MODULE_PARM(ipif_basename, "s", charp, 0444,
- "IPoIB interface base name");
-
-static int local_ack_timeout = 0x12;
-CFS_MODULE_PARM(local_ack_timeout, "i", int, 0644,
- "ACK timeout for low-level 'sends'");
-
-static int retry_cnt = 7;
-CFS_MODULE_PARM(retry_cnt, "i", int, 0644,
- "Retransmissions when no ACK received");
-
-static int rnr_cnt = 6;
-CFS_MODULE_PARM(rnr_cnt, "i", int, 0644,
- "RNR retransmissions");
-
-static int rnr_nak_timer = 0x10;
-CFS_MODULE_PARM(rnr_nak_timer, "i", int, 0644,
- "RNR retransmission interval");
-
-static int keepalive = 100;
-CFS_MODULE_PARM(keepalive, "i", int, 0644,
- "Idle time in seconds before sending a keepalive");
-
-static int concurrent_sends = IBNAL_RX_MSGS;
-CFS_MODULE_PARM(concurrent_sends, "i", int, 0644,
- "send work-queue sizing");
-
-#if IBNAL_USE_FMR
-static int fmr_remaps = 1000;
-CFS_MODULE_PARM(fmr_remaps, "i", int, 0444,
- "FMR mappings allowed before unmap");
-#endif
-
-kib_tunables_t kibnal_tunables = {
- .kib_service_number = &service_number,
- .kib_min_reconnect_interval = &min_reconnect_interval,
- .kib_max_reconnect_interval = &max_reconnect_interval,
- .kib_concurrent_peers = &concurrent_peers,
- .kib_cksum = &cksum,
- .kib_timeout = &timeout,
- .kib_ntx = &ntx,
- .kib_credits = &credits,
- .kib_peercredits = &peer_credits,
- .kib_arp_retries = &arp_retries,
- .kib_hca_basename = &hca_basename,
- .kib_ipif_basename = &ipif_basename,
- .kib_local_ack_timeout = &local_ack_timeout,
- .kib_retry_cnt = &retry_cnt,
- .kib_rnr_cnt = &rnr_cnt,
- .kib_rnr_nak_timer = &rnr_nak_timer,
- .kib_keepalive = &keepalive,
- .kib_concurrent_sends = &concurrent_sends,
-#if IBNAL_USE_FMR
- .kib_fmr_remaps = &fmr_remaps,
-#endif
-};
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-static char hca_basename_space[32];
-static char ipif_basename_space[32];
-
-static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "service_number",
- .data = &service_number,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 2,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 3,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 6,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 7,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 8,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 9,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 10,
- .procname = "arp_retries",
- .data = &arp_retries,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 11,
- .procname = "hca_basename",
- .data = hca_basename_space,
- .maxlen = sizeof(hca_basename_space),
- .mode = 0444,
- .proc_handler = &proc_dostring
- },
- {
- .ctl_name = 12,
- .procname = "ipif_basename",
- .data = ipif_basename_space,
- .maxlen = sizeof(ipif_basename_space),
- .mode = 0444,
- .proc_handler = &proc_dostring
- },
- {
- .ctl_name = 13,
- .procname = "local_ack_timeout",
- .data = &local_ack_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 14,
- .procname = "retry_cnt",
- .data = &retry_cnt,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 15,
- .procname = "rnr_cnt",
- .data = &rnr_cnt,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 16,
- .procname = "rnr_nak_timer",
- .data = &rnr_nak_timer,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 17,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 18,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
-#if IBNAL_USE_FMR
- {
- .ctl_name = 19,
- .procname = "fmr_remaps",
- .data = &fmr_remaps,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
-#endif
- {0}
-};
-
-static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
- .ctl_name = 203,
- .procname = "vibnal",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = kibnal_ctl_table
- },
- {0}
-};
-
-void
-kibnal_initstrtunable(char *space, char *str, int size)
-{
- strncpy(space, str, size);
- space[size-1] = 0;
-}
-
-int
-kibnal_tunables_init ()
-{
- kibnal_initstrtunable(hca_basename_space, hca_basename,
- sizeof(hca_basename_space));
- kibnal_initstrtunable(ipif_basename_space, ipif_basename,
- sizeof(ipif_basename_space));
-
- kibnal_tunables.kib_sysctl =
- cfs_register_sysctl_table(kibnal_top_ctl_table, 0);
-
- if (kibnal_tunables.kib_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-
- if (*kibnal_tunables.kib_concurrent_sends > IBNAL_RX_MSGS)
- *kibnal_tunables.kib_concurrent_sends = IBNAL_RX_MSGS;
- if (*kibnal_tunables.kib_concurrent_sends < IBNAL_MSG_QUEUE_SIZE)
- *kibnal_tunables.kib_concurrent_sends = IBNAL_MSG_QUEUE_SIZE;
-
- return 0;
-}
-
-void
-kibnal_tunables_fini ()
-{
- if (kibnal_tunables.kib_sysctl != NULL)
- cfs_unregister_sysctl_table(kibnal_tunables.kib_sysctl);
-}
-
-#else
-
-int
-kibnal_tunables_init ()
-{
- return 0;
-}
-
-void
-kibnal_tunables_fini ()
-{
-}
-
-#endif
+++ /dev/null
-/************************************************************************
- * IB Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- */
-
-typedef struct kib_connparams
-{
- __u32 ibcp_queue_depth;
- __u32 ibcp_max_msg_size;
- __u32 ibcp_max_frags;
-} WIRE_ATTR kib_connparams_t;
-
-typedef struct
-{
- lnet_hdr_t ibim_hdr; /* portals header */
- char ibim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR kib_immediate_msg_t;
-
-#ifndef IBNAL_USE_FMR
-# error "IBNAL_USE_FMR must be defined 1 or 0 before including this file"
-#endif
-
-#if IBNAL_USE_FMR
-typedef struct
-{
- __u64 rd_addr; /* IO VMA address */
- __u32 rd_nob; /* # of bytes */
- __u32 rd_key; /* remote key */
-} WIRE_ATTR kib_rdma_desc_t;
-#else
-/* YEUCH! the __u64 address is split into 2 __u32 fields to ensure proper
- * packing. Otherwise we can't fit enough frags into an IBNAL message (<=
- * smallest page size on any arch). */
-typedef struct
-{
- __u32 rf_nob; /* # of bytes */
- __u32 rf_addr_lo; /* lo 4 bytes of vaddr */
- __u32 rf_addr_hi; /* hi 4 bytes of vaddr */
-} WIRE_ATTR kib_rdma_frag_t;
-
-typedef struct
-{
- __u32 rd_key; /* local/remote key */
- __u32 rd_nfrag; /* # fragments */
- kib_rdma_frag_t rd_frags[0]; /* buffer frags */
-} WIRE_ATTR kib_rdma_desc_t;
-#endif
-
-typedef struct
-{
- lnet_hdr_t ibprm_hdr; /* portals header */
- __u64 ibprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR kib_putreq_msg_t;
-
-typedef struct
-{
- __u64 ibpam_src_cookie; /* reflected completion cookie */
- __u64 ibpam_dst_cookie; /* opaque completion cookie */
- kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */
-} WIRE_ATTR kib_putack_msg_t;
-
-typedef struct
-{
- lnet_hdr_t ibgm_hdr; /* portals header */
- __u64 ibgm_cookie; /* opaque completion cookie */
- kib_rdma_desc_t ibgm_rd; /* rdma descriptor */
-} WIRE_ATTR kib_get_msg_t;
-
-typedef struct
-{
- __u64 ibcm_cookie; /* opaque completion cookie */
- __s32 ibcm_status; /* < 0 failure: >= 0 length */
-} WIRE_ATTR kib_completion_msg_t;
-
-typedef struct
-{
- /* First 2 fields fixed FOR ALL TIME */
- __u32 ibm_magic; /* I'm an openibnal message */
- __u16 ibm_version; /* this is my version number */
-
- __u8 ibm_type; /* msg type */
- __u8 ibm_credits; /* returned credits */
- __u32 ibm_nob; /* # bytes in whole message */
- __u32 ibm_cksum; /* checksum (0 == no checksum) */
- __u64 ibm_srcnid; /* sender's NID */
- __u64 ibm_srcstamp; /* sender's incarnation */
- __u64 ibm_dstnid; /* destination's NID */
- __u64 ibm_dststamp; /* destination's incarnation */
- __u64 ibm_seq; /* sequence number */
-
- union {
- kib_connparams_t connparams;
- kib_immediate_msg_t immediate;
- kib_putreq_msg_t putreq;
- kib_putack_msg_t putack;
- kib_get_msg_t get;
- kib_completion_msg_t completion;
- } WIRE_ATTR ibm_u;
-} WIRE_ATTR kib_msg_t;
-
-#define IBNAL_MSG_MAGIC LNET_PROTO_VIB_MAGIC /* unique magic */
-
-#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 0x10 /* previous version */
-
-#define IBNAL_MSG_VERSION 0x11 /* current version */
-
-#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */
-#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */
-#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */
-#define IBNAL_MSG_IMMEDIATE 0xd1 /* immediate */
-#define IBNAL_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */
-#define IBNAL_MSG_PUT_NAK 0xd3 /* completion (sink->src) */
-#define IBNAL_MSG_PUT_ACK 0xd4 /* putack (sink->src) */
-#define IBNAL_MSG_PUT_DONE 0xd5 /* completion (src->sink) */
-#define IBNAL_MSG_GET_REQ 0xd6 /* getreq (sink->src) */
-#define IBNAL_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */
-
-/* connection rejection reasons */
-#define IBNAL_REJECT_CONN_RACE 0 /* You lost connection race */
-#define IBNAL_REJECT_NO_RESOURCES 1 /* Out of memory/conns etc */
-#define IBNAL_REJECT_FATAL 2 /* Anything else */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-
-#include <lnet/api-support.h>
-
-/* This ghastly hack to allows me to include lib-types.h It doesn't affect any
- * assertions generated here (but fails-safe if it ever does) */
-typedef struct {
- int counter;
-} atomic_t;
-
-#include <lnet/lib-types.h>
-
-#define IBNAL_USE_FMR 1
-#include "viblnd_wire.h"
-
-#ifndef HAVE_STRNLEN
-#define strnlen(s, i) strlen(s)
-#endif
-
-#define BLANK_LINE() \
-do { \
- printf ("\n"); \
-} while (0)
-
-#define COMMENT(c) \
-do { \
- printf (" /* "c" */\n"); \
-} while (0)
-
-#undef STRINGIFY
-#define STRINGIFY(a) #a
-
-#define CHECK_DEFINE(a) \
-do { \
- printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \
-} while (0)
-
-#define CHECK_VALUE(a) \
-do { \
- printf (" CLASSERT ("#a" == %d);\n", a); \
-} while (0)
-
-#define CHECK_MEMBER_OFFSET(s,m) \
-do { \
- CHECK_VALUE((int)offsetof(s, m)); \
-} while (0)
-
-#define CHECK_MEMBER_SIZEOF(s,m) \
-do { \
- CHECK_VALUE((int)sizeof(((s *)0)->m)); \
-} while (0)
-
-#define CHECK_MEMBER(s,m) \
-do { \
- CHECK_MEMBER_OFFSET(s, m); \
- CHECK_MEMBER_SIZEOF(s, m); \
-} while (0)
-
-#define CHECK_STRUCT(s) \
-do { \
- BLANK_LINE (); \
- COMMENT ("Checks for struct "#s); \
- CHECK_VALUE((int)sizeof(s)); \
-} while (0)
-
-void
-system_string (char *cmdline, char *str, int len)
-{
- int fds[2];
- int rc;
- pid_t pid;
-
- rc = pipe (fds);
- if (rc != 0)
- abort ();
-
- pid = fork ();
- if (pid == 0) {
- /* child */
- int fd = fileno(stdout);
-
- rc = dup2(fds[1], fd);
- if (rc != fd)
- abort();
-
- exit(system(cmdline));
- /* notreached */
- } else if ((int)pid < 0) {
- abort();
- } else {
- FILE *f = fdopen (fds[0], "r");
-
- if (f == NULL)
- abort();
-
- close(fds[1]);
-
- if (fgets(str, len, f) == NULL)
- abort();
-
- if (waitpid(pid, &rc, 0) != pid)
- abort();
-
- if (!WIFEXITED(rc) ||
- WEXITSTATUS(rc) != 0)
- abort();
-
- if (strnlen(str, len) == len)
- str[len - 1] = 0;
-
- if (str[strlen(str) - 1] == '\n')
- str[strlen(str) - 1] = 0;
-
- fclose(f);
- }
-}
-
-int
-main (int argc, char **argv)
-{
- char unameinfo[80];
- char gccinfo[80];
-
- system_string("uname -a", unameinfo, sizeof(unameinfo));
- system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo));
-
- printf ("void vibnal_assert_wire_constants (void)\n"
- "{\n"
- " /* Wire protocol assertions generated by 'wirecheck'\n"
- " * running on %s\n"
- " * with %s */\n"
- "\n", unameinfo, gccinfo);
-
- BLANK_LINE ();
-
- COMMENT ("Constants...");
- CHECK_DEFINE (IBNAL_MSG_MAGIC);
- CHECK_DEFINE (IBNAL_MSG_VERSION);
-
- CHECK_DEFINE (IBNAL_MSG_CONNREQ);
- CHECK_DEFINE (IBNAL_MSG_CONNACK);
- CHECK_DEFINE (IBNAL_MSG_NOOP);
- CHECK_DEFINE (IBNAL_MSG_IMMEDIATE);
- CHECK_DEFINE (IBNAL_MSG_PUT_REQ);
- CHECK_DEFINE (IBNAL_MSG_PUT_NAK);
- CHECK_DEFINE (IBNAL_MSG_PUT_ACK);
- CHECK_DEFINE (IBNAL_MSG_PUT_DONE);
- CHECK_DEFINE (IBNAL_MSG_GET_REQ);
- CHECK_DEFINE (IBNAL_MSG_GET_DONE);
-
- CHECK_DEFINE (IBNAL_REJECT_CONN_RACE);
- CHECK_DEFINE (IBNAL_REJECT_NO_RESOURCES);
- CHECK_DEFINE (IBNAL_REJECT_FATAL);
-
- CHECK_STRUCT (kib_connparams_t);
- CHECK_MEMBER (kib_connparams_t, ibcp_queue_depth);
- CHECK_MEMBER (kib_connparams_t, ibcp_max_msg_size);
- CHECK_MEMBER (kib_connparams_t, ibcp_max_frags);
-
- CHECK_STRUCT (kib_immediate_msg_t);
- CHECK_MEMBER (kib_immediate_msg_t, ibim_hdr);
- CHECK_MEMBER (kib_immediate_msg_t, ibim_payload[13]);
-
- CHECK_DEFINE (IBNAL_USE_FMR);
-#if IBNAL_USE_FMR
- CHECK_STRUCT (kib_rdma_desc_t);
- CHECK_MEMBER (kib_rdma_desc_t, rd_addr);
- CHECK_MEMBER (kib_rdma_desc_t, rd_nob);
- CHECK_MEMBER (kib_rdma_desc_t, rd_key);
-#else
- CHECK_STRUCT (kib_rdma_frag_t);
- CHECK_MEMBER (kib_rdma_frag_t, rf_nob);
- CHECK_MEMBER (kib_rdma_frag_t, rf_addr_lo);
- CHECK_MEMBER (kib_rdma_frag_t, rf_addr_hi);
-
- CHECK_STRUCT (kib_rdma_desc_t);
- CHECK_MEMBER (kib_rdma_desc_t, rd_key);
- CHECK_MEMBER (kib_rdma_desc_t, rd_nfrag);
- CHECK_MEMBER (kib_rdma_desc_t, rd_frags[13]);
-#endif
- CHECK_STRUCT (kib_putreq_msg_t);
- CHECK_MEMBER (kib_putreq_msg_t, ibprm_hdr);
- CHECK_MEMBER (kib_putreq_msg_t, ibprm_cookie);
-
- CHECK_STRUCT (kib_putack_msg_t);
- CHECK_MEMBER (kib_putack_msg_t, ibpam_src_cookie);
- CHECK_MEMBER (kib_putack_msg_t, ibpam_dst_cookie);
- CHECK_MEMBER (kib_putack_msg_t, ibpam_rd);
-
- CHECK_STRUCT (kib_get_msg_t);
- CHECK_MEMBER (kib_get_msg_t, ibgm_hdr);
- CHECK_MEMBER (kib_get_msg_t, ibgm_cookie);
- CHECK_MEMBER (kib_get_msg_t, ibgm_rd);
-
- CHECK_STRUCT (kib_completion_msg_t);
- CHECK_MEMBER (kib_completion_msg_t, ibcm_cookie);
- CHECK_MEMBER (kib_completion_msg_t, ibcm_status);
-
- CHECK_STRUCT (kib_msg_t);
- CHECK_MEMBER (kib_msg_t, ibm_magic);
- CHECK_MEMBER (kib_msg_t, ibm_version);
- CHECK_MEMBER (kib_msg_t, ibm_type);
- CHECK_MEMBER (kib_msg_t, ibm_credits);
- CHECK_MEMBER (kib_msg_t, ibm_nob);
- CHECK_MEMBER (kib_msg_t, ibm_cksum);
- CHECK_MEMBER (kib_msg_t, ibm_srcnid);
- CHECK_MEMBER (kib_msg_t, ibm_srcstamp);
- CHECK_MEMBER (kib_msg_t, ibm_dstnid);
- CHECK_MEMBER (kib_msg_t, ibm_dststamp);
- CHECK_MEMBER (kib_msg_t, ibm_seq);
- CHECK_MEMBER (kib_msg_t, ibm_u.connparams);
- CHECK_MEMBER (kib_msg_t, ibm_u.immediate);
- CHECK_MEMBER (kib_msg_t, ibm_u.putreq);
- CHECK_MEMBER (kib_msg_t, ibm_u.putack);
- CHECK_MEMBER (kib_msg_t, ibm_u.get);
- CHECK_MEMBER (kib_msg_t, ibm_u.completion);
-
- printf ("}\n\n");
-
- return (0);
-}
+++ /dev/null
-.deps
-Makefile
-link-stamp
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>CFBundleDevelopmentRegion</key>
- <string>English</string>
- <key>CFBundleExecutable</key>
- <string>libcfs</string>
- <key>CFBundleIconFile</key>
- <string></string>
- <key>CFBundleIdentifier</key>
- <string>com.clusterfs.lustre.libcfs</string>
- <key>CFBundleInfoDictionaryVersion</key>
- <string>6.0</string>
- <key>CFBundlePackageType</key>
- <string>KEXT</string>
- <key>CFBundleSignature</key>
- <string>????</string>
- <key>CFBundleVersion</key>
- <string>1.0.1</string>
- <key>OSBundleCompatibleVersion</key>
- <string>1.0.0</string>
- <key>OSBundleLibraries</key>
- <dict>
- <key>com.apple.kpi.bsd</key>
- <string>8.0.0b1</string>
- <key>com.apple.kpi.libkern</key>
- <string>8.0.0b1</string>
- <key>com.apple.kpi.mach</key>
- <string>8.0.0b1</string>
- <key>com.apple.kpi.unsupported</key>
- <string>8.0.0b1</string>
- </dict>
-</dict>
-</plist>
+++ /dev/null
-MODULES = libcfs
-
-libcfs-linux-objs := linux-tracefile.o linux-debug.o
-libcfs-linux-objs += linux-prim.o linux-mem.o
-libcfs-linux-objs += linux-fs.o linux-sync.o linux-tcpip.o
-libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o
-libcfs-linux-objs += linux-utils.o linux-module.o
-
-ifeq ($(PATCHLEVEL),6)
-libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
-endif
-
-default: all
-
-ifeq (@linux25@,no)
-sources:
- @for i in $(libcfs-linux-objs:%.o=%.c) ; do \
- echo "ln -s @srcdir@/linux/$$i ." ; \
- ln -sf @srcdir@/linux/$$i . || exit 1 ; \
- done
-
-else
-sources:
-
-endif
-
-libcfs-all-objs := debug.o nidstrings.o lwt.o module.o tracefile.o watchdog.o
-
-libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
-
-EXTRA_PRE_CFLAGS := -I@LUSTRE@/../lnet/libcfs
-
-@INCLUDE_RULES@
+++ /dev/null
-# Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-SUBDIRS := linux
-if DARWIN
-SUBDIRS += darwin
-endif
-DIST_SUBDIRS := $(SUBDIRS)
-
-if LIBLUSTRE
-noinst_LIBRARIES= libcfs.a
-libcfs_a_SOURCES= debug.c user-prim.c user-lock.c user-tcpip.c user-bitops.c
-libcfs_a_CPPFLAGS = $(LLCPPFLAGS)
-libcfs_a_CFLAGS = $(LLCFLAGS)
-endif
-
-if MODULES
-
-if LINUX
-modulenet_DATA := libcfs$(KMODEXT)
-endif
-
-if DARWIN
-macos_PROGRAMS := libcfs
-
-nodist_libcfs_SOURCES := darwin/darwin-sync.c darwin/darwin-mem.c \
- darwin/darwin-prim.c darwin/darwin-fs.c darwin/darwin-curproc.c \
- darwin/darwin-tcpip.c darwin/darwin-utils.c \
- darwin/darwin-debug.c darwin/darwin-proc.c \
- darwin/darwin-tracefile.c darwin/darwin-module.c \
- debug.c module.c tracefile.c nidstrings.c watchdog.c
-
-libcfs_CFLAGS := $(EXTRA_KCFLAGS)
-libcfs_LDFLAGS := $(EXTRA_KLDFLAGS)
-libcfs_LDADD := $(EXTRA_KLIBS)
-
-plist_DATA := Info.plist
-
-install_data_hook := fix-kext-ownership
-
-endif
-
-endif
-
-install-data-hook: $(install_data_hook)
-
-EXTRA_DIST := Info.plist
-
-MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o darwin/*.o libcfs
-DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h user-prim.c \
- user-lock.c user-tcpip.c user-bitops.c
+++ /dev/null
-Makefile
-Makefile.in
+++ /dev/null
-EXTRA_DIST := \
- darwin-mem.c \
- darwin-proc.c \
- darwin-utils.c \
- darwin-debug.c \
- darwin-module.c \
- darwin-sync.c \
- darwin-fs.c \
- darwin-prim.c \
- darwin-tracefile.c \
- darwin-curproc.c \
- darwin-tcpip.c
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Lustre curproc API implementation for XNU kernel
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation. Lustre is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
- * Public License for more details. You should have received a copy of the GNU
- * General Public License along with Lustre; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-/*
- * Implementation of cfs_curproc API (see lnet/include/libcfs/curproc.h)
- * for XNU kernel.
- */
-
-static inline struct ucred *curproc_ucred(void)
-{
-#ifdef __DARWIN8__
- return proc_ucred(current_proc());
-#else
- return current_proc()->p_cred->pc_ucred;
-#endif
-}
-
-uid_t cfs_curproc_uid(void)
-{
- return curproc_ucred()->cr_uid;
-}
-
-gid_t cfs_curproc_gid(void)
-{
- LASSERT(curproc_ucred()->cr_ngroups > 0);
- return curproc_ucred()->cr_groups[0];
-}
-
-uid_t cfs_curproc_fsuid(void)
-{
-#ifdef __DARWIN8__
- return curproc_ucred()->cr_ruid;
-#else
- return current_proc()->p_cred->p_ruid;
-#endif
-}
-
-gid_t cfs_curproc_fsgid(void)
-{
-#ifdef __DARWIN8__
- return curproc_ucred()->cr_rgid;
-#else
- return current_proc()->p_cred->p_rgid;
-#endif
-}
-
-pid_t cfs_curproc_pid(void)
-{
-#ifdef __DARWIN8__
- /* no pid for each thread, return address of thread struct */
- return (pid_t)current_thread();
-#else
- return current_proc()->p_pid;
-#endif
-}
-
-int cfs_curproc_groups_nr(void)
-{
- LASSERT(curproc_ucred()->cr_ngroups > 0);
- return curproc_ucred()->cr_ngroups - 1;
-}
-
-int cfs_curproc_is_in_groups(gid_t gid)
-{
- int i;
- struct ucred *cr;
-
- cr = curproc_ucred();
- LASSERT(cr != NULL);
-
- for (i = 0; i < cr->cr_ngroups; ++ i) {
- if (cr->cr_groups[i] == gid)
- return 1;
- }
- return 0;
-}
-
-void cfs_curproc_groups_dump(gid_t *array, int size)
-{
- struct ucred *cr;
-
- cr = curproc_ucred();
- LASSERT(cr != NULL);
- CLASSERT(sizeof array[0] == sizeof (__u32));
-
- size = min_t(int, size, cr->cr_ngroups);
- memcpy(array, &cr->cr_groups[1], size * sizeof(gid_t));
-}
-
-mode_t cfs_curproc_umask(void)
-{
-#ifdef __DARWIN8__
- /*
- * XXX Liang:
- *
- * fd_cmask is not available in kexts, so we just assume
- * verything is permited.
- */
- return -1;
-#else
- return current_proc()->p_fd->fd_cmask;
-#endif
-}
-
-char *cfs_curproc_comm(void)
-{
-#ifdef __DARWIN8__
- /*
- * Writing to proc->p_comm is not permited in Darwin8,
- * because proc_selfname() only return a copy of proc->p_comm,
- * so this function is not really working while user try to
- * change comm of current process.
- */
- static char pcomm[MAXCOMLEN+1];
-
- proc_selfname(pcomm, MAXCOMLEN+1);
- return pcomm;
-#else
- return current_proc()->p_comm;
-#endif
-}
-
-cfs_kernel_cap_t cfs_curproc_cap_get(void)
-{
- return -1;
-}
-
-void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
-{
- return;
-}
-
-
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/kp30.h>
-#include <libcfs/libcfs.h>
-#include "tracefile.h"
-
-void libcfs_debug_dumpstack(cfs_task_t *tsk)
-{
- return;
-}
-
-void libcfs_run_lbug_upcall(char *file, const char *fn, const int line)
-{
-}
-
-void lbug_with_loc(char *file, const char *func, const int line)
-{
- libcfs_catastrophe = 1;
- CEMERG("LBUG: pid: %u thread: %#x\n",
- (unsigned)cfs_curproc_pid(), (unsigned)current_thread());
- libcfs_debug_dumplog();
- libcfs_run_lbug_upcall(file, func, line);
- while (1)
- cfs_schedule();
-
- /* panic("lbug_with_loc(%s, %s, %d)", file, func, line) */
-}
-
-#if ENTRY_NESTING_SUPPORT
-
-static inline struct cfs_debug_data *__current_cdd(void)
-{
- struct cfs_debug_data *cdd;
-
- cdd = (struct cfs_debug_data *)current_uthread()->uu_nlminfo;
- if (cdd != NULL &&
- cdd->magic1 == CDD_MAGIC1 && cdd->magic2 == CDD_MAGIC2 &&
- cdd->nesting_level < 1000)
- return cdd;
- else
- return NULL;
-}
-
-static inline void __current_cdd_set(struct cfs_debug_data *cdd)
-{
- current_uthread()->uu_nlminfo = (void *)cdd;
-}
-
-void __entry_nesting(struct cfs_debug_data *child)
-{
- struct cfs_debug_data *parent;
-
- parent = __current_cdd();
- if (parent != NULL) {
- child->parent = parent;
- child->nesting_level = parent->nesting_level + 1;
- }
- __current_cdd_set(child);
-}
-
-void __exit_nesting(struct cfs_debug_data *child)
-{
- __current_cdd_set(child->parent);
-}
-
-unsigned int __current_nesting_level(void)
-{
- struct cfs_debug_data *cdd;
-
- cdd = __current_cdd();
- if (cdd != NULL)
- return cdd->nesting_level;
- else
- return 0;
-}
-/* ENTRY_NESTING_SUPPORT */
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Darwin porting library
- * Make things easy to port
- */
-#include <mach/mach_types.h>
-#include <string.h>
-#include <sys/file.h>
-#include <sys/malloc.h>
-#include <sys/conf.h>
-#include <sys/mount.h>
-#include <sys/uio.h>
-#include <sys/filedesc.h>
-#include <sys/namei.h>
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-/*
- * Kernel APIs for file system in xnu
- *
- * Public functions
- */
-
-#ifdef __DARWIN8__
-#include <sys/vnode.h>
-
-extern int vn_rdwr(enum uio_rw, vnode_t, caddr_t, int, off_t, enum uio_seg, int, kauth_cred_t, int *, proc_t);
-
-/* vnode_size() is not exported */
-static errno_t
-vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx)
-{
- struct vnode_attr va;
- int error;
-
- VATTR_INIT(&va);
- VATTR_WANTED(&va, va_data_size);
- error = vnode_getattr(vp, &va, ctx);
- if (!error)
- *sizep = va.va_data_size;
- return(error);
-}
-
-/*
- * XXX Liang:
- *
- * kern_file_*() are not safe for multi-threads now,
- * however, we need them only for tracefiled, so it's
- * not so important to implement for MT.
- */
-int
-kern_file_size(struct cfs_kern_file *fp, off_t *psize)
-{
- int error;
- off_t size;
-
- error = vnode_size(fp->f_vp, &size, fp->f_ctxt);
- if (error)
- return error;
-
- if (psize)
- *psize = size;
- return 0;
-}
-
-struct cfs_kern_file *
-kern_file_open(const char * filename, int uflags, int mode, int *err)
-{
- struct cfs_kern_file *fp;
- vnode_t vp;
- int error;
-
- fp = (struct cfs_kern_file *)_MALLOC(sizeof(struct cfs_kern_file), M_TEMP, M_WAITOK);
- if (fp == NULL) {
- if (err != NULL)
- *err = -ENOMEM;
- return NULL;
- }
- fp->f_flags = FFLAGS(uflags);
- fp->f_ctxt = vfs_context_create(NULL);
-
- if ((error = vnode_open(filename, fp->f_flags,
- mode, 0, &vp, fp->f_ctxt))){
- if (err != NULL)
- *err = -error;
- _FREE(fp, M_TEMP);
- } else {
- if (err != NULL)
- *err = 0;
- fp->f_vp = vp;
- }
-
- return fp;
-}
-
-int
-kern_file_close(struct cfs_kern_file *fp)
-{
- vnode_close(fp->f_vp, fp->f_flags, fp->f_ctxt);
- vfs_context_rele(fp->f_ctxt);
- _FREE(fp, M_TEMP);
-
- return 0;
-}
-
-int
-kern_file_read(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos)
-{
- struct proc *p = current_proc();
- int resid;
- int error;
-
- assert(buf != NULL);
- assert(fp != NULL && fp->f_vp != NULL);
-
- error = vn_rdwr(UIO_READ, fp->f_vp, buf, nbytes, *pos,
- UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p);
- if ((error) || (nbytes == resid)) {
- if (!error)
- error = -EINVAL;
- return error;
- }
- *pos += nbytes - resid;
-
- return (int)(nbytes - resid);
-}
-
-int
-kern_file_write(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos)
-{
- struct proc *p = current_proc();
- int resid;
- int error;
-
- assert(buf != NULL);
- assert(fp != NULL && fp->f_vp != NULL);
-
- error = vn_rdwr(UIO_WRITE, fp->f_vp, buf, nbytes, *pos,
- UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p);
- if ((error) || (nbytes == resid)) {
- if (!error)
- error = -EINVAL;
- return error;
- }
- *pos += nbytes - resid;
-
- return (int)(nbytes - resid);
-
-}
-
-int
-kern_file_sync (struct cfs_kern_file *fp)
-{
- return VNOP_FSYNC(fp->f_vp, MNT_WAIT, fp->f_ctxt);
-}
-
-#else /* !__DARWIN8__ */
-
-int
-kern_file_size(struct file *fp, off_t *size)
-{
- struct vnode *vp = (struct vnode *)fp->f_data;
- struct stat sb;
- int rc;
-
- rc = vn_stat(vp, &sb, current_proc());
- if (rc) {
- *size = 0;
- return rc;
- }
- *size = sb.st_size;
- return 0;
-}
-
-cfs_file_t *
-kern_file_open(const char * filename, int flags, int mode, int *err)
-{
- struct nameidata nd;
- cfs_file_t *fp;
- register struct vnode *vp;
- int rc;
- extern struct fileops vnops;
- extern int nfiles;
- CFS_DECL_CONE_DATA;
-
- CFS_CONE_IN;
- nfiles++;
- MALLOC_ZONE(fp, cfs_file_t *, sizeof(cfs_file_t), M_FILE, M_WAITOK|M_ZERO);
- bzero(fp, sizeof(cfs_file_t));
- fp->f_count = 1;
- LIST_CIRCLE(fp, f_list);
- NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)filename, current_proc());
- if ((rc = vn_open(&nd, flags, mode)) != 0){
- printf("filp_open failed at (%d)\n", rc);
- if (err != NULL)
- *err = rc;
- FREE_ZONE(fp, sizeof *fp, M_FILE);
- CFS_CONE_EX;
- return NULL;
- }
- vp = nd.ni_vp;
- fp->f_flag = flags & FMASK;
- fp->f_type = DTYPE_VNODE;
- fp->f_ops = &vnops;
- fp->f_data = (caddr_t)vp;
- fp->f_cred = current_proc()->p_ucred;
- /*
- * Hold cred to increase reference
- */
- crhold(fp->f_cred);
- /*
- * vnode is locked inside vn_open for lookup,
- * we should release the lock before return
- */
- VOP_UNLOCK(vp, 0, current_proc());
- CFS_CONE_EX;
-
- return fp;
-}
-
-static int
-frele_internal(cfs_file_t *fp)
-{
- if (fp->f_count == (short)0xffff)
- panic("frele of lustre: stale");
- if (--fp->f_count < 0)
- panic("frele of lustre: count < 0");
- return ((int)fp->f_count);
-}
-
-int
-kern_file_close (cfs_file_t *fp)
-{
- struct vnode *vp;
- CFS_DECL_CONE_DATA;
-
- if (fp == NULL)
- return 0;
-
- CFS_CONE_IN;
- if (frele_internal(fp) > 0)
- goto out;
- vp = (struct vnode *)fp->f_data;
- (void )vn_close(vp, fp->f_flag, fp->f_cred, current_proc());
- /*
- * ffree(fp);
- * Dont use ffree to release fp!!!!
- * ffree will call LIST_REMOVE(fp),
- * but fp is not in any list, this will
- * cause kernel panic
- */
- struct ucred *cred;
- cred = fp->f_cred;
- if (cred != NOCRED) {
- fp->f_cred = NOCRED;
- crfree(cred);
- }
- extern int nfiles;
- nfiles--;
- memset(fp, 0xff, sizeof *fp);
- fp->f_count = (short)0xffff;
- FREE_ZONE(fp, sizeof *fp, M_FILE);
-out:
- CFS_CONE_EX;
- return 0;
-}
-
-extern void bwillwrite(void);
-
-/*
- * Write buffer to filp inside kernel
- */
-int
-kern_file_write (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos)
-{
- struct uio auio;
- struct iovec aiov;
- struct proc *p = current_proc();
- long cnt, error = 0;
- int flags = 0;
- CFS_DECL_CONE_DATA;
-
- aiov.iov_base = (void *)(uintptr_t)buf;
- aiov.iov_len = nbyte;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- if (pos != NULL) {
- auio.uio_offset = *pos;
- /*
- * Liang: If don't set FOF_OFFSET, vn_write()
- * will use fp->f_offset as the the real offset.
- * Same in vn_read()
- */
- flags |= FOF_OFFSET;
- } else
- auio.uio_offset = (off_t)-1;
- if (nbyte > INT_MAX)
- return (EINVAL);
- auio.uio_resid = nbyte;
- auio.uio_rw = UIO_WRITE;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_procp = p;
-
- cnt = nbyte;
- CFS_CONE_IN;
- if (fp->f_type == DTYPE_VNODE)
- bwillwrite(); /* empty stuff now */
- if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
- if (auio.uio_resid != cnt && (error == ERESTART ||\
- error == EINTR || error == EWOULDBLOCK))
- error = 0;
- /* The socket layer handles SIGPIPE */
- if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
- psignal(p, SIGPIPE);
- }
- CFS_CONE_EX;
- if (error != 0)
- cnt = -error;
- else
- cnt -= auio.uio_resid;
- if (pos != NULL)
- *pos += cnt;
- return cnt;
-}
-
-/*
- * Read from filp inside kernel
- */
-int
-kern_file_read (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos)
-{
- struct uio auio;
- struct iovec aiov;
- struct proc *p = current_proc();
- long cnt, error = 0;
- int flags = 0;
- CFS_DECL_CONE_DATA;
-
- aiov.iov_base = (caddr_t)buf;
- aiov.iov_len = nbyte;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- if (pos != NULL) {
- auio.uio_offset = *pos;
- flags |= FOF_OFFSET;
- } else
- auio.uio_offset = (off_t)-1;
- if (nbyte > INT_MAX)
- return (EINVAL);
- auio.uio_resid = nbyte;
- auio.uio_rw = UIO_READ;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_procp = p;
-
- cnt = nbyte;
- CFS_CONE_IN;
- if ((error = fo_read(fp, &auio, fp->f_cred, flags, p)) != 0) {
- if (auio.uio_resid != cnt && (error == ERESTART ||
- error == EINTR || error == EWOULDBLOCK))
- error = 0;
- }
- CFS_CONE_EX;
- if (error != 0)
- cnt = -error;
- else
- cnt -= auio.uio_resid;
- if (pos != NULL)
- *pos += cnt;
-
- return cnt;
-}
-
-int
-kern_file_sync (cfs_file_t *fp)
-{
- struct vnode *vp = (struct vnode *)fp->f_data;
- struct proc *p = current_proc();
- int error = 0;
- CFS_DECL_CONE_DATA;
-
- CFS_CONE_IN;
- if (fref(fp) == -1) {
- CFS_CONE_EX;
- return (-EBADF);
- }
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
- VOP_UNLOCK(vp, 0, p);
- frele(fp);
- CFS_CONE_EX;
-
- return error;
-}
-
-#endif /* !__DARWIN8__ */
-
-struct posix_acl *posix_acl_alloc(int count, int flags)
-{
- static struct posix_acl acl;
- return &acl;
-}
-
-/*
- * XXX Liang: I've not converted all of them,
- * more is needed?
- */
-int cfs_oflags2univ(int flags)
-{
- int f;
-
- f = flags & O_ACCMODE;
- f |= (flags & O_CREAT) ? CFS_O_CREAT: 0;
- f |= (flags & O_TRUNC) ? CFS_O_TRUNC: 0;
- f |= (flags & O_EXCL) ? CFS_O_EXCL: 0;
- f |= (flags & O_NONBLOCK) ? CFS_O_NONBLOCK: 0;
- f |= (flags & O_APPEND) ? CFS_O_APPEND: 0;
- f |= (flags & O_NOFOLLOW) ? CFS_O_NOFOLLOW: 0;
- f |= (flags & O_SYNC)? CFS_O_SYNC: 0;
- return f;
-}
-
-/*
- * XXX Liang: we don't need it in OSX.
- * But it should be implemented anyway.
- */
-int cfs_univ2oflags(int flags)
-{
- return flags;
-}
+++ /dev/null
-#ifndef __LIBCFS_DARWIN_INTERNAL_H__
-#define __LIBCFS_DARWIN_INTERNAL_H__
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/systm.h>
-#include <sys/sysctl.h>
-
-int cfs_sysctl_isvalid(void);
-struct sysctl_oid *cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, int (*handler) SYSCTL_HANDLER_ARGS);
-struct sysctl_oid *cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int n,
- const char *name, int *ptr, int val);
-struct sysctl_oid * cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, int *ptr, int val);
-struct sysctl_oid * cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, char *ptr, int len);
-struct sysctl_oid * cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, void *ptr, int size);
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- * Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Darwin porting library
- * Make things easy to port
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <mach/mach_types.h>
-#include <string.h>
-#include <sys/malloc.h>
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-#include "darwin-internal.h"
-
-#if CFS_INDIVIDUAL_ZONE
-extern zone_t zinit( vm_size_t, vm_size_t, vm_size_t, const char *);
-extern void * zalloc(zone_t zone);
-extern void *zalloc_noblock(zone_t zone);
-extern void zfree(zone_t zone, void *addr);
-
-struct cfs_zone_nob {
- struct list_head *z_nob; /* Pointer to z_link */
- struct list_head z_link; /* Do NOT access it directly */
-};
-
-static struct cfs_zone_nob cfs_zone_nob;
-static spinlock_t cfs_zone_guard;
-
-cfs_mem_cache_t *mem_cache_find(const char *name, size_t objsize)
-{
- cfs_mem_cache_t *walker = NULL;
-
- LASSERT(cfs_zone_nob.z_nob != NULL);
-
- spin_lock(&cfs_zone_guard);
- list_for_each_entry(walker, cfs_zone_nob.z_nob, mc_link) {
- if (!strcmp(walker->mc_name, name) && \
- walker->mc_size == objsize)
- break;
- }
- spin_unlock(&cfs_zone_guard);
-
- return walker;
-}
-
-/*
- * our wrapper around kern/zalloc.c:zinit()
- *
- * Creates copy of name and calls zinit() to do real work. Needed because zone
- * survives kext unloading, so that @name cannot be just static string
- * embedded into kext image.
- */
-cfs_mem_cache_t *mem_cache_create(vm_size_t objsize, const char *name)
-{
- cfs_mem_cache_t *mc = NULL;
- char *cname;
-
- MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO);
- if (mc == NULL){
- CERROR("cfs_mem_cache created fail!\n");
- return NULL;
- }
-
- cname = _MALLOC(strlen(name) + 1, M_TEMP, M_WAITOK);
- LASSERT(cname != NULL);
- mc->mc_cache = zinit(objsize, (KMEM_MAX_ZONE * objsize), 0, strcpy(cname, name));
- mc->mc_size = objsize;
- CFS_INIT_LIST_HEAD(&mc->mc_link);
- strncpy(mc->mc_name, name, 1 + strlen(name));
- return mc;
-}
-
-void mem_cache_destroy(cfs_mem_cache_t *mc)
-{
- /*
- * zone can NOT be destroyed after creating,
- * so just keep it in list.
- *
- * We will not lost a zone after we unload
- * libcfs, it can be found by from libcfs.zone
- */
- return;
-}
-
-#define mem_cache_alloc(mc) zalloc((mc)->mc_cache)
-#ifdef __DARWIN8__
-# define mem_cache_alloc_nb(mc) zalloc((mc)->mc_cache)
-#else
-/* XXX Liang: Tiger doesn't export zalloc_noblock() */
-# define mem_cache_alloc_nb(mc) zalloc_noblock((mc)->mc_cache)
-#endif
-#define mem_cache_free(mc, p) zfree((mc)->mc_cache, p)
-
-#else /* !CFS_INDIVIDUAL_ZONE */
-
-cfs_mem_cache_t *
-mem_cache_find(const char *name, size_t objsize)
-{
- return NULL;
-}
-
-cfs_mem_cache_t *mem_cache_create(vm_size_t size, const char *name)
-{
- cfs_mem_cache_t *mc = NULL;
-
- MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO);
- if (mc == NULL){
- CERROR("cfs_mem_cache created fail!\n");
- return NULL;
- }
- mc->mc_cache = OSMalloc_Tagalloc(name, OSMT_DEFAULT);
- mc->mc_size = size;
- return mc;
-}
-
-void mem_cache_destroy(cfs_mem_cache_t *mc)
-{
- OSMalloc_Tagfree(mc->mc_cache);
- FREE(mc, M_TEMP);
-}
-
-#define mem_cache_alloc(mc) OSMalloc((mc)->mc_size, (mc)->mc_cache)
-#define mem_cache_alloc_nb(mc) OSMalloc_noblock((mc)->mc_size, (mc)->mc_cache)
-#define mem_cache_free(mc, p) OSFree(p, (mc)->mc_size, (mc)->mc_cache)
-
-#endif /* !CFS_INDIVIDUAL_ZONE */
-
-cfs_mem_cache_t *
-cfs_mem_cache_create (const char *name,
- size_t objsize, size_t off, unsigned long arg1)
-{
- cfs_mem_cache_t *mc;
-
- mc = mem_cache_find(name, objsize);
- if (mc)
- return mc;
- mc = mem_cache_create(objsize, name);
- return mc;
-}
-
-int cfs_mem_cache_destroy (cfs_mem_cache_t *cachep)
-{
- mem_cache_destroy(cachep);
- return 0;
-}
-
-void *cfs_mem_cache_alloc (cfs_mem_cache_t *cachep, int flags)
-{
- void *result;
-
- /* zalloc_canblock() is not exported... Emulate it. */
- if (flags & CFS_ALLOC_ATOMIC) {
- result = (void *)mem_cache_alloc_nb(cachep);
- } else {
- LASSERT(get_preemption_level() == 0);
- result = (void *)mem_cache_alloc(cachep);
- }
- if (result != NULL && (flags & CFS_ALLOC_ZERO))
- memset(result, 0, cachep->mc_size);
-
- return result;
-}
-
-void cfs_mem_cache_free (cfs_mem_cache_t *cachep, void *objp)
-{
- mem_cache_free(cachep, objp);
-}
-
-/* ---------------------------------------------------------------------------
- * Page operations
- *
- * --------------------------------------------------------------------------- */
-
-/*
- * "Raw" pages
- */
-
-static unsigned int raw_pages = 0;
-static cfs_mem_cache_t *raw_page_cache = NULL;
-
-static struct xnu_page_ops raw_page_ops;
-static struct xnu_page_ops *page_ops[XNU_PAGE_NTYPES] = {
- [XNU_PAGE_RAW] = &raw_page_ops
-};
-
-#if defined(LIBCFS_DEBUG)
-static int page_type_is_valid(cfs_page_t *page)
-{
- LASSERT(page != NULL);
- return 0 <= page->type && page->type < XNU_PAGE_NTYPES;
-}
-
-static int page_is_raw(cfs_page_t *page)
-{
- return page->type == XNU_PAGE_RAW;
-}
-#endif
-
-static struct xnu_raw_page *as_raw(cfs_page_t *page)
-{
- LASSERT(page_is_raw(page));
- return list_entry(page, struct xnu_raw_page, header);
-}
-
-static void *raw_page_address(cfs_page_t *pg)
-{
- return (void *)as_raw(pg)->virtual;
-}
-
-static void *raw_page_map(cfs_page_t *pg)
-{
- return (void *)as_raw(pg)->virtual;
-}
-
-static void raw_page_unmap(cfs_page_t *pg)
-{
-}
-
-static struct xnu_page_ops raw_page_ops = {
- .page_map = raw_page_map,
- .page_unmap = raw_page_unmap,
- .page_address = raw_page_address
-};
-
-extern int get_preemption_level(void);
-
-struct list_head page_death_row;
-spinlock_t page_death_row_phylax;
-
-static void raw_page_finish(struct xnu_raw_page *pg)
-{
- -- raw_pages;
- if (pg->virtual != NULL)
- cfs_mem_cache_free(raw_page_cache, pg->virtual);
- cfs_free(pg);
-}
-
-void raw_page_death_row_clean(void)
-{
- struct xnu_raw_page *pg;
-
- spin_lock(&page_death_row_phylax);
- while (!list_empty(&page_death_row)) {
- pg = container_of(page_death_row.next,
- struct xnu_raw_page, link);
- list_del(&pg->link);
- spin_unlock(&page_death_row_phylax);
- raw_page_finish(pg);
- spin_lock(&page_death_row_phylax);
- }
- spin_unlock(&page_death_row_phylax);
-}
-
-/* Free a "page" */
-void free_raw_page(struct xnu_raw_page *pg)
-{
- if (!atomic_dec_and_test(&pg->count))
- return;
- /*
- * kmem_free()->vm_map_remove()->vm_map_delete()->lock_write() may
- * block. (raw_page_done()->upl_abort() can block too) On the other
- * hand, cfs_free_page() may be called in non-blockable context. To
- * work around this, park pages on global list when cannot block.
- */
- if (get_preemption_level() > 0) {
- spin_lock(&page_death_row_phylax);
- list_add(&pg->link, &page_death_row);
- spin_unlock(&page_death_row_phylax);
- } else {
- raw_page_finish(pg);
- raw_page_death_row_clean();
- }
-}
-
-cfs_page_t *cfs_alloc_page(u_int32_t flags)
-{
- struct xnu_raw_page *page;
-
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- page = cfs_alloc(sizeof *page, flags);
- if (page != NULL) {
- page->virtual = cfs_mem_cache_alloc(raw_page_cache, flags);
- if (page->virtual != NULL) {
- ++ raw_pages;
- page->header.type = XNU_PAGE_RAW;
- atomic_set(&page->count, 1);
- } else {
- cfs_free(page);
- page = NULL;
- }
- }
- return page != NULL ? &page->header : NULL;
-}
-
-void cfs_free_page(cfs_page_t *pages)
-{
- free_raw_page(as_raw(pages));
-}
-
-void cfs_get_page(cfs_page_t *p)
-{
- atomic_inc(&as_raw(p)->count);
-}
-
-int cfs_put_page_testzero(cfs_page_t *p)
-{
- return atomic_dec_and_test(&as_raw(p)->count);
-}
-
-int cfs_page_count(cfs_page_t *p)
-{
- return atomic_read(&as_raw(p)->count);
-}
-
-/*
- * Generic page operations
- */
-
-void *cfs_page_address(cfs_page_t *pg)
-{
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- LASSERT(page_type_is_valid(pg));
- return page_ops[pg->type]->page_address(pg);
-}
-
-void *cfs_kmap(cfs_page_t *pg)
-{
- LASSERT(page_type_is_valid(pg));
- return page_ops[pg->type]->page_map(pg);
-}
-
-void cfs_kunmap(cfs_page_t *pg)
-{
- LASSERT(page_type_is_valid(pg));
- return page_ops[pg->type]->page_unmap(pg);
-}
-
-void xnu_page_ops_register(int type, struct xnu_page_ops *ops)
-{
- LASSERT(0 <= type && type < XNU_PAGE_NTYPES);
- LASSERT(ops != NULL);
- LASSERT(page_ops[type] == NULL);
-
- page_ops[type] = ops;
-}
-
-void xnu_page_ops_unregister(int type)
-{
- LASSERT(0 <= type && type < XNU_PAGE_NTYPES);
- LASSERT(page_ops[type] != NULL);
-
- page_ops[type] = NULL;
-}
-
-/*
- * Portable memory allocator API
- */
-#ifdef HAVE_GET_PREEMPTION_LEVEL
-extern int get_preemption_level(void);
-#else
-#define get_preemption_level() (0)
-#endif
-
-void *cfs_alloc(size_t nr_bytes, u_int32_t flags)
-{
- int mflags;
-
- mflags = 0;
- if (flags & CFS_ALLOC_ATOMIC) {
- mflags |= M_NOWAIT;
- } else {
- LASSERT(get_preemption_level() == 0);
- mflags |= M_WAITOK;
- }
-
- if (flags & CFS_ALLOC_ZERO)
- mflags |= M_ZERO;
-
- return _MALLOC(nr_bytes, M_TEMP, mflags);
-}
-
-void cfs_free(void *addr)
-{
- return _FREE(addr, M_TEMP);
-}
-
-void *cfs_alloc_large(size_t nr_bytes)
-{
- LASSERT(get_preemption_level() == 0);
- return _MALLOC(nr_bytes, M_TEMP, M_WAITOK);
-}
-
-void cfs_free_large(void *addr)
-{
- LASSERT(get_preemption_level() == 0);
- return _FREE(addr, M_TEMP);
-}
-
-/*
- * Lookup cfs_zone_nob by sysctl.zone, if it cannot be
- * found (first load of * libcfs since boot), allocate
- * sysctl libcfs.zone.
- */
-int cfs_mem_init(void)
-{
-#if CFS_INDIVIDUAL_ZONE
- int rc;
- size_t len;
-
- len = sizeof(struct cfs_zone_nob);
- rc = sysctlbyname("libcfs.zone",
- (void *)&cfs_zone_nob, &len, NULL, 0);
- if (rc == ENOENT) {
- /* zone_nob is not register in libcfs_sysctl */
- struct cfs_zone_nob *nob;
- struct sysctl_oid *oid;
-
- assert(cfs_sysctl_isvalid());
-
- nob = _MALLOC(sizeof(struct cfs_zone_nob),
- M_TEMP, M_WAITOK | M_ZERO);
- CFS_INIT_LIST_HEAD(&nob->z_link);
- nob->z_nob = &nob->z_link;
- oid = cfs_alloc_sysctl_struct(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN,
- "zone", nob, sizeof(struct cfs_zone_nob));
- if (oid == NULL) {
- _FREE(nob, M_TEMP);
- return -ENOMEM;
- }
- sysctl_register_oid(oid);
-
- cfs_zone_nob.z_nob = nob->z_nob;
- }
- spin_lock_init(&cfs_zone_guard);
-#endif
- CFS_INIT_LIST_HEAD(&page_death_row);
- spin_lock_init(&page_death_row_phylax);
- raw_page_cache = cfs_mem_cache_create("raw-page", CFS_PAGE_SIZE, 0, 0);
- return 0;
-}
-
-void cfs_mem_fini(void)
-{
- raw_page_death_row_clean();
- spin_lock_done(&page_death_row_phylax);
- cfs_mem_cache_destroy(raw_page_cache);
-
-#if CFS_INDIVIDUAL_ZONE
- cfs_zone_nob.z_nob = NULL;
- spin_lock_done(&cfs_zone_guard);
-#endif
-}
+++ /dev/null
-#include <mach/mach_types.h>
-#include <string.h>
-#include <sys/file.h>
-#include <sys/conf.h>
-#include <miscfs/devfs/devfs.h>
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
-{
- struct libcfs_ioctl_hdr *hdr;
- struct libcfs_ioctl_data *data;
- int err = 0;
- ENTRY;
-
- hdr = (struct libcfs_ioctl_hdr *)buf;
- data = (struct libcfs_ioctl_data *)buf;
- /* libcfs_ioctl_data has been copied in by ioctl of osx */
- memcpy(buf, arg, sizeof(struct libcfs_ioctl_data));
-
- if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
- CERROR("LIBCFS: version mismatch kernel vs application\n");
- RETURN(-EINVAL);
- }
-
- if (hdr->ioc_len + buf >= end) {
- CERROR("LIBCFS: user buffer exceeds kernel buffer\n");
- RETURN(-EINVAL);
- }
-
- if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) {
- CERROR("LIBCFS: user buffer too small for ioctl\n");
- RETURN(-EINVAL);
- }
- buf += size_round(sizeof(*data));
-
- if (data->ioc_inllen1) {
- err = copy_from_user(buf, data->ioc_inlbuf1, size_round(data->ioc_inllen1));
- if (err)
- RETURN(err);
- data->ioc_inlbuf1 = buf;
- buf += size_round(data->ioc_inllen1);
- }
-
- if (data->ioc_inllen2) {
- copy_from_user(buf, data->ioc_inlbuf2, size_round(data->ioc_inllen2));
- if (err)
- RETURN(err);
- data->ioc_inlbuf2 = buf;
- }
-
- RETURN(err);
-}
-
-int libcfs_ioctl_popdata(void *arg, void *data, int size)
-{
- /*
- * system call will copy out ioctl arg to user space
- */
- memcpy(arg, data, size);
- return 0;
-}
-
-extern struct cfs_psdev_ops libcfs_psdev_ops;
-struct libcfs_device_userstate *mdev_state[16];
-
-static int
-libcfs_psdev_open(dev_t dev, int flags, int devtype, struct proc *p)
-{
- struct libcfs_device_userstate *mstat = NULL;
- int rc = 0;
- int devid;
- devid = minor(dev);
-
- if (devid > 16) return (ENXIO);
-
- if (libcfs_psdev_ops.p_open != NULL)
- rc = -libcfs_psdev_ops.p_open(0, &mstat);
- else
- rc = EPERM;
- if (rc == 0)
- mdev_state[devid] = mstat;
- return rc;
-}
-
-static int
-libcfs_psdev_close(dev_t dev, int flags, int mode, struct proc *p)
-{
- int devid;
- devid = minor(dev);
- int rc = 0;
-
- if (devid > 16) return (ENXIO);
-
- if (libcfs_psdev_ops.p_close != NULL)
- rc = -libcfs_psdev_ops.p_close(0, mdev_state[devid]);
- else
- rc = EPERM;
- if (rc == 0)
- mdev_state[devid] = NULL;
- return rc;
-}
-
-static int
-libcfs_ioctl (dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p)
-{
- int rc = 0;
- struct cfs_psdev_file pfile;
- int devid;
- devid = minor(dev);
-
- if (devid > 16) return (ENXIO);
-
- if (!is_suser())
- return (EPERM);
-
- pfile.off = 0;
- pfile.private_data = mdev_state[devid];
-
- if (libcfs_psdev_ops.p_ioctl != NULL)
- rc = -libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
- else
- rc = EPERM;
- return rc;
-}
-
-static struct cdevsw libcfs_devsw =
-{
- .d_open = libcfs_psdev_open,
- .d_close = libcfs_psdev_close,
- .d_read = eno_rdwrt,
- .d_write = eno_rdwrt,
- .d_ioctl = libcfs_ioctl,
- .d_stop = eno_stop,
- .d_reset = eno_reset,
- .d_ttys = NULL,
- .d_select = eno_select,
- .d_mmap = eno_mmap,
- .d_strategy = eno_strat,
- .d_getc = eno_getc,
- .d_putc = eno_putc,
- .d_type = 0
-};
-
-cfs_psdev_t libcfs_dev = {
- -1,
- NULL,
- "lnet",
- &libcfs_devsw,
- NULL
-};
-
-extern spinlock_t trace_cpu_serializer;
-extern void cfs_sync_init(void);
-extern void cfs_sync_fini(void);
-extern int cfs_sysctl_init(void);
-extern void cfs_sysctl_fini(void);
-extern int cfs_mem_init(void);
-extern int cfs_mem_fini(void);
-extern void raw_page_death_row_clean(void);
-extern void cfs_thread_agent_init(void);
-extern void cfs_thread_agent_fini(void);
-extern void cfs_symbol_init(void);
-extern void cfs_symbol_fini(void);
-
-int libcfs_arch_init(void)
-{
- cfs_sync_init();
- cfs_sysctl_init();
- cfs_mem_init();
- cfs_thread_agent_init();
- cfs_symbol_init();
-
- spin_lock_init(&trace_cpu_serializer);
-
- return 0;
-}
-
-void libcfs_arch_cleanup(void)
-{
- spin_lock_done(&trace_cpu_serializer);
-
- cfs_symbol_fini();
- cfs_thread_agent_fini();
- cfs_mem_fini();
- cfs_sysctl_fini();
- cfs_sync_fini();
-}
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Darwin porting library
- * Make things easy to port
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <mach/mach_types.h>
-#include <string.h>
-#include <sys/file.h>
-#include <sys/conf.h>
-#include <sys/uio.h>
-#include <sys/filedesc.h>
-#include <sys/namei.h>
-#include <miscfs/devfs/devfs.h>
-#include <kern/thread.h>
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-/*
- * cfs pseudo device, actually pseudo char device in darwin
- */
-#define KLNET_MAJOR -1
-
-kern_return_t cfs_psdev_register(cfs_psdev_t *dev) {
- dev->index = cdevsw_add(KLNET_MAJOR, dev->devsw);
- if (dev->index < 0) {
- printf("libcfs_init: failed to allocate a major number!\n");
- return KERN_FAILURE;
- }
- dev->handle = devfs_make_node(makedev (dev->index, 0),
- DEVFS_CHAR, UID_ROOT,
- GID_WHEEL, 0666, (char *)dev->name, 0);
- return KERN_SUCCESS;
-}
-
-kern_return_t cfs_psdev_deregister(cfs_psdev_t *dev) {
- devfs_remove(dev->handle);
- cdevsw_remove(dev->index, dev->devsw);
- return KERN_SUCCESS;
-}
-
-/*
- * KPortal symbol register / unregister support
- */
-struct rw_semaphore cfs_symbol_lock;
-struct list_head cfs_symbol_list;
-
-void *
-cfs_symbol_get(const char *name)
-{
- struct list_head *walker;
- struct cfs_symbol *sym = NULL;
-
- down_read(&cfs_symbol_lock);
- list_for_each(walker, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- if (!strcmp(sym->name, name)) {
- sym->ref ++;
- break;
- }
- }
- up_read(&cfs_symbol_lock);
- if (sym != NULL)
- return sym->value;
- return NULL;
-}
-
-kern_return_t
-cfs_symbol_put(const char *name)
-{
- struct list_head *walker;
- struct cfs_symbol *sym = NULL;
-
- down_read(&cfs_symbol_lock);
- list_for_each(walker, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- if (!strcmp(sym->name, name)) {
- sym->ref --;
- LASSERT(sym->ref >= 0);
- break;
- }
- }
- up_read(&cfs_symbol_lock);
- LASSERT(sym != NULL);
-
- return 0;
-}
-
-kern_return_t
-cfs_symbol_register(const char *name, const void *value)
-{
- struct list_head *walker;
- struct cfs_symbol *sym = NULL;
- struct cfs_symbol *new = NULL;
-
- MALLOC(new, struct cfs_symbol *, sizeof(struct cfs_symbol), M_TEMP, M_WAITOK|M_ZERO);
- strncpy(new->name, name, CFS_SYMBOL_LEN);
- new->value = (void *)value;
- new->ref = 0;
- CFS_INIT_LIST_HEAD(&new->sym_list);
-
- down_write(&cfs_symbol_lock);
- list_for_each(walker, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- if (!strcmp(sym->name, name)) {
- up_write(&cfs_symbol_lock);
- FREE(new, M_TEMP);
- return KERN_NAME_EXISTS;
- }
-
- }
- list_add_tail(&new->sym_list, &cfs_symbol_list);
- up_write(&cfs_symbol_lock);
-
- return KERN_SUCCESS;
-}
-
-kern_return_t
-cfs_symbol_unregister(const char *name)
-{
- struct list_head *walker;
- struct list_head *nxt;
- struct cfs_symbol *sym = NULL;
-
- down_write(&cfs_symbol_lock);
- list_for_each_safe(walker, nxt, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- if (!strcmp(sym->name, name)) {
- LASSERT(sym->ref == 0);
- list_del (&sym->sym_list);
- FREE(sym, M_TEMP);
- break;
- }
- }
- up_write(&cfs_symbol_lock);
-
- return KERN_SUCCESS;
-}
-
-void
-cfs_symbol_init()
-{
- CFS_INIT_LIST_HEAD(&cfs_symbol_list);
- init_rwsem(&cfs_symbol_lock);
-}
-
-void
-cfs_symbol_fini()
-{
- struct list_head *walker;
- struct cfs_symbol *sym = NULL;
-
- down_write(&cfs_symbol_lock);
- list_for_each(walker, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- LASSERT(sym->ref == 0);
- list_del (&sym->sym_list);
- FREE(sym, M_TEMP);
- }
- up_write(&cfs_symbol_lock);
-
- fini_rwsem(&cfs_symbol_lock);
- return;
-}
-
-struct kernel_thread_arg
-{
- spinlock_t lock;
- atomic_t inuse;
- cfs_thread_t func;
- void *arg;
-};
-
-struct kernel_thread_arg cfs_thread_arg;
-
-#define THREAD_ARG_FREE 0
-#define THREAD_ARG_HOLD 1
-#define THREAD_ARG_RECV 2
-
-#define set_targ_stat(a, v) atomic_set(&(a)->inuse, v)
-#define get_targ_stat(a) atomic_read(&(a)->inuse)
-
-/*
- * Hold the thread argument and set the status of thread_status
- * to THREAD_ARG_HOLD, if the thread argument is held by other
- * threads (It's THREAD_ARG_HOLD already), current-thread has to wait.
- */
-#define thread_arg_hold(pta, _func, _arg) \
- do { \
- spin_lock(&(pta)->lock); \
- if (get_targ_stat(pta) == THREAD_ARG_FREE) { \
- set_targ_stat((pta), THREAD_ARG_HOLD); \
- (pta)->arg = (void *)_arg; \
- (pta)->func = _func; \
- spin_unlock(&(pta)->lock); \
- break; \
- } \
- spin_unlock(&(pta)->lock); \
- cfs_schedule(); \
- } while(1); \
-
-/*
- * Release the thread argument if the thread argument has been
- * received by the child-thread (Status of thread_args is
- * THREAD_ARG_RECV), otherwise current-thread has to wait.
- * After release, the thread_args' status will be set to
- * THREAD_ARG_FREE, and others can re-use the thread_args to
- * create new kernel_thread.
- */
-#define thread_arg_release(pta) \
- do { \
- spin_lock(&(pta)->lock); \
- if (get_targ_stat(pta) == THREAD_ARG_RECV) { \
- (pta)->arg = NULL; \
- (pta)->func = NULL; \
- set_targ_stat(pta, THREAD_ARG_FREE); \
- spin_unlock(&(pta)->lock); \
- break; \
- } \
- spin_unlock(&(pta)->lock); \
- cfs_schedule(); \
- } while(1)
-
-/*
- * Receive thread argument (Used in child thread), set the status
- * of thread_args to THREAD_ARG_RECV.
- */
-#define __thread_arg_recv_fin(pta, _func, _arg, fin) \
- do { \
- spin_lock(&(pta)->lock); \
- if (get_targ_stat(pta) == THREAD_ARG_HOLD) { \
- if (fin) \
- set_targ_stat(pta, THREAD_ARG_RECV);\
- _arg = (pta)->arg; \
- _func = (pta)->func; \
- spin_unlock(&(pta)->lock); \
- break; \
- } \
- spin_unlock(&(pta)->lock); \
- cfs_schedule(); \
- } while (1); \
-
-/*
- * Just set the thread_args' status to THREAD_ARG_RECV
- */
-#define thread_arg_fin(pta) \
- do { \
- spin_lock(&(pta)->lock); \
- assert( get_targ_stat(pta) == THREAD_ARG_HOLD); \
- set_targ_stat(pta, THREAD_ARG_RECV); \
- spin_unlock(&(pta)->lock); \
- } while(0)
-
-#define thread_arg_recv(pta, f, a) __thread_arg_recv_fin(pta, f, a, 1)
-#define thread_arg_keep(pta, f, a) __thread_arg_recv_fin(pta, f, a, 0)
-
-void
-cfs_thread_agent_init(void)
-{
- set_targ_stat(&cfs_thread_arg, THREAD_ARG_FREE);
- spin_lock_init(&cfs_thread_arg.lock);
- cfs_thread_arg.arg = NULL;
- cfs_thread_arg.func = NULL;
-}
-
-void
-cfs_thread_agent_fini(void)
-{
- assert(get_targ_stat(&cfs_thread_arg) == THREAD_ARG_FREE);
-
- spin_lock_done(&cfs_thread_arg.lock);
-}
-
-/*
- *
- * All requests to create kernel thread will create a new
- * thread instance of cfs_thread_agent, one by one.
- * cfs_thread_agent will call the caller's thread function
- * with argument supplied by caller.
- */
-void
-cfs_thread_agent (void)
-{
- cfs_thread_t func = NULL;
- void *arg = NULL;
-
- thread_arg_recv(&cfs_thread_arg, func, arg);
- /* printf("entry of thread agent (func: %08lx).\n", (void *)func); */
- assert(func != NULL);
- func(arg);
- /* printf("thread agent exit. (func: %08lx)\n", (void *)func); */
- (void) thread_terminate(current_thread());
-}
-
-extern thread_t kernel_thread(task_t task, void (*start)(void));
-
-int
-cfs_kernel_thread(cfs_thread_t func, void *arg, int flag)
-{
- int ret = 0;
- thread_t th = NULL;
-
- thread_arg_hold(&cfs_thread_arg, func, arg);
- th = kernel_thread(kernel_task, cfs_thread_agent);
- thread_arg_release(&cfs_thread_arg);
- if (th == THREAD_NULL)
- ret = -1;
- return ret;
-}
-
-void cfs_daemonize(char *str)
-{
- snprintf(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX, "%s", str);
- return;
-}
-
-/*
- * XXX Liang: kexts cannot access sigmask in Darwin8.
- * it's almost impossible for us to get/set signal mask
- * without patching kernel.
- * Should we provide these functions in xnu?
- *
- * These signal functions almost do nothing now, we
- * need to investigate more about signal in Darwin.
- */
-cfs_sigset_t cfs_get_blockedsigs()
-{
- return (cfs_sigset_t)0;
-}
-
-extern int block_procsigmask(struct proc *p, int bit);
-
-cfs_sigset_t cfs_block_allsigs()
-{
- cfs_sigset_t old = 0;
-#ifdef __DARWIN8__
-#else
- block_procsigmask(current_proc(), -1);
-#endif
- return old;
-}
-
-cfs_sigset_t cfs_block_sigs(sigset_t bit)
-{
- cfs_sigset_t old = 0;
-#ifdef __DARWIN8__
-#else
- block_procsigmask(current_proc(), bit);
-#endif
- return old;
-}
-
-void cfs_restore_sigs(cfs_sigset_t old)
-{
-}
-
-int cfs_signal_pending(void)
-
-{
-#ifdef __DARWIN8__
- extern int thread_issignal(proc_t, thread_t, sigset_t);
- return thread_issignal(current_proc(), current_thread(), (sigset_t)-1);
-#else
- return SHOULDissignal(current_proc(), current_uthread())
-#endif
-}
-
-void cfs_clear_sigpending(void)
-{
-#ifdef __DARWIN8__
-#else
- clear_procsiglist(current_proc(), -1);
-#endif
-}
-
-#ifdef __DARWIN8__
-
-#else /* !__DARWIN8__ */
-
-void lustre_cone_in(boolean_t *state, funnel_t **cone)
-{
- *cone = thread_funnel_get();
- if (*cone == network_flock)
- thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
- else if (*cone == NULL)
- *state = thread_funnel_set(kernel_flock, TRUE);
-}
-
-void lustre_cone_ex(boolean_t state, funnel_t *cone)
-{
- if (cone == network_flock)
- thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
- else if (cone == NULL)
- (void) thread_funnel_set(kernel_flock, state);
-}
-
-void lustre_net_in(boolean_t *state, funnel_t **cone)
-{
- *cone = thread_funnel_get();
- if (*cone == kernel_flock)
- thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
- else if (*cone == NULL)
- *state = thread_funnel_set(network_flock, TRUE);
-}
-
-void lustre_net_ex(boolean_t state, funnel_t *cone)
-{
- if (cone == kernel_flock)
- thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
- else if (cone == NULL)
- (void) thread_funnel_set(network_flock, state);
-}
-#endif /* !__DARWIN8__ */
-
-void cfs_waitq_init(struct cfs_waitq *waitq)
-{
- ksleep_chan_init(&waitq->wq_ksleep_chan);
-}
-
-void cfs_waitlink_init(struct cfs_waitlink *link)
-{
- ksleep_link_init(&link->wl_ksleep_link);
-}
-
-void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link)
-{
- link->wl_waitq = waitq;
- ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
-}
-
-void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
- struct cfs_waitlink *link)
-{
- link->wl_waitq = waitq;
- link->wl_ksleep_link.flags |= KSLEEP_EXCLUSIVE;
- ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
-}
-
-void cfs_waitq_forward(struct cfs_waitlink *link,
- struct cfs_waitq *waitq)
-{
- link->wl_ksleep_link.forward = &waitq->wq_ksleep_chan;
-}
-
-void cfs_waitq_del(struct cfs_waitq *waitq,
- struct cfs_waitlink *link)
-{
- ksleep_del(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
-}
-
-int cfs_waitq_active(struct cfs_waitq *waitq)
-{
- return (1);
-}
-
-void cfs_waitq_signal(struct cfs_waitq *waitq)
-{
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- ksleep_wake(&waitq->wq_ksleep_chan);
-}
-
-void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr)
-{
- ksleep_wake_nr(&waitq->wq_ksleep_chan, nr);
-}
-
-void cfs_waitq_broadcast(struct cfs_waitq *waitq)
-{
- ksleep_wake_all(&waitq->wq_ksleep_chan);
-}
-
-void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state)
-{
- ksleep_wait(&link->wl_waitq->wq_ksleep_chan, state);
-}
-
-cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
- cfs_task_state_t state,
- cfs_duration_t timeout)
-{
- return ksleep_timedwait(&link->wl_waitq->wq_ksleep_chan,
- state, timeout);
-}
-
-typedef void (*ktimer_func_t)(void *);
-void cfs_timer_init(cfs_timer_t *t, void (* func)(unsigned long), void *arg)
-{
- ktimer_init(&t->t, (ktimer_func_t)func, arg);
-}
-
-void cfs_timer_done(struct cfs_timer *t)
-{
- ktimer_done(&t->t);
-}
-
-void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline)
-{
- ktimer_arm(&t->t, deadline);
-}
-
-void cfs_timer_disarm(struct cfs_timer *t)
-{
- ktimer_disarm(&t->t);
-}
-
-int cfs_timer_is_armed(struct cfs_timer *t)
-{
- return ktimer_is_armed(&t->t);
-}
-
-cfs_time_t cfs_timer_deadline(struct cfs_timer *t)
-{
- return ktimer_deadline(&t->t);
-}
-
-void cfs_enter_debugger(void)
-{
-#ifdef __DARWIN8__
- extern void Debugger(const char * reason);
- Debugger("CFS");
-#else
- extern void PE_enter_debugger(char *cause);
- PE_enter_debugger("CFS");
-#endif
-}
-
-int cfs_online_cpus(void)
-{
- int activecpu;
- size_t size;
-
-#ifdef __DARWIN8__
- size = sizeof(int);
- sysctlbyname("hw.activecpu", &activecpu, &size, NULL, 0);
- return activecpu;
-#else
- host_basic_info_data_t hinfo;
- kern_return_t kret;
- int count = HOST_BASIC_INFO_COUNT;
-#define BSD_HOST 1
- kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count);
- if (kret == KERN_SUCCESS)
- return (hinfo.avail_cpus);
- return(-EINVAL);
-#endif
-}
-
-int cfs_ncpus(void)
-{
- int ncpu;
- size_t size;
-
- size = sizeof(int);
-
- sysctlbyname("hw.ncpu", &ncpu, &size, NULL, 0);
- return ncpu;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/systm.h>
-#include <sys/sysctl.h>
-#include <sys/proc.h>
-#include <sys/unistd.h>
-#include <mach/mach_types.h>
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-
-#define LIBCFS_SYSCTL "libcfs"
-#define LIBCFS_SYSCTL_SPRITE "sprite"
-#define LIBCFS_SYSCTL_MAGIC 0xbabeface
-
-static struct libcfs_sysctl_sprite {
- int ss_magic;
- struct sysctl_oid_list *ss_link;
-} libcfs_sysctl_sprite = { 0, NULL };
-
-static cfs_sysctl_table_header_t *libcfs_table_header = NULL;
-extern unsigned int libcfs_debug;
-extern unsigned int libcfs_subsystem_debug;
-extern unsigned int libcfs_printk;
-extern unsigned int libcfs_console_ratelimit;
-extern unsigned int libcfs_catastrophe;
-extern atomic_t libcfs_kmemory;
-
-static int sysctl_debug_kernel SYSCTL_HANDLER_ARGS
-{
-#error "Check me"
- const int maxstr = 1024;
- char *str;
- int error;
-
- if (req->newptr == USER_ADDR_NULL) {
- /* read request */
- return -EINVAL;
- }
-
- /* write request */
- error = trace_allocate_string_buffer(&str, maxstr + 1);
- if (error != 0)
- return error;
-
- error = SYSCTL_IN(req, str, maxstr);
-
- /* NB str guaranteed terminted */
- if (error == 0)
- error = tracefile_dump_all_pages(str);
-
- trace_free_string_buffer(str, maxstr + 1);
- return error;
-}
-
-static int sysctl_daemon_file SYSCTL_HANDLER_ARGS
-{
-#error "Check me"
- int error;
- char *str;
-
- if (req->newptr == USER_ADDR_NULL) {
- /* a read */
- tracefile_read_lock();
-
- /* include terminating '\0' */
- error = SYSCTL_OUT(req, tracefile, strlen(tracefile) + 1);
-
- tracefile_read_unlock();
- return error;
- }
-
- /* write request */
- error = trace_allocate_string_buffer(&str, TRACEFILE_NAME_SIZE);
- if (error != 0)
- return error;
-
- error = SYSCTL_IN(req, str, TRACEFILE_NAME_SIZE - 1);
-
- /* NB str guaranteed terminted */
- if (error == 0)
- error = trace_daemon_command(str);
-
- trace_free_string_buffer(str, TRACEFILE_NAME_SIZE);
- return error;
-}
-
-
-static int sysctl_debug_mb SYSCTL_HANDLER_ARGS
-{
-#error "Check me"
- long mb;
- int error;
-
- if (req->newptr == USER_ADDR_NULL) {
- /* read */
- mb = trace_get_debug_mb();
- error = SYSCTL_OUT(req, &mb, sizeof(mb));
- } else {
- /* write */
- error = SYSCTL_IN(req, &mb, sizeof(mb));
- if (error == 0)
- error = trace_set_debug_mb(mb);
- }
-
- return error;
-}
-
-/*
- * sysctl table for lnet
- */
-
-SYSCTL_NODE (, OID_AUTO, lnet, CTLFLAG_RW,
- 0, "lnet sysctl top");
-
-SYSCTL_INT(_lnet, OID_AUTO, debug,
- CTLTYPE_INT | CTLFLAG_RW , &libcfs_debug,
- 0, "debug");
-SYSCTL_INT(_lnet, OID_AUTO, subsystem_debug,
- CTLTYPE_INT | CTLFLAG_RW, &libcfs_subsystem_debug,
- 0, "subsystem debug");
-SYSCTL_INT(_lnet, OID_AUTO, printk,
- CTLTYPE_INT | CTLFLAG_RW, &libcfs_printk,
- 0, "printk");
-SYSCTL_INT(_lnet, OID_AUTO, console_ratelimit,
- CTLTYPE_INT | CTLFLAG_RW, &libcfs_console_ratelimit,
- 0, "console_ratelimit");
-SYSCTL_STRING(_lnet, OID_AUTO, debug_path,
- CTLTYPE_STRING | CTLFLAG_RW, debug_file_path,
- 1024, "debug path");
-SYSCTL_INT(_lnet, OID_AUTO, memused,
- CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_kmemory.counter,
- 0, "memused");
-SYSCTL_INT(_lnet, OID_AUTO, catastrophe,
- CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_catastrophe,
- 0, "catastrophe");
-
-#error "check me"
-SYSCTL_PROC(_lnet, OID_AUTO, debug_kernel,
- CTLTYPE_STRING | CTLFLAG_W, 0,
- 0, &sysctl_debug_kernel, "A", "debug_kernel");
-SYSCTL_PROC(_lnet, OID_AUTO, daemon_file,
- CTLTYPE_STRING | CTLFLAG_RW, 0,
- 0, &sysctl_daemon_file, "A", "daemon_file");
-SYSCTL_PROC(_lnet, OID_AUTO, debug_mb,
- CTLTYPE_INT | CTLFLAG_RW, 0,
- 0, &sysctl_debug_mb, "L", "debug_mb");
-
-
-static cfs_sysctl_table_t top_table[] = {
- &sysctl__lnet,
- &sysctl__lnet_debug,
- &sysctl__lnet_subsystem_debug,
- &sysctl__lnet_printk,
- &sysctl__lnet_console_ratelimit,
- &sysctl__lnet_debug_path,
- &sysctl__lnet_memused,
- &sysctl__lnet_catastrophe,
- &sysctl__lnet_debug_kernel,
- &sysctl__lnet_daemon_file,
- &sysctl__lnet_debug_mb,
- NULL
-};
-
-/*
- * Register sysctl table
- */
-cfs_sysctl_table_header_t *
-cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg)
-{
- cfs_sysctl_table_t item;
- int i = 0;
-
- while ((item = table[i++]) != NULL)
- sysctl_register_oid(item);
- return table;
-}
-
-/*
- * Unregister sysctl table
- */
-void
-cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table) {
- int i = 0;
- cfs_sysctl_table_t item;
-
- while ((item = table[i++]) != NULL)
- sysctl_unregister_oid(item);
- return;
-}
-
-/*
- * Allocate a sysctl oid.
- */
-static struct sysctl_oid *
-cfs_alloc_sysctl(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, void *arg1, int arg2, const char *fmt,
- int (*handler) SYSCTL_HANDLER_ARGS)
-{
- struct sysctl_oid *oid;
- char *sname = NULL;
- char *sfmt = NULL;
-
- if (strlen(name) + 1 > CTL_MAXNAME) {
- printf("libcfs: sysctl name: %s is too long.\n", name);
- return NULL;
- }
- oid = (struct sysctl_oid*)_MALLOC(sizeof(struct sysctl_oid),
- M_TEMP, M_WAITOK | M_ZERO);
- if (oid == NULL)
- return NULL;
-
- sname = (char *)_MALLOC(sizeof(CTL_MAXNAME),
- M_TEMP, M_WAITOK | M_ZERO);
- if (sname == NULL)
- goto error;
- strcpy(sname, name);
-
- sfmt = (char *)_MALLOC(4, M_TEMP, M_WAITOK | M_ZERO);
- if (sfmt == NULL)
- goto error;
- strcpy(sfmt, fmt);
-
- if (parent == NULL)
- oid->oid_parent = &sysctl__children;
- else
- oid->oid_parent = parent;
- oid->oid_number = nbr;
- oid->oid_kind = access;
- oid->oid_name = sname;
- oid->oid_handler = handler;
- oid->oid_fmt = sfmt;
-
- if ((access & CTLTYPE) == CTLTYPE_NODE){
- /* It's a sysctl node */
- struct sysctl_oid_list *link;
-
- link = (struct sysctl_oid_list *)_MALLOC(sizeof(struct sysctl_oid_list),
- M_TEMP, M_WAITOK | M_ZERO);
- if (link == NULL)
- goto error;
- oid->oid_arg1 = link;
- oid->oid_arg2 = 0;
- } else {
- oid->oid_arg1 = arg1;
- oid->oid_arg2 = arg2;
- }
-
- return oid;
-error:
- if (sfmt != NULL)
- _FREE(sfmt, M_TEMP);
- if (sname != NULL)
- _FREE(sname, M_TEMP);
- if (oid != NULL)
- _FREE(oid, M_TEMP);
- return NULL;
-}
-
-void cfs_free_sysctl(struct sysctl_oid *oid)
-{
- if (oid->oid_name != NULL)
- _FREE((void *)oid->oid_name, M_TEMP);
- if (oid->oid_fmt != NULL)
- _FREE((void *)oid->oid_fmt, M_TEMP);
- if ((oid->oid_kind & CTLTYPE_NODE != 0) && oid->oid_arg1)
- /* XXX Liang: need to assert the list is empty */
- _FREE(oid->oid_arg1, M_TEMP);
- _FREE(oid, M_TEMP);
-}
-
-#define CFS_SYSCTL_ISVALID ((libcfs_sysctl_sprite.ss_magic == LIBCFS_SYSCTL_MAGIC) && \
- (libcfs_sysctl_sprite.ss_link != NULL))
-
-int
-cfs_sysctl_isvalid(void)
-{
- return CFS_SYSCTL_ISVALID;
-}
-
-struct sysctl_oid *
-cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, int (*handler) SYSCTL_HANDLER_ARGS)
-{
- if (parent == NULL && CFS_SYSCTL_ISVALID)
- parent = libcfs_sysctl_sprite.ss_link;
- return cfs_alloc_sysctl(parent, nbr, CTLTYPE_NODE | access, name,
- NULL, 0, "N", handler);
-}
-
-struct sysctl_oid *
-cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, int *ptr, int val)
-{
- if (parent == NULL && CFS_SYSCTL_ISVALID)
- parent = libcfs_sysctl_sprite.ss_link;
- return cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name,
- ptr, val, "I", sysctl_handle_int);
-}
-
-struct sysctl_oid *
-cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, int *ptr, int val)
-{
- if (parent == NULL && CFS_SYSCTL_ISVALID)
- parent = libcfs_sysctl_sprite.ss_link;
- return cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name,
- ptr, val, "L", sysctl_handle_long);
-}
-
-struct sysctl_oid *
-cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, char *ptr, int len)
-{
- if (parent == NULL && CFS_SYSCTL_ISVALID)
- parent = libcfs_sysctl_sprite.ss_link;
- return cfs_alloc_sysctl(parent, nbr, CTLTYPE_STRING | access, name,
- ptr, len, "A", sysctl_handle_string);
-}
-
-struct sysctl_oid *
-cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access,
- const char *name, void *ptr, int size)
-{
- if (parent == NULL && CFS_SYSCTL_ISVALID)
- parent = libcfs_sysctl_sprite.ss_link;
- return cfs_alloc_sysctl(parent, nbr, CTLTYPE_OPAQUE | access, name,
- ptr, size, "S", sysctl_handle_opaque);
-}
-
-/* no proc in osx */
-cfs_proc_dir_entry_t *
-cfs_create_proc_entry(char *name, int mod, cfs_proc_dir_entry_t *parent)
-{
- cfs_proc_dir_entry_t *entry;
- MALLOC(entry, cfs_proc_dir_entry_t *, sizeof(cfs_proc_dir_entry_t), M_TEMP, M_WAITOK|M_ZERO);
-
- return entry;
-}
-
-void
-cfs_free_proc_entry(cfs_proc_dir_entry_t *de){
- FREE(de, M_TEMP);
- return;
-};
-
-void
-cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry)
-{
- cfs_free_proc_entry(entry);
- return;
-}
-
-int
-insert_proc(void)
-{
-#if 1
- if (!libcfs_table_header)
- libcfs_table_header = cfs_register_sysctl_table(top_table, 0);
-#endif
- return 0;
-}
-
-void
-remove_proc(void)
-{
-#if 1
- if (libcfs_table_header != NULL)
- cfs_unregister_sysctl_table(libcfs_table_header);
- libcfs_table_header = NULL;
-#endif
- return;
-}
-
-int
-cfs_sysctl_init(void)
-{
- struct sysctl_oid *oid_root;
- struct sysctl_oid *oid_sprite;
- struct libcfs_sysctl_sprite *sprite;
- size_t len;
- int rc;
-
- len = sizeof(struct libcfs_sysctl_sprite);
- rc = sysctlbyname("libcfs.sprite",
- (void *)&libcfs_sysctl_sprite, &len, NULL, 0);
- if (rc == 0) {
- /*
- * XXX Liang: assert (rc == 0 || rc == ENOENT)
- *
- * libcfs.sprite has been registered by previous
- * loading of libcfs
- */
- if (libcfs_sysctl_sprite.ss_magic != LIBCFS_SYSCTL_MAGIC) {
- printf("libcfs: magic number of libcfs.sprite "
- "is not right (%lx, %lx)\n",
- libcfs_sysctl_sprite.ss_magic,
- LIBCFS_SYSCTL_MAGIC);
- return -1;
- }
- assert(libcfs_sysctl_sprite.ss_link != NULL);
- printf("libcfs: registered libcfs.sprite found.\n");
- return 0;
- }
- oid_root = cfs_alloc_sysctl_node(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN,
- LIBCFS_SYSCTL, 0);
- if (oid_root == NULL)
- return -1;
- sysctl_register_oid(oid_root);
-
- sprite = (struct libcfs_sysctl_sprite *)_MALLOC(sizeof(struct libcfs_sysctl_sprite),
- M_TEMP, M_WAITOK | M_ZERO);
- if (sprite == NULL) {
- sysctl_unregister_oid(oid_root);
- cfs_free_sysctl(oid_root);
- return -1;
- }
- sprite->ss_magic = LIBCFS_SYSCTL_MAGIC;
- sprite->ss_link = (struct sysctl_oid_list *)oid_root->oid_arg1;
- oid_sprite = cfs_alloc_sysctl_struct((struct sysctl_oid_list *)oid_root->oid_arg1,
- OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN,
- LIBCFS_SYSCTL_SPRITE, sprite,
- sizeof(struct libcfs_sysctl_sprite));
- if (oid_sprite == NULL) {
- cfs_free_sysctl(oid_sprite);
- sysctl_unregister_oid(oid_root);
- cfs_free_sysctl(oid_root);
- return -1;
- }
- sysctl_register_oid(oid_sprite);
-
- libcfs_sysctl_sprite.ss_magic = sprite->ss_magic;
- libcfs_sysctl_sprite.ss_link = sprite->ss_link;
-
- return 0;
-}
-
-void
-cfs_sysctl_fini(void)
-{
- libcfs_sysctl_sprite.ss_magic = 0;
- libcfs_sysctl_sprite.ss_link = NULL;
-}
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Lustre Light Super operations
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-/*
- * xnu_sync.c
- *
- * Created by nikita on Sun Jul 18 2004.
- *
- * XNU synchronization primitives.
- */
-
-/*
- * This file contains very simplistic implementations of (saner) API for
- * basic synchronization primitives:
- *
- * - spin-lock (kspin)
- *
- * - semaphore (ksem)
- *
- * - mutex (kmut)
- *
- * - condition variable (kcond)
- *
- * - wait-queue (ksleep_chan and ksleep_link)
- *
- * - timer (ktimer)
- *
- * A lot can be optimized here.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#ifdef __DARWIN8__
-# include <kern/locks.h>
-#else
-# include <mach/mach_types.h>
-# include <sys/types.h>
-# include <kern/simple_lock.h>
-#endif
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-#define SLASSERT(e) ON_SYNC_DEBUG(LASSERT(e))
-
-#ifdef HAVE_GET_PREEMPTION_LEVEL
-extern int get_preemption_level(void);
-#else
-#define get_preemption_level() (0)
-#endif
-
-#if SMP
-#ifdef __DARWIN8__
-
-static lck_grp_t *cfs_lock_grp = NULL;
-#warning "Verify definition of lck_spin_t hasn't been changed while building!"
-
-/* hw_lock_* are not exported by Darwin8 */
-static inline void xnu_spin_init(xnu_spin_t *s)
-{
- SLASSERT(cfs_lock_grp != NULL);
- //*s = lck_spin_alloc_init(cfs_lock_grp, LCK_ATTR_NULL);
- lck_spin_init((lck_spin_t *)s, cfs_lock_grp, LCK_ATTR_NULL);
-}
-
-static inline void xnu_spin_done(xnu_spin_t *s)
-{
- SLASSERT(cfs_lock_grp != NULL);
- //lck_spin_free(*s, cfs_lock_grp);
- //*s = NULL;
- lck_spin_destroy((lck_spin_t *)s, cfs_lock_grp);
-}
-
-#define xnu_spin_lock(s) lck_spin_lock((lck_spin_t *)(s))
-#define xnu_spin_unlock(s) lck_spin_unlock((lck_spin_t *)(s))
-
-#warning "Darwin8 does not export lck_spin_try_lock"
-#define xnu_spin_try(s) (1)
-
-#else /* DARWIN8 */
-extern void hw_lock_init(hw_lock_t);
-extern void hw_lock_lock(hw_lock_t);
-extern void hw_lock_unlock(hw_lock_t);
-extern unsigned int hw_lock_to(hw_lock_t, unsigned int);
-extern unsigned int hw_lock_try(hw_lock_t);
-extern unsigned int hw_lock_held(hw_lock_t);
-
-#define xnu_spin_init(s) hw_lock_init(s)
-#define xnu_spin_done(s) do {} while (0)
-#define xnu_spin_lock(s) hw_lock_lock(s)
-#define xnu_spin_unlock(s) hw_lock_unlock(s)
-#define xnu_spin_try(s) hw_lock_try(s)
-#endif /* DARWIN8 */
-
-#else /* SMP */
-#define xnu_spin_init(s) do {} while (0)
-#define xnu_spin_done(s) do {} while (0)
-#define xnu_spin_lock(s) do {} while (0)
-#define xnu_spin_unlock(s) do {} while (0)
-#define xnu_spin_try(s) (1)
-#endif /* SMP */
-
-/*
- * Warning: low level libcfs debugging code (libcfs_debug_msg(), for
- * example), uses spin-locks, so debugging output here may lead to nasty
- * surprises.
- *
- * In uniprocessor version of spin-lock. Only checks.
- */
-
-void kspin_init(struct kspin *spin)
-{
- SLASSERT(spin != NULL);
- xnu_spin_init(&spin->lock);
- ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC);
- ON_SYNC_DEBUG(spin->owner = NULL);
-}
-
-void kspin_done(struct kspin *spin)
-{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- SLASSERT(spin->owner == NULL);
- xnu_spin_done(&spin->lock);
-}
-
-void kspin_lock(struct kspin *spin)
-{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- SLASSERT(spin->owner != current_thread());
-
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- xnu_spin_lock(&spin->lock);
- SLASSERT(spin->owner == NULL);
- ON_SYNC_DEBUG(spin->owner = current_thread());
-}
-
-void kspin_unlock(struct kspin *spin)
-{
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- SLASSERT(spin->owner == current_thread());
- ON_SYNC_DEBUG(spin->owner = NULL);
- xnu_spin_unlock(&spin->lock);
-}
-
-int kspin_trylock(struct kspin *spin)
-{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
-
- if (xnu_spin_try(&spin->lock)) {
- SLASSERT(spin->owner == NULL);
- ON_SYNC_DEBUG(spin->owner = current_thread());
- return 1;
- } else
- return 0;
-}
-
-#if XNU_SYNC_DEBUG
-int kspin_islocked(struct kspin *spin)
-{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- return spin->owner == current_thread();
-}
-
-int kspin_isnotlocked(struct kspin *spin)
-{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- return spin->owner != current_thread();
-}
-#endif
-
-/*
- * read/write spin-lock
- */
-void krw_spin_init(struct krw_spin *rwspin)
-{
- SLASSERT(rwspin != NULL);
-
- kspin_init(&rwspin->guard);
- rwspin->count = 0;
- ON_SYNC_DEBUG(rwspin->magic = KRW_SPIN_MAGIC);
-}
-
-void krw_spin_done(struct krw_spin *rwspin)
-{
- SLASSERT(rwspin != NULL);
- SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
- SLASSERT(rwspin->count == 0);
- kspin_done(&rwspin->guard);
-}
-
-void krw_spin_down_r(struct krw_spin *rwspin)
-{
- int i;
- SLASSERT(rwspin != NULL);
- SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
-
- kspin_lock(&rwspin->guard);
- while(rwspin->count < 0) {
- i = -1;
- kspin_unlock(&rwspin->guard);
- while (--i != 0 && rwspin->count < 0)
- continue;
- kspin_lock(&rwspin->guard);
- }
- ++ rwspin->count;
- kspin_unlock(&rwspin->guard);
-}
-
-void krw_spin_down_w(struct krw_spin *rwspin)
-{
- int i;
- SLASSERT(rwspin != NULL);
- SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
-
- kspin_lock(&rwspin->guard);
- while (rwspin->count != 0) {
- i = -1;
- kspin_unlock(&rwspin->guard);
- while (--i != 0 && rwspin->count != 0)
- continue;
- kspin_lock(&rwspin->guard);
- }
- rwspin->count = -1;
- kspin_unlock(&rwspin->guard);
-}
-
-void krw_spin_up_r(struct krw_spin *rwspin)
-{
- SLASSERT(rwspin != NULL);
- SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
- SLASSERT(rwspin->count > 0);
-
- kspin_lock(&rwspin->guard);
- -- rwspin->count;
- kspin_unlock(&rwspin->guard);
-}
-
-void krw_spin_up_w(struct krw_spin *rwspin)
-{
- SLASSERT(rwspin != NULL);
- SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
- SLASSERT(rwspin->count == -1);
-
- kspin_lock(&rwspin->guard);
- rwspin->count = 0;
- kspin_unlock(&rwspin->guard);
-}
-
-/*
- * semaphore
- */
-#ifdef __DARWIN8__
-
-#define xnu_waitq_init(q, a) do {} while (0)
-#define xnu_waitq_done(q) do {} while (0)
-#define xnu_waitq_wakeup_one(q, e, s) ({wakeup_one((void *)(e)); KERN_SUCCESS;})
-#define xnu_waitq_wakeup_all(q, e, s) ({wakeup((void *)(e)); KERN_SUCCESS;})
-#define xnu_waitq_assert_wait(q, e, s) assert_wait((e), s)
-
-#else /* DARWIN8 */
-
-#define xnu_waitq_init(q, a) wait_queue_init((q), a)
-#define xnu_waitq_done(q) do {} while (0)
-#define xnu_waitq_wakeup_one(q, e, s) wait_queue_wakeup_one((q), (event_t)(e), s)
-#define xnu_waitq_wakeup_all(q, e, s) wait_queue_wakeup_all((q), (event_t)(e), s)
-#define xnu_waitq_assert_wait(q, e, s) wait_queue_assert_wait((q), (event_t)(e), s)
-
-#endif /* DARWIN8 */
-void ksem_init(struct ksem *sem, int value)
-{
- SLASSERT(sem != NULL);
- kspin_init(&sem->guard);
- xnu_waitq_init(&sem->q, SYNC_POLICY_FIFO);
- sem->value = value;
- ON_SYNC_DEBUG(sem->magic = KSEM_MAGIC);
-}
-
-void ksem_done(struct ksem *sem)
-{
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KSEM_MAGIC);
- /*
- * XXX nikita: cannot check that &sem->q is empty because
- * wait_queue_empty() is Apple private API.
- */
- kspin_done(&sem->guard);
-}
-
-int ksem_up(struct ksem *sem, int value)
-{
- int result;
-
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KSEM_MAGIC);
- SLASSERT(value >= 0);
-
- kspin_lock(&sem->guard);
- sem->value += value;
- if (sem->value == 0)
- result = xnu_waitq_wakeup_one(&sem->q, sem,
- THREAD_AWAKENED);
- else
- result = xnu_waitq_wakeup_all(&sem->q, sem,
- THREAD_AWAKENED);
- kspin_unlock(&sem->guard);
- SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING);
- return (result == KERN_SUCCESS) ? 0 : 1;
-}
-
-void ksem_down(struct ksem *sem, int value)
-{
- int result;
-
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KSEM_MAGIC);
- SLASSERT(value >= 0);
- SLASSERT(get_preemption_level() == 0);
-
- kspin_lock(&sem->guard);
- while (sem->value < value) {
- result = xnu_waitq_assert_wait(&sem->q, sem,
- THREAD_UNINT);
- SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
- kspin_unlock(&sem->guard);
- if (result == THREAD_WAITING)
- thread_block(THREAD_CONTINUE_NULL);
- kspin_lock(&sem->guard);
- }
- sem->value -= value;
- kspin_unlock(&sem->guard);
-}
-
-int ksem_trydown(struct ksem *sem, int value)
-{
- int result;
-
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KSEM_MAGIC);
- SLASSERT(value >= 0);
-
- kspin_lock(&sem->guard);
- if (sem->value >= value) {
- sem->value -= value;
- result = 0;
- } else
- result = -EBUSY;
- kspin_unlock(&sem->guard);
- return result;
-}
-
-void kmut_init(struct kmut *mut)
-{
- SLASSERT(mut != NULL);
- ksem_init(&mut->s, 1);
- ON_SYNC_DEBUG(mut->magic = KMUT_MAGIC);
- ON_SYNC_DEBUG(mut->owner = NULL);
-}
-
-void kmut_done(struct kmut *mut)
-{
- SLASSERT(mut != NULL);
- SLASSERT(mut->magic == KMUT_MAGIC);
- SLASSERT(mut->owner == NULL);
- ksem_done(&mut->s);
-}
-
-void kmut_lock(struct kmut *mut)
-{
- SLASSERT(mut != NULL);
- SLASSERT(mut->magic == KMUT_MAGIC);
- SLASSERT(mut->owner != current_thread());
- SLASSERT(get_preemption_level() == 0);
-
- ksem_down(&mut->s, 1);
- ON_SYNC_DEBUG(mut->owner = current_thread());
-}
-
-void kmut_unlock(struct kmut *mut)
-{
- SLASSERT(mut != NULL);
- SLASSERT(mut->magic == KMUT_MAGIC);
- SLASSERT(mut->owner == current_thread());
-
- ON_SYNC_DEBUG(mut->owner = NULL);
- ksem_up(&mut->s, 1);
-}
-
-int kmut_trylock(struct kmut *mut)
-{
- SLASSERT(mut != NULL);
- SLASSERT(mut->magic == KMUT_MAGIC);
- return ksem_trydown(&mut->s, 1);
-}
-
-#if XNU_SYNC_DEBUG
-int kmut_islocked(struct kmut *mut)
-{
- SLASSERT(mut != NULL);
- SLASSERT(mut->magic == KMUT_MAGIC);
- return mut->owner == current_thread();
-}
-
-int kmut_isnotlocked(struct kmut *mut)
-{
- SLASSERT(mut != NULL);
- SLASSERT(mut->magic == KMUT_MAGIC);
- return mut->owner != current_thread();
-}
-#endif
-
-
-void kcond_init(struct kcond *cond)
-{
- SLASSERT(cond != NULL);
-
- kspin_init(&cond->guard);
- cond->waiters = NULL;
- ON_SYNC_DEBUG(cond->magic = KCOND_MAGIC);
-}
-
-void kcond_done(struct kcond *cond)
-{
- SLASSERT(cond != NULL);
- SLASSERT(cond->magic == KCOND_MAGIC);
- SLASSERT(cond->waiters == NULL);
- kspin_done(&cond->guard);
-}
-
-void kcond_wait(struct kcond *cond, struct kspin *lock)
-{
- struct kcond_link link;
-
- SLASSERT(cond != NULL);
- SLASSERT(lock != NULL);
- SLASSERT(cond->magic == KCOND_MAGIC);
- SLASSERT(kspin_islocked(lock));
-
- ksem_init(&link.sem, 0);
- kspin_lock(&cond->guard);
- link.next = cond->waiters;
- cond->waiters = &link;
- kspin_unlock(&cond->guard);
- kspin_unlock(lock);
-
- ksem_down(&link.sem, 1);
-
- kspin_lock(&cond->guard);
- kspin_unlock(&cond->guard);
- kspin_lock(lock);
-}
-
-void kcond_wait_guard(struct kcond *cond)
-{
- struct kcond_link link;
-
- SLASSERT(cond != NULL);
- SLASSERT(cond->magic == KCOND_MAGIC);
- SLASSERT(kspin_islocked(&cond->guard));
-
- ksem_init(&link.sem, 0);
- link.next = cond->waiters;
- cond->waiters = &link;
- kspin_unlock(&cond->guard);
-
- ksem_down(&link.sem, 1);
-
- kspin_lock(&cond->guard);
-}
-
-void kcond_signal_guard(struct kcond *cond)
-{
- struct kcond_link *link;
-
- SLASSERT(cond != NULL);
- SLASSERT(cond->magic == KCOND_MAGIC);
- SLASSERT(kspin_islocked(&cond->guard));
-
- link = cond->waiters;
- if (link != NULL) {
- cond->waiters = link->next;
- ksem_up(&link->sem, 1);
- }
-}
-
-void kcond_signal(struct kcond *cond)
-{
- SLASSERT(cond != NULL);
- SLASSERT(cond->magic == KCOND_MAGIC);
-
- kspin_lock(&cond->guard);
- kcond_signal_guard(cond);
- kspin_unlock(&cond->guard);
-}
-
-void kcond_broadcast_guard(struct kcond *cond)
-{
- struct kcond_link *link;
-
- SLASSERT(cond != NULL);
- SLASSERT(cond->magic == KCOND_MAGIC);
- SLASSERT(kspin_islocked(&cond->guard));
-
- for (link = cond->waiters; link != NULL; link = link->next)
- ksem_up(&link->sem, 1);
- cond->waiters = NULL;
-}
-
-void kcond_broadcast(struct kcond *cond)
-{
- SLASSERT(cond != NULL);
- SLASSERT(cond->magic == KCOND_MAGIC);
-
- kspin_lock(&cond->guard);
- kcond_broadcast_guard(cond);
- kspin_unlock(&cond->guard);
-}
-
-void krw_sem_init(struct krw_sem *sem)
-{
- SLASSERT(sem != NULL);
-
- kcond_init(&sem->cond);
- sem->count = 0;
- ON_SYNC_DEBUG(sem->magic = KRW_MAGIC);
-}
-
-void krw_sem_done(struct krw_sem *sem)
-{
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KRW_MAGIC);
- SLASSERT(sem->count == 0);
- kcond_done(&sem->cond);
-}
-
-void krw_sem_down_r(struct krw_sem *sem)
-{
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KRW_MAGIC);
- SLASSERT(get_preemption_level() == 0);
-
- kspin_lock(&sem->cond.guard);
- while (sem->count < 0)
- kcond_wait_guard(&sem->cond);
- ++ sem->count;
- kspin_unlock(&sem->cond.guard);
-}
-
-int krw_sem_down_r_try(struct krw_sem *sem)
-{
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KRW_MAGIC);
-
- kspin_lock(&sem->cond.guard);
- if (sem->count < 0) {
- kspin_unlock(&sem->cond.guard);
- return -EBUSY;
- }
- ++ sem->count;
- kspin_unlock(&sem->cond.guard);
- return 0;
-}
-
-void krw_sem_down_w(struct krw_sem *sem)
-{
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KRW_MAGIC);
- SLASSERT(get_preemption_level() == 0);
-
- kspin_lock(&sem->cond.guard);
- while (sem->count != 0)
- kcond_wait_guard(&sem->cond);
- sem->count = -1;
- kspin_unlock(&sem->cond.guard);
-}
-
-int krw_sem_down_w_try(struct krw_sem *sem)
-{
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KRW_MAGIC);
-
- kspin_lock(&sem->cond.guard);
- if (sem->count != 0) {
- kspin_unlock(&sem->cond.guard);
- return -EBUSY;
- }
- sem->count = -1;
- kspin_unlock(&sem->cond.guard);
- return 0;
-}
-
-void krw_sem_up_r(struct krw_sem *sem)
-{
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KRW_MAGIC);
- SLASSERT(sem->count > 0);
-
- kspin_lock(&sem->cond.guard);
- -- sem->count;
- if (sem->count == 0)
- kcond_broadcast_guard(&sem->cond);
- kspin_unlock(&sem->cond.guard);
-}
-
-void krw_sem_up_w(struct krw_sem *sem)
-{
- SLASSERT(sem != NULL);
- SLASSERT(sem->magic == KRW_MAGIC);
- SLASSERT(sem->count == -1);
-
- kspin_lock(&sem->cond.guard);
- sem->count = 0;
- kspin_unlock(&sem->cond.guard);
- kcond_broadcast(&sem->cond);
-}
-
-void ksleep_chan_init(struct ksleep_chan *chan)
-{
- SLASSERT(chan != NULL);
-
- kspin_init(&chan->guard);
- CFS_INIT_LIST_HEAD(&chan->waiters);
- ON_SYNC_DEBUG(chan->magic = KSLEEP_CHAN_MAGIC);
-}
-
-void ksleep_chan_done(struct ksleep_chan *chan)
-{
- SLASSERT(chan != NULL);
- SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
- SLASSERT(list_empty(&chan->waiters));
- kspin_done(&chan->guard);
-}
-
-void ksleep_link_init(struct ksleep_link *link)
-{
- SLASSERT(link != NULL);
-
- CFS_INIT_LIST_HEAD(&link->linkage);
- link->flags = 0;
- link->event = current_thread();
- link->hits = 0;
- link->forward = NULL;
- ON_SYNC_DEBUG(link->magic = KSLEEP_LINK_MAGIC);
-}
-
-void ksleep_link_done(struct ksleep_link *link)
-{
- SLASSERT(link != NULL);
- SLASSERT(link->magic == KSLEEP_LINK_MAGIC);
- SLASSERT(list_empty(&link->linkage));
-}
-
-void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link)
-{
- SLASSERT(chan != NULL);
- SLASSERT(link != NULL);
- SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
- SLASSERT(link->magic == KSLEEP_LINK_MAGIC);
- SLASSERT(list_empty(&link->linkage));
-
- kspin_lock(&chan->guard);
- if (link->flags & KSLEEP_EXCLUSIVE)
- list_add_tail(&link->linkage, &chan->waiters);
- else
- list_add(&link->linkage, &chan->waiters);
- kspin_unlock(&chan->guard);
-}
-
-void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link)
-{
- SLASSERT(chan != NULL);
- SLASSERT(link != NULL);
- SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
- SLASSERT(link->magic == KSLEEP_LINK_MAGIC);
-
- kspin_lock(&chan->guard);
- list_del_init(&link->linkage);
- kspin_unlock(&chan->guard);
-}
-
-static int has_hits(struct ksleep_chan *chan, event_t event)
-{
- struct ksleep_link *scan;
-
- SLASSERT(kspin_islocked(&chan->guard));
- list_for_each_entry(scan, &chan->waiters, linkage) {
- if (scan->event == event && scan->hits > 0) {
- /* consume hit */
- -- scan->hits;
- return 1;
- }
- }
- return 0;
-}
-
-static void add_hit(struct ksleep_chan *chan, event_t event)
-{
- struct ksleep_link *scan;
-
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- SLASSERT(kspin_islocked(&chan->guard));
- list_for_each_entry(scan, &chan->waiters, linkage) {
- if (scan->event == event) {
- ++ scan->hits;
- break;
- }
- }
-}
-
-void ksleep_wait(struct ksleep_chan *chan, cfs_task_state_t state)
-{
- event_t event;
- int result;
-
- ENTRY;
-
- SLASSERT(chan != NULL);
- SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
- SLASSERT(get_preemption_level() == 0);
-
- event = current_thread();
- kspin_lock(&chan->guard);
- if (!has_hits(chan, event)) {
- result = assert_wait(event, state);
- kspin_unlock(&chan->guard);
- SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
- if (result == THREAD_WAITING)
- thread_block(THREAD_CONTINUE_NULL);
- } else
- kspin_unlock(&chan->guard);
- EXIT;
-}
-
-/*
- * Sleep on @chan for no longer than @timeout nano-seconds. Return remaining
- * sleep time (non-zero only if thread was waken by a signal (not currently
- * implemented), or waitq was already in the "signalled" state).
- */
-int64_t ksleep_timedwait(struct ksleep_chan *chan,
- cfs_task_state_t state,
- __u64 timeout)
-{
- event_t event;
-
- ENTRY;
-
- SLASSERT(chan != NULL);
- SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
- SLASSERT(get_preemption_level() == 0);
-
- event = current_thread();
- kspin_lock(&chan->guard);
- if (!has_hits(chan, event)) {
- int result;
- __u64 expire;
- result = assert_wait(event, state);
- if (timeout > 0) {
- /*
- * arm a timer. thread_set_timer()'s first argument is
- * uint32_t, so we have to cook deadline ourselves.
- */
- nanoseconds_to_absolutetime(timeout, &expire);
- clock_absolutetime_interval_to_deadline(expire, &expire);
- thread_set_timer_deadline(expire);
- }
- kspin_unlock(&chan->guard);
- SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
- if (result == THREAD_WAITING)
- result = thread_block(THREAD_CONTINUE_NULL);
- thread_cancel_timer();
-
- if (result == THREAD_TIMED_OUT)
- timeout = 0;
- else {
- __u64 now;
- clock_get_uptime(&now);
- if (expire > now)
- absolutetime_to_nanoseconds(expire - now, &timeout);
- else
- timeout = 0;
- }
- } else {
- /* just return timeout, because I've got event and don't need to wait */
- kspin_unlock(&chan->guard);
- }
-
- RETURN(timeout);
-}
-
-/*
- * wake up single exclusive waiter (plus some arbitrary number of *
- * non-exclusive)
- */
-void ksleep_wake(struct ksleep_chan *chan)
-{
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- ksleep_wake_nr(chan, 1);
-}
-
-/*
- * wake up all waiters on @chan
- */
-void ksleep_wake_all(struct ksleep_chan *chan)
-{
- ENTRY;
- ksleep_wake_nr(chan, 0);
- EXIT;
-}
-
-/*
- * wakeup no more than @nr exclusive waiters from @chan, plus some arbitrary
- * number of non-exclusive. If @nr is 0, wake up all waiters.
- */
-void ksleep_wake_nr(struct ksleep_chan *chan, int nr)
-{
- struct ksleep_link *scan;
- int result;
-
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- SLASSERT(chan != NULL);
- SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
-
- kspin_lock(&chan->guard);
- list_for_each_entry(scan, &chan->waiters, linkage) {
- struct ksleep_chan *forward;
-
- forward = scan->forward;
- if (forward != NULL)
- kspin_lock(&forward->guard);
- result = thread_wakeup(scan->event);
- SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING);
- if (result == KERN_NOT_WAITING) {
- ++ scan->hits;
- if (forward != NULL)
- add_hit(forward, scan->event);
- }
- if (forward != NULL)
- kspin_unlock(&forward->guard);
- if ((scan->flags & KSLEEP_EXCLUSIVE) && --nr == 0)
- break;
- }
- kspin_unlock(&chan->guard);
-}
-
-void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg)
-{
- SLASSERT(t != NULL);
- SLASSERT(func != NULL);
-
- kspin_init(&t->guard);
- t->func = func;
- t->arg = arg;
- ON_SYNC_DEBUG(t->magic = KTIMER_MAGIC);
-}
-
-void ktimer_done(struct ktimer *t)
-{
- SLASSERT(t != NULL);
- SLASSERT(t->magic == KTIMER_MAGIC);
- kspin_done(&t->guard);
- ON_SYNC_DEBUG(t->magic = 0);
-}
-
-static void ktimer_actor(void *arg0, void *arg1)
-{
- struct ktimer *t;
- int armed;
-
- t = arg0;
- /*
- * this assumes that ktimer's are never freed.
- */
- SLASSERT(t != NULL);
- SLASSERT(t->magic == KTIMER_MAGIC);
-
- /*
- * call actual timer function
- */
- kspin_lock(&t->guard);
- armed = t->armed;
- t->armed = 0;
- kspin_unlock(&t->guard);
-
- if (armed)
- t->func(t->arg);
-}
-
-extern boolean_t thread_call_func_cancel(thread_call_func_t, thread_call_param_t, boolean_t);
-extern void thread_call_func_delayed(thread_call_func_t, thread_call_param_t, __u64);
-
-static void ktimer_disarm_locked(struct ktimer *t)
-{
- SLASSERT(t != NULL);
- SLASSERT(t->magic == KTIMER_MAGIC);
-
- thread_call_func_cancel(ktimer_actor, t, FALSE);
-}
-
-/*
- * Received deadline is nanoseconds, but time checked by
- * thread_call is absolute time (The abstime unit is equal to
- * the length of one bus cycle, so the duration is dependent
- * on the bus speed of the computer), so we need to convert
- * nanotime to abstime by nanoseconds_to_absolutetime().
- *
- * Refer to _delayed_call_timer(...)
- *
- * if thread_call_func_delayed is not exported in the future,
- * we can use timeout() or bsd_timeout() to replace it.
- */
-void ktimer_arm(struct ktimer *t, u_int64_t deadline)
-{
- cfs_time_t abstime;
- SLASSERT(t != NULL);
- SLASSERT(t->magic == KTIMER_MAGIC);
-
- kspin_lock(&t->guard);
- ktimer_disarm_locked(t);
- t->armed = 1;
- nanoseconds_to_absolutetime(deadline, &abstime);
- thread_call_func_delayed(ktimer_actor, t, deadline);
- kspin_unlock(&t->guard);
-}
-
-void ktimer_disarm(struct ktimer *t)
-{
- SLASSERT(t != NULL);
- SLASSERT(t->magic == KTIMER_MAGIC);
-
- kspin_lock(&t->guard);
- t->armed = 0;
- ktimer_disarm_locked(t);
- kspin_unlock(&t->guard);
-}
-
-int ktimer_is_armed(struct ktimer *t)
-{
- SLASSERT(t != NULL);
- SLASSERT(t->magic == KTIMER_MAGIC);
-
- /*
- * no locking---result is only a hint anyway.
- */
- return t->armed;
-}
-
-u_int64_t ktimer_deadline(struct ktimer *t)
-{
- SLASSERT(t != NULL);
- SLASSERT(t->magic == KTIMER_MAGIC);
-
- return t->deadline;
-}
-
-void cfs_sync_init(void)
-{
-#ifdef __DARWIN8__
- /* Initialize lock group */
- cfs_lock_grp = lck_grp_alloc_init("libcfs sync", LCK_GRP_ATTR_NULL);
-#endif
-}
-
-void cfs_sync_fini(void)
-{
-#ifdef __DARWIN8__
- /*
- * XXX Liang: destroy lock group. As we haven't called lock_done
- * for all locks, cfs_lock_grp may not be freed by kernel(reference
- * count > 1).
- */
- lck_grp_free(cfs_lock_grp);
- cfs_lock_grp = NULL;
-#endif
-}
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Darwin porting library
- * Make things easy to port
- */
-
-#include <mach/mach_types.h>
-#include <sys/file.h>
-#include <sys/mount.h>
-#include <string.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/sockio.h>
-#include <sys/protosw.h>
-#include <net/if.h>
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-static __inline__ struct sockaddr_in
-blank_sin()
-{
- struct sockaddr_in blank = { sizeof(struct sockaddr_in), AF_INET };
- return (blank);
-}
-
-void
-libcfs_ipif_free_enumeration (char **names, int n)
-{
- int i;
-
- LASSERT (n > 0);
-
- for (i = 0; i < n && names[i] != NULL; i++)
- LIBCFS_FREE(names[i], IFNAMSIZ);
-
- LIBCFS_FREE(names, n * sizeof(*names));
-}
-
-#ifdef __DARWIN8__
-/*
- * Darwin 8.x
- *
- * No hack kernel structre, all using KPI.
- */
-
-int
-libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
-{
- struct ifreq ifr;
- socket_t so;
- __u32 val;
- int nob;
- int rc;
-
- rc = -sock_socket(PF_INET, SOCK_STREAM, 0,
- NULL, NULL, &so);
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return rc;
- }
-
- nob = strnlen(name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- CERROR("Interface name %s too long\n", name);
- rc = -EINVAL;
- goto out;
- }
-
- CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
- bzero(&ifr, sizeof(ifr));
- strcpy(ifr.ifr_name, name);
- rc = -sock_ioctl (so, SIOCGIFFLAGS, &ifr);
-
- if (rc != 0) {
- CERROR("Can't get flags for interface %s\n", name);
- goto out;
- }
-
- if ((ifr.ifr_flags & IFF_UP) == 0) {
- CDEBUG(D_NET, "Interface %s down\n", name);
- *up = 0;
- *ip = *mask = 0;
- goto out;
- }
-
- *up = 1;
-
- bzero(&ifr, sizeof(ifr));
- strcpy(ifr.ifr_name, name);
- *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
- rc = -sock_ioctl(so, SIOCGIFADDR, &ifr);
-
- if (rc != 0) {
- CERROR("Can't get IP address for interface %s\n", name);
- goto out;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *ip = ntohl(val);
-
- bzero(&ifr, sizeof(ifr));
- strcpy(ifr.ifr_name, name);
- *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
- rc = -sock_ioctl(so, SIOCGIFNETMASK, &ifr);
-
- if (rc != 0) {
- CERROR("Can't get netmask for interface %s\n", name);
- goto out;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *mask = ntohl(val);
-out:
- sock_close(so);
- return rc;
-}
-
-int
-libcfs_ipif_enumerate (char ***namesp)
-{
- /* Allocate and fill in 'names', returning # interfaces/error */
- char **names;
- int toobig;
- int nalloc;
- int nfound;
- socket_t so;
- struct ifreq *ifr;
- struct ifconf ifc;
- int rc;
- int nob;
- int i;
-
- rc = -sock_socket(PF_INET, SOCK_STREAM, 0,
- NULL, NULL, &so);
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return (rc);
- }
-
- nalloc = 16; /* first guess at max interfaces */
- toobig = 0;
- for (;;) {
- if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) {
- toobig = 1;
- nalloc = CFS_PAGE_SIZE/sizeof(*ifr);
- CWARN("Too many interfaces: only enumerating first %d\n",
- nalloc);
- }
-
- LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
- if (ifr == NULL) {
- CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
- rc = -ENOMEM;
- goto out0;
- }
-
- ifc.ifc_buf = (char *)ifr;
- ifc.ifc_len = nalloc * sizeof(*ifr);
-
-#if 1
- /*
- * XXX Liang:
- * sock_ioctl(..., SIOCGIFCONF, ...) is not supposed to be used in
- * kernel space because it always try to copy result to userspace.
- * So we can't get interfaces name by sock_ioctl(...,SIOCGIFCONF,...).
- * I've created a bug for Apple, let's wait...
- */
- nfound = 0;
- for (i = 0; i < 16; i++) {
- struct ifreq en;
- bzero(&en, sizeof(en));
- snprintf(en.ifr_name, IFNAMSIZ, "en%d", i);
- rc = -sock_ioctl (so, SIOCGIFFLAGS, &en);
- if (rc != 0)
- continue;
- strcpy(ifr[nfound++].ifr_name, en.ifr_name);
- }
-
-#else /* NOT in using now */
- rc = -sock_ioctl(so, SIOCGIFCONF, (caddr_t)&ifc);
-
- if (rc < 0) {
- CERROR ("Error %d enumerating interfaces\n", rc);
- goto out1;
- }
-
- nfound = ifc.ifc_len/sizeof(*ifr);
- LASSERT (nfound <= nalloc);
-#endif
-
- if (nfound < nalloc || toobig)
- break;
-
- LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
- nalloc *= 2;
- }
- if (nfound == 0)
- goto out1;
-
- LIBCFS_ALLOC(names, nfound * sizeof(*names));
- if (names == NULL) {
- rc = -ENOMEM;
- goto out1;
- }
- /* NULL out all names[i] */
- memset (names, 0, nfound * sizeof(*names));
-
- for (i = 0; i < nfound; i++) {
-
- nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- /* no space for terminating NULL */
- CERROR("interface name %.*s too long (%d max)\n",
- nob, ifr[i].ifr_name, IFNAMSIZ);
- rc = -ENAMETOOLONG;
- goto out2;
- }
-
- LIBCFS_ALLOC(names[i], IFNAMSIZ);
- if (names[i] == NULL) {
- rc = -ENOMEM;
- goto out2;
- }
-
- memcpy(names[i], ifr[i].ifr_name, nob);
- names[i][nob] = 0;
- }
-
- *namesp = names;
- rc = nfound;
-
-out2:
- if (rc < 0)
- libcfs_ipif_free_enumeration(names, nfound);
-out1:
- LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
-out0:
- sock_close(so);
- return rc;
-
-}
-
-/*
- * Public entry of socket upcall.
- *
- * so_upcall can only be installed while create/accept of socket in
- * Darwin 8.0, so we setup libcfs_sock_upcall() as upcall for all
- * sockets in creat/accept, it will call upcall provided by user
- * which can be setup after create/accept of socket.
- */
-static void libcfs_sock_upcall(socket_t so, void* arg, int waitf)
-{
- cfs_socket_t *sock;
-
- sock = (cfs_socket_t *)arg;
- LASSERT(sock->s_magic == CFS_SOCK_MAGIC);
-
- if ((sock->s_flags & CFS_SOCK_UPCALL) != 0 && sock->s_upcall != NULL)
- sock->s_upcall(so, sock->s_upcallarg, waitf);
- return;
-}
-
-void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg)
-{
- sock->s_upcall = callback;
- sock->s_upcallarg = arg;
- sock->s_flags |= CFS_SOCK_UPCALL;
- return;
-}
-
-void libcfs_sock_reset_cb(cfs_socket_t *sock)
-{
- sock->s_flags &= ~CFS_SOCK_UPCALL;
- sock->s_upcall = NULL;
- sock->s_upcallarg = NULL;
- return;
-}
-
-static int
-libcfs_sock_create (cfs_socket_t **sockp, int *fatal,
- __u32 local_ip, int local_port)
-{
- struct sockaddr_in locaddr;
- cfs_socket_t *sock;
- int option;
- int optlen;
- int rc;
-
- /* All errors are fatal except bind failure if the port is in use */
- *fatal = 1;
-
- sock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO);
- if (!sock) {
- CERROR("Can't allocate cfs_socket.\n");
- return -ENOMEM;
- }
- *sockp = sock;
- sock->s_magic = CFS_SOCK_MAGIC;
-
- rc = -sock_socket(PF_INET, SOCK_STREAM, 0,
- libcfs_sock_upcall, sock, &C2B_SOCK(sock));
- if (rc != 0)
- goto out;
- option = 1;
- optlen = sizeof(option);
- rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET,
- SO_REUSEADDR, &option, optlen);
- if (rc != 0)
- goto out;
-
- /* can't specify a local port without a local IP */
- LASSERT (local_ip == 0 || local_port != 0);
-
- if (local_ip != 0 || local_port != 0) {
- bzero (&locaddr, sizeof (locaddr));
- locaddr.sin_len = sizeof(struct sockaddr_in);
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons (local_port);
- locaddr.sin_addr.s_addr = (local_ip != 0) ? htonl(local_ip) : INADDR_ANY;
- rc = -sock_bind(C2B_SOCK(sock), (struct sockaddr *)&locaddr);
- if (rc == -EADDRINUSE) {
- CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *fatal = 0;
- goto out;
- }
- if (rc != 0) {
- CERROR("Error trying to bind to port %d: %d\n",
- local_port, rc);
- goto out;
- }
- }
- return 0;
-out:
- if (C2B_SOCK(sock) != NULL)
- sock_close(C2B_SOCK(sock));
- FREE(sock, M_TEMP);
- return rc;
-}
-
-int
-libcfs_sock_listen (cfs_socket_t **sockp,
- __u32 local_ip, int local_port, int backlog)
-{
- cfs_socket_t *sock;
- int fatal;
- int rc;
-
- rc = libcfs_sock_create(&sock, &fatal, local_ip, local_port);
- if (rc != 0) {
- if (!fatal)
- CERROR("Can't create socket: port %d already in use\n",
- local_port);
- return rc;
-
- }
- rc = -sock_listen(C2B_SOCK(sock), backlog);
- if (rc == 0) {
- *sockp = sock;
- return 0;
- }
-
- if (C2B_SOCK(sock) != NULL)
- sock_close(C2B_SOCK(sock));
- FREE(sock, M_TEMP);
- return rc;
-}
-
-int
-libcfs_sock_accept (cfs_socket_t **newsockp, cfs_socket_t *sock)
-{
- cfs_socket_t *newsock;
- int rc;
-
- newsock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO);
- if (!newsock) {
- CERROR("Can't allocate cfs_socket.\n");
- return -ENOMEM;
- }
- newsock->s_magic = CFS_SOCK_MAGIC;
- /*
- * thread will sleep in sock_accept by calling of msleep(),
- * it can be interrupted because msleep() use PCATCH as argument.
- */
- rc = -sock_accept(C2B_SOCK(sock), NULL, 0, 0,
- libcfs_sock_upcall, newsock, &C2B_SOCK(newsock));
- if (rc) {
- if (C2B_SOCK(newsock) != NULL)
- sock_close(C2B_SOCK(newsock));
- FREE(newsock, M_TEMP);
- if ((sock->s_flags & CFS_SOCK_DOWN) != 0)
- /* shutdown by libcfs_sock_abort_accept(), fake
- * error number for lnet_acceptor() */
- rc = -EAGAIN;
- return rc;
- }
- *newsockp = newsock;
- return 0;
-}
-
-void
-libcfs_sock_abort_accept (cfs_socket_t *sock)
-{
- /*
- * XXX Liang:
- *
- * we want to wakeup thread blocked by sock_accept, but we don't
- * know the address where thread is sleeping on, so we cannot
- * wakeup it directly.
- * The thread slept in sock_accept will be waken up while:
- * 1. interrupt by signal
- * 2. new connection is coming (sonewconn)
- * 3. disconnecting of the socket (soisconnected)
- *
- * Cause we can't send signal to a thread directly(no KPI), so the
- * only thing can be done here is disconnect the socket (by
- * sock_shutdown() or sth else? ).
- *
- * Shutdown request of socket with SHUT_WR or SHUT_RDWR will
- * be issured to the protocol.
- * sock_shutdown()->tcp_usr_shutdown()->tcp_usrclosed()->
- * tcp_close()->soisdisconnected(), it will wakeup thread by
- * wakeup((caddr_t)&so->so_timeo);
- */
- sock->s_flags |= CFS_SOCK_DOWN;
- sock_shutdown(C2B_SOCK(sock), SHUT_RDWR);
-}
-
-int
-libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout)
-{
- size_t rcvlen;
- int rc;
- cfs_duration_t to = cfs_time_seconds(timeout);
- cfs_time_t then;
- struct timeval tv;
-
- LASSERT(nob > 0);
-
- for (;;) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0,
- };
- cfs_duration_usec(to, &tv);
- rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO,
- &tv, sizeof(tv));
- if (rc != 0) {
- CERROR("Can't set socket recv timeout "
- "%ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
-
- then = cfs_time_current();
- rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen);
- to -= cfs_time_current() - then;
-
- if (rc != 0 && rc != -EWOULDBLOCK)
- return rc;
- if (rcvlen == nob)
- return 0;
-
- if (to <= 0)
- return -EAGAIN;
-
- buffer = ((char *)buffer) + rcvlen;
- nob -= rcvlen;
- }
- return 0;
-}
-
-int
-libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout)
-{
- size_t sndlen;
- int rc;
- cfs_duration_t to = cfs_time_seconds(timeout);
- cfs_time_t then;
- struct timeval tv;
-
- LASSERT(nob > 0);
-
- for (;;) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0,
- };
-
- if (timeout != 0) {
- cfs_duration_usec(to, &tv);
- rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDTIMEO,
- &tv, sizeof(tv));
- if (rc != 0) {
- CERROR("Can't set socket send timeout "
- "%ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
- }
-
- then = cfs_time_current();
- rc = -sock_send(C2B_SOCK(sock), &msg,
- ((timeout == 0) ? MSG_DONTWAIT : 0), &sndlen);
- to -= cfs_time_current() - then;
-
- if (rc != 0 && rc != -EWOULDBLOCK)
- return rc;
- if (sndlen == nob)
- return 0;
-
- if (to <= 0)
- return -EAGAIN;
- buffer = ((char *)buffer) + sndlen;
- nob -= sndlen;
- }
- return 0;
-
-}
-
-int
-libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port)
-{
- struct sockaddr_in sin;
- int rc;
-
- if (remote != 0)
- /* Get remote address */
- rc = -sock_getpeername(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin));
- else
- /* Get local address */
- rc = -sock_getsockname(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin));
- if (rc != 0) {
- CERROR ("Error %d getting sock %s IP/port\n",
- rc, remote ? "peer" : "local");
- return rc;
- }
-
- if (ip != NULL)
- *ip = ntohl (sin.sin_addr.s_addr);
-
- if (port != NULL)
- *port = ntohs (sin.sin_port);
- return 0;
-}
-
-int
-libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize)
-{
- int option;
- int rc;
-
- if (txbufsize != 0) {
- option = txbufsize;
- rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof (option));
- if (rc != 0) {
- CERROR ("Can't set send buffer %d: %d\n",
- option, rc);
- return (rc);
- }
- }
-
- if (rxbufsize != 0) {
- option = rxbufsize;
- rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
- (char *)&option, sizeof (option));
- if (rc != 0) {
- CERROR ("Can't set receive buffer %d: %d\n",
- option, rc);
- return (rc);
- }
- }
- return 0;
-}
-
-int
-libcfs_sock_getbuf (cfs_socket_t *sock, int *txbufsize, int *rxbufsize)
-{
- int option;
- int optlen;
- int rc;
-
- if (txbufsize != NULL) {
- optlen = sizeof(option);
- rc = -sock_getsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
- (char *)&option, &optlen);
- if (rc != 0) {
- CERROR ("Can't get send buffer size: %d\n", rc);
- return (rc);
- }
- *txbufsize = option;
- }
-
- if (rxbufsize != NULL) {
- optlen = sizeof(option);
- rc = -sock_getsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
- (char *)&option, &optlen);
- if (rc != 0) {
- CERROR ("Can't get receive buffer size: %d\n", rc);
- return (rc);
- }
- *rxbufsize = option;
- }
- return 0;
-}
-
-void
-libcfs_sock_release (cfs_socket_t *sock)
-{
- if (C2B_SOCK(sock) != NULL) {
- sock_shutdown(C2B_SOCK(sock), 2);
- sock_close(C2B_SOCK(sock));
- }
- FREE(sock, M_TEMP);
-}
-
-int
-libcfs_sock_connect (cfs_socket_t **sockp, int *fatal,
- __u32 local_ip, int local_port,
- __u32 peer_ip, int peer_port)
-{
- cfs_socket_t *sock;
- struct sockaddr_in srvaddr;
- int rc;
-
- rc = libcfs_sock_create(&sock, fatal, local_ip, local_port);
- if (rc != 0)
- return rc;
-
- bzero(&srvaddr, sizeof(srvaddr));
- srvaddr.sin_len = sizeof(struct sockaddr_in);
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons(peer_port);
- srvaddr.sin_addr.s_addr = htonl(peer_ip);
-
- rc = -sock_connect(C2B_SOCK(sock), (struct sockaddr *)&srvaddr, 0);
- if (rc == 0) {
- *sockp = sock;
- return 0;
- }
-
- *fatal = !(rc == -EADDRNOTAVAIL || rc == -EADDRINUSE);
- CDEBUG(*fatal ? D_NETERROR : D_NET,
- "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
- HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
-
- libcfs_sock_release(sock);
- return rc;
-}
-
-#else /* !__DARWIN8__ */
-
-/*
- * To use bigger buffer for socket:
- * 1. Increase nmbclusters (Cannot increased by sysctl because it's ready only, so
- * we must patch kernel).
- * 2. Increase net.inet.tcp.reass.maxsegments
- * 3. Increase net.inet.tcp.sendspace
- * 4. Increase net.inet.tcp.recvspace
- * 5. Increase kern.ipc.maxsockbuf
- */
-#define KSOCK_MAX_BUF (1152*1024)
-
-int
-libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
-{
- struct socket *so;
- struct ifreq ifr;
- int nob;
- int rc;
- __u32 val;
- CFS_DECL_FUNNEL_DATA;
-
- CFS_NET_IN;
- rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
- CFS_NET_EX;
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return (-rc);
- }
- nob = strnlen(name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- CERROR("Interface name %s too long\n", name);
- rc = -EINVAL;
- goto out;
- }
-
- CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
- strcpy(ifr.ifr_name, name);
- CFS_NET_IN;
- rc = ifioctl(so, SIOCGIFFLAGS, (caddr_t)&ifr, current_proc());
- CFS_NET_EX;
-
- if (rc != 0) {
- CERROR("Can't get flags for interface %s\n", name);
- goto out;
- }
- if ((ifr.ifr_flags & IFF_UP) == 0) {
- CDEBUG(D_NET, "Interface %s down\n", name);
- *up = 0;
- *ip = *mask = 0;
- goto out;
- }
-
- *up = 1;
- strcpy(ifr.ifr_name, name);
- *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
- CFS_NET_IN;
- rc = ifioctl(so, SIOCGIFADDR, (caddr_t)&ifr, current_proc());
- CFS_NET_EX;
-
- if (rc != 0) {
- CERROR("Can't get IP address for interface %s\n", name);
- goto out;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *ip = ntohl(val);
-
- strcpy(ifr.ifr_name, name);
- *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
- CFS_NET_IN;
- rc = ifioctl(so, SIOCGIFNETMASK, (caddr_t)&ifr, current_proc());
- CFS_NET_EX;
-
- if (rc != 0) {
- CERROR("Can't get netmask for interface %s\n", name);
- goto out;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *mask = ntohl(val);
-out:
- CFS_NET_IN;
- soclose(so);
- CFS_NET_EX;
- return -rc;
-}
-
-int
-libcfs_ipif_enumerate (char ***namesp)
-{
- /* Allocate and fill in 'names', returning # interfaces/error */
- char **names;
- int toobig;
- int nalloc;
- int nfound;
- struct socket *so;
- struct ifreq *ifr;
- struct ifconf ifc;
- int rc;
- int nob;
- int i;
- CFS_DECL_FUNNEL_DATA;
-
- CFS_NET_IN;
- rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
- CFS_NET_EX;
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return (-rc);
- }
-
- nalloc = 16; /* first guess at max interfaces */
- toobig = 0;
- for (;;) {
- if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) {
- toobig = 1;
- nalloc = CFS_PAGE_SIZE/sizeof(*ifr);
- CWARN("Too many interfaces: only enumerating first %d\n",
- nalloc);
- }
-
- LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
- if (ifr == NULL) {
- CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
- rc = -ENOMEM;
- goto out0;
- }
-
- ifc.ifc_buf = (char *)ifr;
- ifc.ifc_len = nalloc * sizeof(*ifr);
-
- CFS_NET_IN;
- rc = -ifioctl(so, SIOCGIFCONF, (caddr_t)&ifc, current_proc());
- CFS_NET_EX;
-
- if (rc < 0) {
- CERROR ("Error %d enumerating interfaces\n", rc);
- goto out1;
- }
-
- nfound = ifc.ifc_len/sizeof(*ifr);
- LASSERT (nfound <= nalloc);
-
- if (nfound < nalloc || toobig)
- break;
-
- LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
- nalloc *= 2;
- }
- if (nfound == 0)
- goto out1;
-
- LIBCFS_ALLOC(names, nfound * sizeof(*names));
- if (names == NULL) {
- rc = -ENOMEM;
- goto out1;
- }
- /* NULL out all names[i] */
- memset (names, 0, nfound * sizeof(*names));
-
- for (i = 0; i < nfound; i++) {
-
- nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- /* no space for terminating NULL */
- CERROR("interface name %.*s too long (%d max)\n",
- nob, ifr[i].ifr_name, IFNAMSIZ);
- rc = -ENAMETOOLONG;
- goto out2;
- }
-
- LIBCFS_ALLOC(names[i], IFNAMSIZ);
- if (names[i] == NULL) {
- rc = -ENOMEM;
- goto out2;
- }
-
- memcpy(names[i], ifr[i].ifr_name, nob);
- names[i][nob] = 0;
- }
-
- *namesp = names;
- rc = nfound;
-
-out2:
- if (rc < 0)
- libcfs_ipif_free_enumeration(names, nfound);
-out1:
- LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
-out0:
- CFS_NET_IN;
- soclose(so);
- CFS_NET_EX;
- return rc;
-}
-
-static int
-libcfs_sock_create (struct socket **sockp, int *fatal,
- __u32 local_ip, int local_port)
-{
- struct sockaddr_in locaddr;
- struct socket *so;
- struct sockopt sopt;
- int option;
- int rc;
- CFS_DECL_FUNNEL_DATA;
-
- *fatal = 1;
- CFS_NET_IN;
- rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
- CFS_NET_EX;
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return (-rc);
- }
-
- bzero(&sopt, sizeof sopt);
- option = 1;
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_name = SO_REUSEADDR;
- sopt.sopt_val = &option;
- sopt.sopt_valsize = sizeof(option);
- CFS_NET_IN;
- rc = sosetopt(so, &sopt);
- if (rc != 0) {
- CFS_NET_EX;
- CERROR ("Can't set sock reuse address: %d\n", rc);
- goto out;
- }
- /* can't specify a local port without a local IP */
- LASSERT (local_ip == 0 || local_port != 0);
-
- if (local_ip != 0 || local_port != 0) {
- bzero (&locaddr, sizeof (locaddr));
- locaddr.sin_len = sizeof(struct sockaddr_in);
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons (local_port);
- locaddr.sin_addr.s_addr = (local_ip != 0) ? htonl(local_ip) :
- INADDR_ANY;
-
- rc = sobind(so, (struct sockaddr *)&locaddr);
- if (rc == EADDRINUSE) {
- CFS_NET_EX;
- CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *fatal = 0;
- goto out;
- }
- if (rc != 0) {
- CFS_NET_EX;
- CERROR ("Can't bind to local IP Address %u.%u.%u.%u: %d\n",
- HIPQUAD(local_ip), rc);
- goto out;
- }
- }
- *sockp = so;
- return 0;
-out:
- CFS_NET_IN;
- soclose(so);
- CFS_NET_EX;
- return -rc;
-}
-
-int
-libcfs_sock_listen (struct socket **sockp,
- __u32 local_ip, int local_port, int backlog)
-{
- int fatal;
- int rc;
- CFS_DECL_FUNNEL_DATA;
-
- rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port);
- if (rc != 0) {
- if (!fatal)
- CERROR("Can't create socket: port %d already in use\n",
- local_port);
- return rc;
- }
- CFS_NET_IN;
- rc = solisten(*sockp, backlog);
- CFS_NET_EX;
- if (rc == 0)
- return 0;
- CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
- CFS_NET_IN;
- soclose(*sockp);
- CFS_NET_EX;
- return -rc;
-}
-
-int
-libcfs_sock_accept (struct socket **newsockp, struct socket *sock)
-{
- struct socket *so;
- struct sockaddr *sa;
- int error, s;
- CFS_DECL_FUNNEL_DATA;
-
- CFS_NET_IN;
- s = splnet();
- if ((sock->so_options & SO_ACCEPTCONN) == 0) {
- splx(s);
- CFS_NET_EX;
- return (-EINVAL);
- }
-
- if ((sock->so_state & SS_NBIO) && sock->so_comp.tqh_first == NULL) {
- splx(s);
- CFS_NET_EX;
- return (-EWOULDBLOCK);
- }
-
- error = 0;
- while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
- if (sock->so_state & SS_CANTRCVMORE) {
- sock->so_error = ECONNABORTED;
- break;
- }
- error = tsleep((caddr_t)&sock->so_timeo, PSOCK | PCATCH,
- "accept", 0);
- if (error) {
- splx(s);
- CFS_NET_EX;
- return (-error);
- }
- }
- if (sock->so_error) {
- error = sock->so_error;
- sock->so_error = 0;
- splx(s);
- CFS_NET_EX;
- return (-error);
- }
-
- /*
- * At this point we know that there is at least one connection
- * ready to be accepted. Remove it from the queue prior to
- * allocating the file descriptor for it since falloc() may
- * block allowing another process to accept the connection
- * instead.
- */
- so = TAILQ_FIRST(&sock->so_comp);
- TAILQ_REMOVE(&sock->so_comp, so, so_list);
- sock->so_qlen--;
-
- so->so_state &= ~SS_COMP;
- so->so_head = NULL;
- sa = 0;
- (void) soaccept(so, &sa);
-
- *newsockp = so;
- FREE(sa, M_SONAME);
- splx(s);
- CFS_NET_EX;
- return (-error);
-}
-
-void
-libcfs_sock_abort_accept (struct socket *sock)
-{
- wakeup(&sock->so_timeo);
-}
-
-/*
- * XXX Liang: timeout for write is not supported yet.
- */
-int
-libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- CFS_DECL_NET_DATA;
-
- while (nob > 0) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct uio suio = {
- .uio_iov = &iov,
- .uio_iovcnt = 1,
- .uio_offset = 0,
- .uio_resid = nob,
- .uio_segflg = UIO_SYSSPACE,
- .uio_rw = UIO_WRITE,
- .uio_procp = NULL
- };
-
- CFS_NET_IN;
- rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, 0);
- CFS_NET_EX;
-
- if (rc != 0) {
- if ( suio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\
- rc == EWOULDBLOCK))
- rc = 0;
- if ( rc != 0 )
- return -rc;
- rc = nob - suio.uio_resid;
- buffer = ((char *)buffer) + rc;
- nob = suio.uio_resid;
- continue;
- }
- break;
- }
- return (0);
-}
-
-/*
- * XXX Liang: timeout for read is not supported yet.
- */
-int
-libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- CFS_DECL_NET_DATA;
-
- while (nob > 0) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct uio ruio = {
- .uio_iov = &iov,
- .uio_iovcnt = 1,
- .uio_offset = 0,
- .uio_resid = nob,
- .uio_segflg = UIO_SYSSPACE,
- .uio_rw = UIO_READ,
- .uio_procp = NULL
- };
-
- CFS_NET_IN;
- rc = soreceive(sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, (int *)0);
- CFS_NET_EX;
-
- if (rc != 0) {
- if ( ruio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\
- rc == EWOULDBLOCK))
- rc = 0;
- if (rc != 0)
- return -rc;
- rc = nob - ruio.uio_resid;
- buffer = ((char *)buffer) + rc;
- nob = ruio.uio_resid;
- continue;
- }
- break;
- }
- return (0);
-}
-
-int
-libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize)
-{
- struct sockopt sopt;
- int rc = 0;
- int option;
- CFS_DECL_NET_DATA;
-
- bzero(&sopt, sizeof sopt);
- sopt.sopt_dir = SOPT_SET;
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_val = &option;
- sopt.sopt_valsize = sizeof(option);
-
- if (txbufsize != 0) {
- option = txbufsize;
- if (option > KSOCK_MAX_BUF)
- option = KSOCK_MAX_BUF;
-
- sopt.sopt_name = SO_SNDBUF;
- CFS_NET_IN;
- rc = sosetopt(sock, &sopt);
- CFS_NET_EX;
- if (rc != 0) {
- CERROR ("Can't set send buffer %d: %d\n",
- option, rc);
-
- return -rc;
- }
- }
-
- if (rxbufsize != 0) {
- option = rxbufsize;
- sopt.sopt_name = SO_RCVBUF;
- CFS_NET_IN;
- rc = sosetopt(sock, &sopt);
- CFS_NET_EX;
- if (rc != 0) {
- CERROR ("Can't set receive buffer %d: %d\n",
- option, rc);
- return -rc;
- }
- }
- return 0;
-}
-
-int
-libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port)
-{
- struct sockaddr_in *sin;
- struct sockaddr *sa = NULL;
- int rc;
- CFS_DECL_NET_DATA;
-
- if (remote != 0) {
- CFS_NET_IN;
- rc = sock->so_proto->pr_usrreqs->pru_peeraddr(sock, &sa);
- CFS_NET_EX;
-
- if (rc != 0) {
- if (sa) FREE(sa, M_SONAME);
- CERROR ("Error %d getting sock peer IP\n", rc);
- return -rc;
- }
- } else {
- CFS_NET_IN;
- rc = sock->so_proto->pr_usrreqs->pru_sockaddr(sock, &sa);
- CFS_NET_EX;
- if (rc != 0) {
- if (sa) FREE(sa, M_SONAME);
- CERROR ("Error %d getting sock local IP\n", rc);
- return -rc;
- }
- }
- if (sa != NULL) {
- sin = (struct sockaddr_in *)sa;
- if (ip != NULL)
- *ip = ntohl (sin->sin_addr.s_addr);
- if (port != NULL)
- *port = ntohs (sin->sin_port);
- if (sa)
- FREE(sa, M_SONAME);
- }
- return 0;
-}
-
-int
-libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize)
-{
- struct sockopt sopt;
- int rc;
- CFS_DECL_NET_DATA;
-
- bzero(&sopt, sizeof sopt);
- sopt.sopt_dir = SOPT_GET;
- sopt.sopt_level = SOL_SOCKET;
-
- if (txbufsize != NULL) {
- sopt.sopt_val = txbufsize;
- sopt.sopt_valsize = sizeof(*txbufsize);
- sopt.sopt_name = SO_SNDBUF;
- CFS_NET_IN;
- rc = sogetopt(sock, &sopt);
- CFS_NET_EX;
- if (rc != 0) {
- CERROR ("Can't get send buffer size: %d\n", rc);
- return -rc;
- }
- }
-
- if (rxbufsize != NULL) {
- sopt.sopt_val = rxbufsize;
- sopt.sopt_valsize = sizeof(*rxbufsize);
- sopt.sopt_name = SO_RCVBUF;
- CFS_NET_IN;
- rc = sogetopt(sock, &sopt);
- CFS_NET_EX;
- if (rc != 0) {
- CERROR ("Can't get receive buffer size: %d\n", rc);
- return -rc;
- }
- }
- return 0;
-}
-
-int
-libcfs_sock_connect (struct socket **sockp, int *fatal,
- __u32 local_ip, int local_port,
- __u32 peer_ip, int peer_port)
-{
- struct sockaddr_in srvaddr;
- struct socket *so;
- int s;
- int rc;
- CFS_DECL_FUNNEL_DATA;
-
- rc = libcfs_sock_create(sockp, fatal, local_ip, local_port);
- if (rc != 0)
- return rc;
- so = *sockp;
- bzero(&srvaddr, sizeof(srvaddr));
- srvaddr.sin_len = sizeof(struct sockaddr_in);
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons (peer_port);
- srvaddr.sin_addr.s_addr = htonl (peer_ip);
-
- CFS_NET_IN;
- rc = soconnect(so, (struct sockaddr *)&srvaddr);
- if (rc != 0) {
- CFS_NET_EX;
- if (rc != EADDRNOTAVAIL && rc != EADDRINUSE)
- CDEBUG(D_NETERROR,
- "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
- HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
- s = splnet();
- while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
- CDEBUG(D_NET, "ksocknal sleep for waiting auto_connect.\n");
- (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "ksocknal_conn", hz);
- }
- if ((rc = so->so_error) != 0) {
- so->so_error = 0;
- splx(s);
- CFS_NET_EX;
- CDEBUG(D_NETERROR,
- "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
- HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
- LASSERT(so->so_state & SS_ISCONNECTED);
- splx(s);
- CFS_NET_EX;
- if (sockp)
- *sockp = so;
- return (0);
-out:
- CFS_NET_IN;
- soshutdown(so, 2);
- soclose(so);
- CFS_NET_EX;
- return (-rc);
-}
-
-void
-libcfs_sock_release (struct socket *sock)
-{
- CFS_DECL_FUNNEL_DATA;
- CFS_NET_IN;
- soshutdown(sock, 0);
- CFS_NET_EX;
-}
-
-#endif
+++ /dev/null
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-#include "tracefile.h"
-
-/*
- * We can't support smp tracefile currently.
- * Everything is put on one cpu.
- */
-
-#define M_TCD_MAX_PAGES (128 * 1280)
-
-static long max_permit_mb = (64 * 1024);
-
-spinlock_t trace_cpu_serializer;
-
-/*
- * thread currently executing tracefile code or NULL if none does. Used to
- * detect recursive calls to libcfs_debug_msg().
- */
-static thread_t trace_owner = NULL;
-
-extern int get_preemption_level(void);
-extern atomic_t tage_allocated;
-
-struct rw_semaphore tracefile_sem;
-
-int tracefile_init_arch() {
- init_rwsem(&tracefile_sem);
-#error "Todo: initialise per-cpu console buffers"
- return 0;
-}
-
-void tracefile_fini_arch() {
-}
-
-void tracefile_read_lock() {
- down_read(&tracefile_sem);
-}
-
-void tracefile_read_unlock() {
- up_read(&tracefile_sem);
-}
-
-void tracefile_write_lock() {
- down_write(&tracefile_sem);
-}
-
-void tracefile_write_unlock() {
- up_write(&tracefile_sem);
-}
-
-char *trace_get_console_buffer(void)
-{
-#error "todo: return a per-cpu/interrupt console buffer and disable pre-emption"
-}
-
-void trace_put_console_buffer(char *buffer)
-{
-#error "todo: re-enable pre-emption"
-}
-
-struct trace_cpu_data *trace_get_tcd(void)
-{
- struct trace_cpu_data *tcd;
- int nr_pages;
- struct list_head pages;
-
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- /*
- * debugging check for recursive call to libcfs_debug_msg()
- */
- if (trace_owner == current_thread()) {
- /*
- * Cannot assert here.
- */
- printk(KERN_EMERG "recursive call to %s", __FUNCTION__);
- /*
- * "The death of God left the angels in a strange position."
- */
- cfs_enter_debugger();
- }
- tcd = &trace_data[0].tcd;
- CFS_INIT_LIST_HEAD(&pages);
- if (get_preemption_level() == 0)
- nr_pages = trace_refill_stock(tcd, CFS_ALLOC_STD, &pages);
- else
- nr_pages = 0;
- spin_lock(&trace_cpu_serializer);
- trace_owner = current_thread();
- tcd->tcd_cur_stock_pages += nr_pages;
- list_splice(&pages, &tcd->tcd_stock_pages);
- return tcd;
-}
-
-extern void raw_page_death_row_clean(void);
-
-void __trace_put_tcd(struct trace_cpu_data *tcd)
-{
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- LASSERT(trace_owner == current_thread());
- trace_owner = NULL;
- spin_unlock(&trace_cpu_serializer);
- if (get_preemption_level() == 0)
- /* purge all pending pages */
- raw_page_death_row_clean();
-}
-
-int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
-{
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- /* XNU has global tcd, and all pages are owned by it */
- return 1;
-}
-
-void
-set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
- const int line, unsigned long stack)
-{
- struct timeval tv;
-
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- do_gettimeofday(&tv);
- header->ph_subsys = subsys;
- header->ph_mask = mask;
- header->ph_cpu_id = smp_processor_id();
- header->ph_sec = (__u32)tv.tv_sec;
- header->ph_usec = tv.tv_usec;
- header->ph_stack = stack;
- header->ph_pid = cfs_curproc_pid();
- header->ph_line_num = line;
- header->ph_extern_pid = (__u32)current_thread();
-}
-
-void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
- int len, const char *file, const char *fn)
-{
- char *prefix = "Lustre", *ptype = KERN_INFO;
-
- /*
- * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- if ((mask & D_EMERG) != 0) {
- prefix = "LustreError";
- ptype = KERN_EMERG;
- } else if ((mask & D_ERROR) != 0) {
- prefix = "LustreError";
- ptype = KERN_ERR;
- } else if ((mask & D_WARNING) != 0) {
- prefix = "Lustre";
- ptype = KERN_WARNING;
- } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) {
- prefix = "Lustre";
- ptype = KERN_INFO;
- }
-
- if ((mask & D_CONSOLE) != 0) {
- printk("%s%s: %.*s", ptype, prefix, len, buf);
- } else {
- printk("%s%s: %d:%d:(%s:%d:%s()) %*s",
- ptype, prefix, hdr->ph_pid, hdr->ph_extern_pid,
- file, hdr->ph_line_num, fn, len, buf);
- }
-}
-
-int trace_max_debug_mb(void)
-{
- return max_permit_mb;
-}
-
-void
-trace_call_on_all_cpus(void (*fn)(void *arg), void *arg)
-{
-#error "tbd"
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Darwin porting library
- * Make things easy to port
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <mach/mach_types.h>
-#include <string.h>
-#include <sys/errno.h>
-#include <sys/types.h>
-#include <sys/fcntl.h>
-#include <lnet/types.h>
-
-#include <libcfs/kp30.h>
-
-#ifndef isspace
-inline int
-isspace(char c)
-{
- return (c == ' ' || c == '\t' || c == '\n' || c == '\12');
-}
-#endif
-
-char * strpbrk(const char * cs,const char * ct)
-{
- const char *sc1,*sc2;
-
- for( sc1 = cs; *sc1 != '\0'; ++sc1) {
- for( sc2 = ct; *sc2 != '\0'; ++sc2) {
- if (*sc1 == *sc2)
- return (char *) sc1;
- }
- }
- return NULL;
-}
-
-char * strsep(char **s, const char *ct)
-{
- char *sbegin = *s, *end;
-
- if (sbegin == NULL)
- return NULL;
- end = strpbrk(sbegin, ct);
- if (end != NULL)
- *end++ = '\0';
- *s = end;
-
- return sbegin;
-}
-
-size_t strnlen(const char * s, size_t count)
-{
- const char *sc;
-
- for (sc = s; count-- && *sc != '\0'; ++sc)
- /* nothing */;
- return sc - s;
-}
-
-char *
-strstr(const char *in, const char *str)
-{
- char c;
- size_t len;
-
- c = *str++;
- if (!c)
- return (char *) in; // Trivial empty string case
- len = strlen(str);
- do {
- char sc;
- do {
- sc = *in++;
- if (!sc)
- return (char *) 0;
- } while (sc != c);
- } while (strncmp(in, str, len) != 0);
- return (char *) (in - 1);
-}
-
-char *
-strrchr(const char *p, int ch)
-{
- const char *end = p + strlen(p);
- do {
- if (*end == (char)ch)
- return (char *)end;
- } while (--end >= p);
- return NULL;
-}
-
-char *
-ul2dstr(unsigned long address, char *buf, int len)
-{
- char *pos = buf + len - 1;
-
- if (len <= 0 || !buf)
- return NULL;
- *pos = 0;
- while (address) {
- if (!--len) break;
- *--pos = address % 10 + '0';
- address /= 10;
- }
- return pos;
-}
-
-/*
- * miscellaneous libcfs stuff
- */
-
-/*
- * Convert server error code to client format.
- * Linux errno.h.
- */
-
-/* obtained by
- *
- * cc /usr/include/asm/errno.h -E -dM | grep '#define E' | sort -n -k3,3
- *
- */
-enum linux_errnos {
- LINUX_EPERM = 1,
- LINUX_ENOENT = 2,
- LINUX_ESRCH = 3,
- LINUX_EINTR = 4,
- LINUX_EIO = 5,
- LINUX_ENXIO = 6,
- LINUX_E2BIG = 7,
- LINUX_ENOEXEC = 8,
- LINUX_EBADF = 9,
- LINUX_ECHILD = 10,
- LINUX_EAGAIN = 11,
- LINUX_ENOMEM = 12,
- LINUX_EACCES = 13,
- LINUX_EFAULT = 14,
- LINUX_ENOTBLK = 15,
- LINUX_EBUSY = 16,
- LINUX_EEXIST = 17,
- LINUX_EXDEV = 18,
- LINUX_ENODEV = 19,
- LINUX_ENOTDIR = 20,
- LINUX_EISDIR = 21,
- LINUX_EINVAL = 22,
- LINUX_ENFILE = 23,
- LINUX_EMFILE = 24,
- LINUX_ENOTTY = 25,
- LINUX_ETXTBSY = 26,
- LINUX_EFBIG = 27,
- LINUX_ENOSPC = 28,
- LINUX_ESPIPE = 29,
- LINUX_EROFS = 30,
- LINUX_EMLINK = 31,
- LINUX_EPIPE = 32,
- LINUX_EDOM = 33,
- LINUX_ERANGE = 34,
- LINUX_EDEADLK = 35,
- LINUX_ENAMETOOLONG = 36,
- LINUX_ENOLCK = 37,
- LINUX_ENOSYS = 38,
- LINUX_ENOTEMPTY = 39,
- LINUX_ELOOP = 40,
- LINUX_ENOMSG = 42,
- LINUX_EIDRM = 43,
- LINUX_ECHRNG = 44,
- LINUX_EL2NSYNC = 45,
- LINUX_EL3HLT = 46,
- LINUX_EL3RST = 47,
- LINUX_ELNRNG = 48,
- LINUX_EUNATCH = 49,
- LINUX_ENOCSI = 50,
- LINUX_EL2HLT = 51,
- LINUX_EBADE = 52,
- LINUX_EBADR = 53,
- LINUX_EXFULL = 54,
- LINUX_ENOANO = 55,
- LINUX_EBADRQC = 56,
- LINUX_EBADSLT = 57,
- LINUX_EBFONT = 59,
- LINUX_ENOSTR = 60,
- LINUX_ENODATA = 61,
- LINUX_ETIME = 62,
- LINUX_ENOSR = 63,
- LINUX_ENONET = 64,
- LINUX_ENOPKG = 65,
- LINUX_EREMOTE = 66,
- LINUX_ENOLINK = 67,
- LINUX_EADV = 68,
- LINUX_ESRMNT = 69,
- LINUX_ECOMM = 70,
- LINUX_EPROTO = 71,
- LINUX_EMULTIHOP = 72,
- LINUX_EDOTDOT = 73,
- LINUX_EBADMSG = 74,
- LINUX_EOVERFLOW = 75,
- LINUX_ENOTUNIQ = 76,
- LINUX_EBADFD = 77,
- LINUX_EREMCHG = 78,
- LINUX_ELIBACC = 79,
- LINUX_ELIBBAD = 80,
- LINUX_ELIBSCN = 81,
- LINUX_ELIBMAX = 82,
- LINUX_ELIBEXEC = 83,
- LINUX_EILSEQ = 84,
- LINUX_ERESTART = 85,
- LINUX_ESTRPIPE = 86,
- LINUX_EUSERS = 87,
- LINUX_ENOTSOCK = 88,
- LINUX_EDESTADDRREQ = 89,
- LINUX_EMSGSIZE = 90,
- LINUX_EPROTOTYPE = 91,
- LINUX_ENOPROTOOPT = 92,
- LINUX_EPROTONOSUPPORT = 93,
- LINUX_ESOCKTNOSUPPORT = 94,
- LINUX_EOPNOTSUPP = 95,
- LINUX_EPFNOSUPPORT = 96,
- LINUX_EAFNOSUPPORT = 97,
- LINUX_EADDRINUSE = 98,
- LINUX_EADDRNOTAVAIL = 99,
- LINUX_ENETDOWN = 100,
- LINUX_ENETUNREACH = 101,
- LINUX_ENETRESET = 102,
- LINUX_ECONNABORTED = 103,
- LINUX_ECONNRESET = 104,
- LINUX_ENOBUFS = 105,
- LINUX_EISCONN = 106,
- LINUX_ENOTCONN = 107,
- LINUX_ESHUTDOWN = 108,
- LINUX_ETOOMANYREFS = 109,
- LINUX_ETIMEDOUT = 110,
- LINUX_ECONNREFUSED = 111,
- LINUX_EHOSTDOWN = 112,
- LINUX_EHOSTUNREACH = 113,
- LINUX_EALREADY = 114,
- LINUX_EINPROGRESS = 115,
- LINUX_ESTALE = 116,
- LINUX_EUCLEAN = 117,
- LINUX_ENOTNAM = 118,
- LINUX_ENAVAIL = 119,
- LINUX_EISNAM = 120,
- LINUX_EREMOTEIO = 121,
- LINUX_EDQUOT = 122,
- LINUX_ENOMEDIUM = 123,
- LINUX_EMEDIUMTYPE = 124,
-
- /*
- * we don't need these, but for completeness..
- */
- LINUX_EDEADLOCK = LINUX_EDEADLK,
- LINUX_EWOULDBLOCK = LINUX_EAGAIN
-};
-
-int convert_server_error(__u64 ecode)
-{
- int sign;
- int code;
-
- static int errno_xlate[] = {
- /* success is always success */
- [0] = 0,
- [LINUX_EPERM] = EPERM,
- [LINUX_ENOENT] = ENOENT,
- [LINUX_ESRCH] = ESRCH,
- [LINUX_EINTR] = EINTR,
- [LINUX_EIO] = EIO,
- [LINUX_ENXIO] = ENXIO,
- [LINUX_E2BIG] = E2BIG,
- [LINUX_ENOEXEC] = ENOEXEC,
- [LINUX_EBADF] = EBADF,
- [LINUX_ECHILD] = ECHILD,
- [LINUX_EAGAIN] = EAGAIN,
- [LINUX_ENOMEM] = ENOMEM,
- [LINUX_EACCES] = EACCES,
- [LINUX_EFAULT] = EFAULT,
- [LINUX_ENOTBLK] = ENOTBLK,
- [LINUX_EBUSY] = EBUSY,
- [LINUX_EEXIST] = EEXIST,
- [LINUX_EXDEV] = EXDEV,
- [LINUX_ENODEV] = ENODEV,
- [LINUX_ENOTDIR] = ENOTDIR,
- [LINUX_EISDIR] = EISDIR,
- [LINUX_EINVAL] = EINVAL,
- [LINUX_ENFILE] = ENFILE,
- [LINUX_EMFILE] = EMFILE,
- [LINUX_ENOTTY] = ENOTTY,
- [LINUX_ETXTBSY] = ETXTBSY,
- [LINUX_EFBIG] = EFBIG,
- [LINUX_ENOSPC] = ENOSPC,
- [LINUX_ESPIPE] = ESPIPE,
- [LINUX_EROFS] = EROFS,
- [LINUX_EMLINK] = EMLINK,
- [LINUX_EPIPE] = EPIPE,
- [LINUX_EDOM] = EDOM,
- [LINUX_ERANGE] = ERANGE,
- [LINUX_EDEADLK] = EDEADLK,
- [LINUX_ENAMETOOLONG] = ENAMETOOLONG,
- [LINUX_ENOLCK] = ENOLCK,
- [LINUX_ENOSYS] = ENOSYS,
- [LINUX_ENOTEMPTY] = ENOTEMPTY,
- [LINUX_ELOOP] = ELOOP,
- [LINUX_ENOMSG] = ENOMSG,
- [LINUX_EIDRM] = EIDRM,
- [LINUX_ECHRNG] = EINVAL /* ECHRNG */,
- [LINUX_EL2NSYNC] = EINVAL /* EL2NSYNC */,
- [LINUX_EL3HLT] = EINVAL /* EL3HLT */,
- [LINUX_EL3RST] = EINVAL /* EL3RST */,
- [LINUX_ELNRNG] = EINVAL /* ELNRNG */,
- [LINUX_EUNATCH] = EINVAL /* EUNATCH */,
- [LINUX_ENOCSI] = EINVAL /* ENOCSI */,
- [LINUX_EL2HLT] = EINVAL /* EL2HLT */,
- [LINUX_EBADE] = EINVAL /* EBADE */,
- [LINUX_EBADR] = EBADRPC,
- [LINUX_EXFULL] = EINVAL /* EXFULL */,
- [LINUX_ENOANO] = EINVAL /* ENOANO */,
- [LINUX_EBADRQC] = EINVAL /* EBADRQC */,
- [LINUX_EBADSLT] = EINVAL /* EBADSLT */,
- [LINUX_EBFONT] = EINVAL /* EBFONT */,
- [LINUX_ENOSTR] = EINVAL /* ENOSTR */,
- [LINUX_ENODATA] = EINVAL /* ENODATA */,
- [LINUX_ETIME] = EINVAL /* ETIME */,
- [LINUX_ENOSR] = EINVAL /* ENOSR */,
- [LINUX_ENONET] = EINVAL /* ENONET */,
- [LINUX_ENOPKG] = EINVAL /* ENOPKG */,
- [LINUX_EREMOTE] = EREMOTE,
- [LINUX_ENOLINK] = EINVAL /* ENOLINK */,
- [LINUX_EADV] = EINVAL /* EADV */,
- [LINUX_ESRMNT] = EINVAL /* ESRMNT */,
- [LINUX_ECOMM] = EINVAL /* ECOMM */,
- [LINUX_EPROTO] = EPROTOTYPE,
- [LINUX_EMULTIHOP] = EINVAL /* EMULTIHOP */,
- [LINUX_EDOTDOT] = EINVAL /* EDOTDOT */,
- [LINUX_EBADMSG] = EINVAL /* EBADMSG */,
- [LINUX_EOVERFLOW] = EOVERFLOW,
- [LINUX_ENOTUNIQ] = EINVAL /* ENOTUNIQ */,
- [LINUX_EBADFD] = EINVAL /* EBADFD */,
- [LINUX_EREMCHG] = EINVAL /* EREMCHG */,
- [LINUX_ELIBACC] = EINVAL /* ELIBACC */,
- [LINUX_ELIBBAD] = EINVAL /* ELIBBAD */,
- [LINUX_ELIBSCN] = EINVAL /* ELIBSCN */,
- [LINUX_ELIBMAX] = EINVAL /* ELIBMAX */,
- [LINUX_ELIBEXEC] = EINVAL /* ELIBEXEC */,
- [LINUX_EILSEQ] = EILSEQ,
- [LINUX_ERESTART] = EINVAL /* because ERESTART is
- * negative in XNU */,
- [LINUX_ESTRPIPE] = EINVAL /* ESTRPIPE */,
- [LINUX_EUSERS] = EUSERS,
- [LINUX_ENOTSOCK] = ENOTSOCK,
- [LINUX_EDESTADDRREQ] = EDESTADDRREQ,
- [LINUX_EMSGSIZE] = EMSGSIZE,
- [LINUX_EPROTOTYPE] = EPROTOTYPE,
- [LINUX_ENOPROTOOPT] = ENOPROTOOPT,
- [LINUX_EPROTONOSUPPORT] = EPROTONOSUPPORT,
- [LINUX_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT,
- [LINUX_EOPNOTSUPP] = EOPNOTSUPP,
- [LINUX_EPFNOSUPPORT] = EPFNOSUPPORT,
- [LINUX_EAFNOSUPPORT] = EAFNOSUPPORT,
- [LINUX_EADDRINUSE] = EADDRINUSE,
- [LINUX_EADDRNOTAVAIL] = EADDRNOTAVAIL,
- [LINUX_ENETDOWN] = ENETDOWN,
- [LINUX_ENETUNREACH] = ENETUNREACH,
- [LINUX_ENETRESET] = ENETRESET,
- [LINUX_ECONNABORTED] = ECONNABORTED,
- [LINUX_ECONNRESET] = ECONNRESET,
- [LINUX_ENOBUFS] = ENOBUFS,
- [LINUX_EISCONN] = EISCONN,
- [LINUX_ENOTCONN] = ENOTCONN,
- [LINUX_ESHUTDOWN] = ESHUTDOWN,
- [LINUX_ETOOMANYREFS] = ETOOMANYREFS,
- [LINUX_ETIMEDOUT] = ETIMEDOUT,
- [LINUX_ECONNREFUSED] = ECONNREFUSED,
- [LINUX_EHOSTDOWN] = EHOSTDOWN,
- [LINUX_EHOSTUNREACH] = EHOSTUNREACH,
- [LINUX_EALREADY] = EALREADY,
- [LINUX_EINPROGRESS] = EINPROGRESS,
- [LINUX_ESTALE] = ESTALE,
- [LINUX_EUCLEAN] = EINVAL /* EUCLEAN */,
- [LINUX_ENOTNAM] = EINVAL /* ENOTNAM */,
- [LINUX_ENAVAIL] = EINVAL /* ENAVAIL */,
- [LINUX_EISNAM] = EINVAL /* EISNAM */,
- [LINUX_EREMOTEIO] = EINVAL /* EREMOTEIO */,
- [LINUX_EDQUOT] = EDQUOT,
- [LINUX_ENOMEDIUM] = EINVAL /* ENOMEDIUM */,
- [LINUX_EMEDIUMTYPE] = EINVAL /* EMEDIUMTYPE */,
- };
- code = (int)ecode;
- if (code >= 0) {
- sign = +1;
- } else {
- sign = -1;
- code = -code;
- }
- if (code < (sizeof errno_xlate) / (sizeof errno_xlate[0])) {
- code = errno_xlate[code];
- LASSERT(code >= 0);
- }
- return sign * code;
-}
-
-enum {
- LINUX_O_RDONLY = 00,
- LINUX_O_WRONLY = 01,
- LINUX_O_RDWR = 02,
- LINUX_O_CREAT = 0100,
- LINUX_O_EXCL = 0200,
- LINUX_O_NOCTTY = 0400,
- LINUX_O_TRUNC = 01000,
- LINUX_O_APPEND = 02000,
- LINUX_O_NONBLOCK = 04000,
- LINUX_O_NDELAY = LINUX_O_NONBLOCK,
- LINUX_O_SYNC = 010000,
- LINUX_O_FSYNC = LINUX_O_SYNC,
- LINUX_O_ASYNC = 020000,
- LINUX_O_DIRECT = 040000,
- LINUX_O_NOFOLLOW = 0400000
-};
-
-static inline void obit_convert(int *cflag, int *sflag,
- unsigned cmask, unsigned smask)
-{
- if (*cflag & cmask != 0) {
- *sflag |= smask;
- *cflag &= ~cmask;
- }
-}
-
-/*
- * convert <fcntl.h> flag from XNU client to Linux _i386_ server.
- */
-int convert_client_oflag(int cflag, int *result)
-{
- int sflag = 0;
-
- cflag = 0;
- obit_convert(&cflag, &sflag, O_RDONLY, LINUX_O_RDONLY);
- obit_convert(&cflag, &sflag, O_WRONLY, LINUX_O_WRONLY);
- obit_convert(&cflag, &sflag, O_RDWR, LINUX_O_RDWR);
- obit_convert(&cflag, &sflag, O_NONBLOCK, LINUX_O_NONBLOCK);
- obit_convert(&cflag, &sflag, O_APPEND, LINUX_O_APPEND);
- obit_convert(&cflag, &sflag, O_ASYNC, LINUX_O_ASYNC);
- obit_convert(&cflag, &sflag, O_FSYNC, LINUX_O_FSYNC);
- obit_convert(&cflag, &sflag, O_NOFOLLOW, LINUX_O_NOFOLLOW);
- obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT);
- obit_convert(&cflag, &sflag, O_TRUNC, LINUX_O_TRUNC);
- obit_convert(&cflag, &sflag, O_EXCL, LINUX_O_EXCL);
- obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT);
- obit_convert(&cflag, &sflag, O_NDELAY, LINUX_O_NDELAY);
- obit_convert(&cflag, &sflag, O_NOCTTY, LINUX_O_NOCTTY);
- /*
- * Some more obscure BSD flags have no Linux counterparts:
- *
- * O_SHLOCK 0x0010
- * O_EXLOCK 0x0020
- * O_EVTONLY 0x8000
- * O_POPUP 0x80000000
- * O_ALERT 0x20000000
- */
- if (cflag == 0) {
- *result = sflag;
- return 0;
- } else
- return -EINVAL;
-}
-
-#ifdef __DARWIN8__
-#else /* !__DARWIN8__ */
-extern int unix_syscall();
-extern int unix_syscall_return();
-
-extern int ktrsysret();
-extern int ktrace();
-
-extern int ast_taken();
-extern int ast_check();
-
-extern int trap();
-extern int syscall_trace();
-
-static int is_addr_in_range(void *addr, void *start, void *end)
-{
- return start <= addr && addr <= end;
-}
-
-extern void cfs_thread_agent (void);
-
-static int is_last_frame(void *addr)
-{
- if (addr == NULL)
- return 1;
- else if (is_addr_in_range(addr, unix_syscall, unix_syscall_return))
- return 1;
- else if (is_addr_in_range(addr, ktrsysret, ktrace))
- return 1;
- else if (is_addr_in_range(addr, ast_taken, ast_check))
- return 1;
- else if (is_addr_in_range(addr, trap, syscall_trace))
- return 1;
- else if (is_addr_in_range(addr, cfs_thread_agent, cfs_kernel_thread))
- return 1;
- else
- return 0;
-}
-
-static void *get_frame(int i)
-{
- void *result;
-
-#define CASE(i) case (i): result = __builtin_return_address(i); break
- switch (i + 1) {
- CASE(1);
- CASE(2);
- CASE(3);
- CASE(4);
- CASE(5);
- CASE(6);
- CASE(7);
- CASE(8);
- CASE(9);
- CASE(10);
- CASE(11);
- CASE(12);
- CASE(13);
- CASE(14);
- CASE(15);
- CASE(16);
- CASE(17);
- CASE(18);
- CASE(19);
- CASE(20);
- default:
- panic("impossible frame number: %d\n", i);
- result = NULL;
- }
- return result;
-}
-
-void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
-{
- int i;
-
- memset(trace, 0, sizeof *trace);
- for (i = 0; i < sizeof_array(trace->frame); ++ i) {
- void *addr;
-
- addr = get_frame(i);
- trace->frame[i] = addr;
- if (is_last_frame(addr))
- break;
- }
-}
-
-void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
-{
- if (0 <= frame_no && frame_no < sizeof_array(trace->frame))
- return trace->frame[frame_no];
- else
- return NULL;
-}
-#endif /* !__DARWIN8__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <stdarg.h>
-#include <libcfs/kp30.h>
-#include <libcfs/libcfs.h>
-#include "tracefile.h"
-
-static char debug_file_name[1024];
-
-#ifdef __KERNEL__
-unsigned int libcfs_subsystem_debug = ~0;
-CFS_MODULE_PARM(libcfs_subsystem_debug, "i", int, 0644,
- "Lustre kernel debug subsystem mask");
-EXPORT_SYMBOL(libcfs_subsystem_debug);
-
-unsigned int libcfs_debug = (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE |
- D_NETERROR | D_HA | D_CONFIG | D_IOCTL);
-CFS_MODULE_PARM(libcfs_debug, "i", int, 0644,
- "Lustre kernel debug mask");
-EXPORT_SYMBOL(libcfs_debug);
-
-int libcfs_debug_mb = -1;
-CFS_MODULE_PARM(libcfs_debug_mb, "i", int, 0644,
- "Total debug buffer size.");
-EXPORT_SYMBOL(libcfs_debug_mb);
-
-unsigned int libcfs_printk = D_CANTMASK;
-CFS_MODULE_PARM(libcfs_printk, "i", uint, 0644,
- "Lustre kernel debug console mask");
-EXPORT_SYMBOL(libcfs_printk);
-
-unsigned int libcfs_console_ratelimit = 1;
-CFS_MODULE_PARM(libcfs_console_ratelimit, "i", uint, 0644,
- "Lustre kernel debug console ratelimit (0 to disable)");
-EXPORT_SYMBOL(libcfs_console_ratelimit);
-
-cfs_duration_t libcfs_console_max_delay;
-CFS_MODULE_PARM(libcfs_console_max_delay, "l", ulong, 0644,
- "Lustre kernel debug console max delay (jiffies)");
-EXPORT_SYMBOL(libcfs_console_max_delay);
-
-cfs_duration_t libcfs_console_min_delay;
-CFS_MODULE_PARM(libcfs_console_min_delay, "l", ulong, 0644,
- "Lustre kernel debug console min delay (jiffies)");
-EXPORT_SYMBOL(libcfs_console_min_delay);
-
-unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
-CFS_MODULE_PARM(libcfs_console_backoff, "i", uint, 0644,
- "Lustre kernel debug console backoff factor");
-EXPORT_SYMBOL(libcfs_console_backoff);
-
-unsigned int libcfs_debug_binary = 1;
-EXPORT_SYMBOL(libcfs_debug_binary);
-
-unsigned int libcfs_stack;
-EXPORT_SYMBOL(libcfs_stack);
-
-unsigned int portal_enter_debugger;
-EXPORT_SYMBOL(portal_enter_debugger);
-
-unsigned int libcfs_catastrophe;
-EXPORT_SYMBOL(libcfs_catastrophe);
-
-unsigned int libcfs_panic_on_lbug = 0;
-CFS_MODULE_PARM(libcfs_panic_on_lbug, "i", uint, 0644,
- "Lustre kernel panic on LBUG");
-EXPORT_SYMBOL(libcfs_panic_on_lbug);
-
-atomic_t libcfs_kmemory = ATOMIC_INIT(0);
-EXPORT_SYMBOL(libcfs_kmemory);
-
-static cfs_waitq_t debug_ctlwq;
-
-#ifdef __arch_um__
-char debug_file_path[1024] = "/r/tmp/lustre-log";
-#else
-char debug_file_path[1024] = "/tmp/lustre-log";
-#endif
-CFS_MODULE_PARM(debug_file_path, "s", charp, 0644,
- "Path for dumping debug logs, "
- "set 'NONE' to prevent log dumping");
-
-int libcfs_panic_in_progress;
-
-/* libcfs_debug_token2mask() expects the returned
- * string in lower-case */
-const char *
-libcfs_debug_subsys2str(int subsys)
-{
- switch (subsys) {
- default:
- return NULL;
- case S_UNDEFINED:
- return "undefined";
- case S_MDC:
- return "mdc";
- case S_MDS:
- return "mds";
- case S_OSC:
- return "osc";
- case S_OST:
- return "ost";
- case S_CLASS:
- return "class";
- case S_LOG:
- return "log";
- case S_LLITE:
- return "llite";
- case S_RPC:
- return "rpc";
- case S_LNET:
- return "lnet";
- case S_LND:
- return "lnd";
- case S_PINGER:
- return "pinger";
- case S_FILTER:
- return "filter";
- case S_ECHO:
- return "echo";
- case S_LDLM:
- return "ldlm";
- case S_LOV:
- return "lov";
- case S_LMV:
- return "lmv";
- case S_SEC:
- return "sec";
- case S_GSS:
- return "gss";
- case S_MGC:
- return "mgc";
- case S_MGS:
- return "mgs";
- case S_FID:
- return "fid";
- case S_FLD:
- return "fld";
- }
-}
-
-/* libcfs_debug_token2mask() expects the returned
- * string in lower-case */
-const char *
-libcfs_debug_dbg2str(int debug)
-{
- switch (debug) {
- default:
- return NULL;
- case D_TRACE:
- return "trace";
- case D_INODE:
- return "inode";
- case D_SUPER:
- return "super";
- case D_EXT2:
- return "ext2";
- case D_MALLOC:
- return "malloc";
- case D_CACHE:
- return "cache";
- case D_INFO:
- return "info";
- case D_IOCTL:
- return "ioctl";
- case D_NETERROR:
- return "neterror";
- case D_NET:
- return "net";
- case D_WARNING:
- return "warning";
- case D_BUFFS:
- return "buffs";
- case D_OTHER:
- return "other";
- case D_DENTRY:
- return "dentry";
- case D_NETTRACE:
- return "nettrace";
- case D_PAGE:
- return "page";
- case D_DLMTRACE:
- return "dlmtrace";
- case D_ERROR:
- return "error";
- case D_EMERG:
- return "emerg";
- case D_HA:
- return "ha";
- case D_RPCTRACE:
- return "rpctrace";
- case D_VFSTRACE:
- return "vfstrace";
- case D_READA:
- return "reada";
- case D_MMAP:
- return "mmap";
- case D_CONFIG:
- return "config";
- case D_CONSOLE:
- return "console";
- case D_QUOTA:
- return "quota";
- case D_SEC:
- return "sec";
- }
-}
-
-int
-libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
-{
- const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
- libcfs_debug_dbg2str;
- int len = 0;
- const char *token;
- int bit;
- int i;
-
- if (mask == 0) { /* "0" */
- if (size > 0)
- str[0] = '0';
- len = 1;
- } else { /* space-separated tokens */
- for (i = 0; i < 32; i++) {
- bit = 1 << i;
-
- if ((mask & bit) == 0)
- continue;
-
- token = fn(bit);
- if (token == NULL) /* unused bit */
- continue;
-
- if (len > 0) { /* separator? */
- if (len < size)
- str[len] = ' ';
- len++;
- }
-
- while (*token != 0) {
- if (len < size)
- str[len] = *token;
- token++;
- len++;
- }
- }
- }
-
- /* terminate 'str' */
- if (len < size)
- str[len] = 0;
- else
- str[size - 1] = 0;
-
- return len;
-}
-
-int
-libcfs_debug_token2mask(int *mask, const char *str, int len, int is_subsys)
-{
- const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
- libcfs_debug_dbg2str;
- int i;
- int j;
- int bit;
- const char *token;
-
- /* match against known tokens */
- for (i = 0; i < 32; i++) {
- bit = 1 << i;
-
- token = fn(bit);
- if (token == NULL) /* unused? */
- continue;
-
- /* strcasecmp */
- for (j = 0; ; j++) {
- if (j == len) { /* end of token */
- if (token[j] == 0) {
- *mask = bit;
- return 0;
- }
- break;
- }
-
- if (token[j] == 0)
- break;
-
- if (str[j] == token[j])
- continue;
-
- if (str[j] < 'A' || 'Z' < str[j])
- break;
-
- if (str[j] - 'A' + 'a' != token[j])
- break;
- }
- }
-
- return -EINVAL; /* no match */
-}
-
-int
-libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
-{
- int m = 0;
- char op = 0;
- int matched;
- int n;
- int t;
-
- /* Allow a number for backwards compatibility */
-
- for (n = strlen(str); n > 0; n--)
- if (!isspace(str[n-1]))
- break;
- matched = n;
-
- if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 &&
- matched == n) {
- *mask = m;
- return 0;
- }
-
- /* <str> must be a list of debug tokens or numbers separated by
- * whitespace and optionally an operator ('+' or '-'). If an operator
- * appears first in <str>, '*mask' is used as the starting point
- * (relative), otherwise 0 is used (absolute). An operator applies to
- * all following tokens up to the next operator. */
-
- matched = 0;
- while (*str != 0) {
- while (isspace(*str)) /* skip whitespace */
- str++;
-
- if (*str == 0)
- break;
-
- if (*str == '+' || *str == '-') {
- op = *str++;
-
- /* op on first token == relative */
- if (!matched)
- m = *mask;
-
- while (isspace(*str)) /* skip whitespace */
- str++;
-
- if (*str == 0) /* trailing op */
- return -EINVAL;
- }
-
- /* find token length */
- for (n = 0; str[n] != 0 && !isspace(str[n]); n++);
-
- /* match token */
- if (libcfs_debug_token2mask(&t, str, n, is_subsys) != 0)
- return -EINVAL;
-
- matched = 1;
- if (op == '-')
- m &= ~t;
- else
- m |= t;
-
- str += n;
- }
-
- if (!matched)
- return -EINVAL;
-
- *mask = m;
- return 0;
-}
-
-void libcfs_debug_dumplog_internal(void *arg)
-{
- CFS_DECL_JOURNAL_DATA;
-
- CFS_PUSH_JOURNAL;
-
- if (strncmp(debug_file_path, "NONE", 4) != 0) {
- snprintf(debug_file_name, sizeof(debug_file_name) - 1,
- "%s.%ld.%ld", debug_file_path, cfs_time_current_sec(),
- (long)arg);
- printk(KERN_ALERT "LustreError: dumping log to %s\n",
- debug_file_name);
- tracefile_dump_all_pages(debug_file_name);
- }
- CFS_POP_JOURNAL;
-}
-
-int libcfs_debug_dumplog_thread(void *arg)
-{
- cfs_daemonize("");
- libcfs_debug_dumplog_internal(arg);
- cfs_waitq_signal(&debug_ctlwq);
- return 0;
-}
-
-void libcfs_debug_dumplog(void)
-{
- int rc;
- cfs_waitlink_t wait;
- ENTRY;
-
- /* we're being careful to ensure that the kernel thread is
- * able to set our state to running as it exits before we
- * get to schedule() */
- cfs_waitlink_init(&wait);
- set_current_state(TASK_INTERRUPTIBLE);
- cfs_waitq_add(&debug_ctlwq, &wait);
-
- rc = cfs_kernel_thread(libcfs_debug_dumplog_thread,
- (void *)(long)cfs_curproc_pid(),
- CLONE_VM | CLONE_FS | CLONE_FILES);
- if (rc < 0)
- printk(KERN_ERR "LustreError: cannot start log dump thread: "
- "%d\n", rc);
- else
- cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE);
-
- /* be sure to teardown if kernel_thread() failed */
- cfs_waitq_del(&debug_ctlwq, &wait);
- set_current_state(TASK_RUNNING);
-}
-
-int libcfs_debug_init(unsigned long bufsize)
-{
- int rc = 0;
- int max = libcfs_debug_mb;
-
- cfs_waitq_init(&debug_ctlwq);
- libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
- libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
- /* If libcfs_debug_mb is set to an invalid value or uninitialized
- * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES */
- if (max > trace_max_debug_mb() || max < num_possible_cpus()) {
- max = TCD_MAX_PAGES;
- } else {
- max = (max / num_possible_cpus());
- max = (max << (20 - CFS_PAGE_SHIFT));
- }
- rc = tracefile_init(max);
-
- if (rc == 0)
- libcfs_register_panic_notifier();
-
- return rc;
-}
-
-int libcfs_debug_cleanup(void)
-{
- libcfs_unregister_panic_notifier();
- tracefile_exit();
- return 0;
-}
-
-int libcfs_debug_clear_buffer(void)
-{
- trace_flush_pages();
- return 0;
-}
-
-/* Debug markers, although printed by S_LNET
- * should not be be marked as such. */
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_UNDEFINED
-int libcfs_debug_mark_buffer(char *text)
-{
- CDEBUG(D_TRACE,"***************************************************\n");
- CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text);
- CDEBUG(D_TRACE,"***************************************************\n");
-
- return 0;
-}
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_LNET
-
-void libcfs_debug_set_level(unsigned int debug_level)
-{
- printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n",
- debug_level);
- libcfs_debug = debug_level;
-}
-
-EXPORT_SYMBOL(libcfs_debug_dumplog);
-EXPORT_SYMBOL(libcfs_debug_set_level);
-
-
-#else /* !__KERNEL__ */
-
-#include <libcfs/libcfs.h>
-
-#ifdef HAVE_CATAMOUNT_DATA_H
-#include <catamount/data.h>
-#include <catamount/lputs.h>
-
-static char source_nid[16];
-/* 0 indicates no messages to console, 1 is errors, > 1 is all debug messages */
-static int toconsole = 1;
-unsigned int libcfs_console_ratelimit = 1;
-cfs_duration_t libcfs_console_max_delay;
-cfs_duration_t libcfs_console_min_delay;
-unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
-#else /* !HAVE_CATAMOUNT_DATA_H */
-#ifdef HAVE_NETDB_H
-#include <sys/utsname.h>
-#endif /* HAVE_NETDB_H */
-struct utsname *tmp_utsname;
-static char source_nid[sizeof(tmp_utsname->nodename)];
-#endif /* HAVE_CATAMOUNT_DATA_H */
-
-static int source_pid;
-int smp_processor_id = 1;
-char debug_file_path[1024];
-FILE *debug_file_fd;
-
-int portals_do_debug_dumplog(void *arg)
-{
- printf("Look in %s\n", debug_file_name);
- return 0;
-}
-
-
-void portals_debug_print(void)
-{
- return;
-}
-
-
-void libcfs_debug_dumplog(void)
-{
- printf("Look in %s\n", debug_file_name);
- return;
-}
-
-int libcfs_debug_init(unsigned long bufsize)
-{
- char *debug_mask = NULL;
- char *debug_subsys = NULL;
- char *debug_filename;
-
-#ifdef HAVE_CATAMOUNT_DATA_H
- char *debug_console = NULL;
- char *debug_ratelimit = NULL;
- char *debug_max_delay = NULL;
- char *debug_min_delay = NULL;
- char *debug_backoff = NULL;
-
- libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
- libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
-
- snprintf(source_nid, sizeof(source_nid) - 1, "%u", _my_pnid);
- source_pid = _my_pid;
-
- debug_console = getenv("LIBLUSTRE_DEBUG_CONSOLE");
- if (debug_console != NULL) {
- toconsole = strtoul(debug_console, NULL, 0);
- CDEBUG(D_INFO, "set liblustre toconsole to %u\n", toconsole);
- }
- debug_ratelimit = getenv("LIBLUSTRE_DEBUG_CONSOLE_RATELIMIT");
- if (debug_ratelimit != NULL) {
- libcfs_console_ratelimit = strtoul(debug_ratelimit, NULL, 0);
- CDEBUG(D_INFO, "set liblustre console ratelimit to %u\n",
- libcfs_console_ratelimit);
- }
- debug_max_delay = getenv("LIBLUSTRE_DEBUG_CONSOLE_MAX_DELAY");
- if (debug_max_delay != NULL)
- libcfs_console_max_delay =
- cfs_time_seconds(strtoul(debug_max_delay, NULL, 0));
- debug_min_delay = getenv("LIBLUSTRE_DEBUG_CONSOLE_MIN_DELAY");
- if (debug_min_delay != NULL)
- libcfs_console_min_delay =
- cfs_time_seconds(strtoul(debug_min_delay, NULL, 0));
- if (debug_min_delay || debug_max_delay) {
- if (!libcfs_console_max_delay || !libcfs_console_min_delay ||
- libcfs_console_max_delay < libcfs_console_min_delay) {
- libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
- libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
- CDEBUG(D_INFO, "LIBLUSTRE_DEBUG_CONSOLE_MAX_DELAY "
- "should be greater than "
- "LIBLUSTRE_DEBUG_CONSOLE_MIN_DELAY "
- "and both parameters should be non-null"
- ": restore default values\n");
- } else {
- CDEBUG(D_INFO, "set liblustre console max delay to %lus"
- " and min delay to %lus\n",
- (cfs_duration_t)
- cfs_duration_sec(libcfs_console_max_delay),
- (cfs_duration_t)
- cfs_duration_sec(libcfs_console_min_delay));
- }
- }
- debug_backoff = getenv("LIBLUSTRE_DEBUG_CONSOLE_BACKOFF");
- if (debug_backoff != NULL) {
- libcfs_console_backoff = strtoul(debug_backoff, NULL, 0);
- if (libcfs_console_backoff <= 0) {
- libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
- CDEBUG(D_INFO, "LIBLUSTRE_DEBUG_CONSOLE_BACKOFF <= 0: "
- "restore default value\n");
- } else {
- CDEBUG(D_INFO, "set liblustre console backoff to %u\n",
- libcfs_console_backoff);
- }
- }
-#else
- struct utsname myname;
-
- if (uname(&myname) == 0)
- strcpy(source_nid, myname.nodename);
- source_pid = getpid();
-#endif
- /* debug masks */
- debug_mask = getenv("LIBLUSTRE_DEBUG_MASK");
- if (debug_mask)
- libcfs_debug = (unsigned int) strtol(debug_mask, NULL, 0);
-
- debug_subsys = getenv("LIBLUSTRE_DEBUG_SUBSYS");
- if (debug_subsys)
- libcfs_subsystem_debug =
- (unsigned int) strtol(debug_subsys, NULL, 0);
-
- debug_filename = getenv("LIBLUSTRE_DEBUG_BASE");
- if (debug_filename)
- strncpy(debug_file_path,debug_filename,sizeof(debug_file_path));
-
- debug_filename = getenv("LIBLUSTRE_DEBUG_FILE");
- if (debug_filename)
- strncpy(debug_file_name,debug_filename,sizeof(debug_file_name));
-
- if (debug_file_name[0] == '\0' && debug_file_path[0] != '\0')
- snprintf(debug_file_name, sizeof(debug_file_name) - 1,
- "%s-%s-"CFS_TIME_T".log", debug_file_path, source_nid, time(0));
-
- if (strcmp(debug_file_name, "stdout") == 0 ||
- strcmp(debug_file_name, "-") == 0) {
- debug_file_fd = stdout;
- } else if (strcmp(debug_file_name, "stderr") == 0) {
- debug_file_fd = stderr;
- } else if (debug_file_name[0] != '\0') {
- debug_file_fd = fopen(debug_file_name, "w");
- if (debug_file_fd == NULL)
- fprintf(stderr, "%s: unable to open '%s': %s\n",
- source_nid, debug_file_name, strerror(errno));
- }
-
- if (debug_file_fd == NULL)
- debug_file_fd = stdout;
-
- return 0;
-}
-
-int libcfs_debug_cleanup(void)
-{
- if (debug_file_fd != stdout && debug_file_fd != stderr)
- fclose(debug_file_fd);
- return 0;
-}
-
-int libcfs_debug_clear_buffer(void)
-{
- return 0;
-}
-
-int libcfs_debug_mark_buffer(char *text)
-{
-
- fprintf(debug_file_fd, "*******************************************************************************\n");
- fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text);
- fprintf(debug_file_fd, "*******************************************************************************\n");
-
- return 0;
-}
-
-#ifdef HAVE_CATAMOUNT_DATA_H
-#define CATAMOUNT_MAXLINE (256-4)
-void catamount_printline(char *buf, size_t size)
-{
- char *pos = buf;
- int prsize = size;
-
- while (prsize > 0){
- lputs(pos);
- pos += CATAMOUNT_MAXLINE;
- prsize -= CATAMOUNT_MAXLINE;
- }
-}
-#endif
-
-int
-libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls,
- int subsys, int mask,
- const char *file, const char *fn, const int line,
- const char *format1, va_list args,
- const char *format2, ...)
-{
- struct timeval tv;
- int nob;
- int remain;
- va_list ap;
- char buf[CFS_PAGE_SIZE]; /* size 4096 used for compatimble
- * with linux, where message can`t
- * be exceed PAGE_SIZE */
- int console = 0;
- char *prefix = "Lustre";
-
-#ifdef HAVE_CATAMOUNT_DATA_H
- /* toconsole == 0 - all messages to debug_file_fd
- * toconsole == 1 - warnings to console, all to debug_file_fd
- * toconsole > 1 - all debug to console */
- if (((mask & libcfs_printk) && toconsole == 1) || toconsole > 1)
- console = 1;
-#endif
-
- if ((!console) && (!debug_file_fd)) {
- return 0;
- }
-
- if (mask & (D_EMERG | D_ERROR))
- prefix = "LustreError";
-
- nob = snprintf(buf, sizeof(buf), "%s: %u-%s:(%s:%d:%s()): ", prefix,
- source_pid, source_nid, file, line, fn);
-
- remain = sizeof(buf) - nob;
- if (format1) {
- nob += vsnprintf(&buf[nob], remain, format1, args);
- }
-
- remain = sizeof(buf) - nob;
- if ((format2) && (remain > 0)) {
- va_start(ap, format2);
- nob += vsnprintf(&buf[nob], remain, format2, ap);
- va_end(ap);
- }
-
-#ifdef HAVE_CATAMOUNT_DATA_H
- if (console) {
- /* check rate limit for console */
- if (cdls != NULL) {
- if (libcfs_console_ratelimit &&
- cdls->cdls_next != 0 && /* not first time ever */
- !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
-
- /* skipping a console message */
- cdls->cdls_count++;
- goto out_file;
- }
-
- if (cfs_time_after(cfs_time_current(), cdls->cdls_next +
- libcfs_console_max_delay +
- cfs_time_seconds(10))) {
- /* last timeout was a long time ago */
- cdls->cdls_delay /= libcfs_console_backoff * 4;
- } else {
- cdls->cdls_delay *= libcfs_console_backoff;
-
- if (cdls->cdls_delay <
- libcfs_console_min_delay)
- cdls->cdls_delay =
- libcfs_console_min_delay;
- else if (cdls->cdls_delay >
- libcfs_console_max_delay)
- cdls->cdls_delay =
- libcfs_console_max_delay;
- }
-
- /* ensure cdls_next is never zero after it's been seen */
- cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
- }
-
- if (cdls != NULL && cdls->cdls_count != 0) {
- char buf2[100];
-
- nob = snprintf(buf2, sizeof(buf2),
- "Skipped %d previous similar message%s\n",
- cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : "");
-
- catamount_printline(buf2, nob);
- cdls->cdls_count = 0;
- goto out_file;
- }
- catamount_printline(buf, nob);
- }
-out_file:
- /* return on toconsole > 1, as we don't want the user getting
- * spammed by the debug data */
- if (toconsole > 1)
- return 0;
-#endif
- if (debug_file_fd == NULL)
- return 0;
-
- gettimeofday(&tv, NULL);
-
- fprintf(debug_file_fd, CFS_TIME_T".%06lu:%u:%s:(%s:%d:%s()): %s",
- tv.tv_sec, tv.tv_usec, source_pid, source_nid,
- file, line, fn, buf);
-
- return 0;
-}
-
-void
-libcfs_assertion_failed(const char *expr, const char *file, const char *func,
- const int line)
-{
- libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line,
- "ASSERTION(%s) failed\n", expr);
- abort();
-}
-
-#endif /* __KERNEL__ */
+++ /dev/null
-// !$*UTF8*$!
-{
- archiveVersion = 1;
- classes = {
- };
- objectVersion = 39;
- objects = {
- 06AA1262FFB20DD611CA28AA = {
- buildRules = (
- );
- buildSettings = {
- COPY_PHASE_STRIP = NO;
- GCC_DYNAMIC_NO_PIC = NO;
- GCC_ENABLE_FIX_AND_CONTINUE = YES;
- GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
- GCC_OPTIMIZATION_LEVEL = 0;
- OPTIMIZATION_CFLAGS = "-O0";
- ZERO_LINK = YES;
- };
- isa = PBXBuildStyle;
- name = Development;
- };
- 06AA1263FFB20DD611CA28AA = {
- buildRules = (
- );
- buildSettings = {
- COPY_PHASE_STRIP = YES;
- GCC_ENABLE_FIX_AND_CONTINUE = NO;
- ZERO_LINK = NO;
- };
- isa = PBXBuildStyle;
- name = Deployment;
- };
-//060
-//061
-//062
-//063
-//064
-//080
-//081
-//082
-//083
-//084
- 089C1669FE841209C02AAC07 = {
- buildSettings = {
- };
- buildStyles = (
- 06AA1262FFB20DD611CA28AA,
- 06AA1263FFB20DD611CA28AA,
- );
- hasScannedForEncodings = 1;
- isa = PBXProject;
- mainGroup = 089C166AFE841209C02AAC07;
- projectDirPath = "";
- targets = (
- 32A4FEB80562C75700D090E7,
- );
- };
- 089C166AFE841209C02AAC07 = {
- children = (
- 247142CAFF3F8F9811CA285C,
- 089C167CFE841241C02AAC07,
- 19C28FB6FE9D52B211CA2CBB,
- );
- isa = PBXGroup;
- name = libcfs;
- refType = 4;
- sourceTree = "<group>";
- };
- 089C167CFE841241C02AAC07 = {
- children = (
- 32A4FEC30562C75700D090E7,
- );
- isa = PBXGroup;
- name = Resources;
- refType = 4;
- sourceTree = "<group>";
- };
-//080
-//081
-//082
-//083
-//084
-//190
-//191
-//192
-//193
-//194
- 19444794072D07AD00DAF9BC = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = tracefile.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19444795072D07AD00DAF9BC = {
- fileRef = 19444794072D07AD00DAF9BC;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19444796072D08AA00DAF9BC = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = debug.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19444797072D08AA00DAF9BC = {
- fileRef = 19444796072D08AA00DAF9BC;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19509C03072CD5FF00A958C3 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = module.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19509C04072CD5FF00A958C3 = {
- fileRef = 19509C03072CD5FF00A958C3;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19713B76072E8274004E8469 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = cfs_prim.c;
- path = arch/xnu/cfs_prim.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19713B77072E8274004E8469 = {
- fileRef = 19713B76072E8274004E8469;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19713BB7072E8281004E8469 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = cfs_mem.c;
- path = arch/xnu/cfs_mem.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19713BB8072E8281004E8469 = {
- fileRef = 19713BB7072E8281004E8469;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19713BF7072E828E004E8469 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = cfs_proc.c;
- path = arch/xnu/cfs_proc.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19713BF8072E828E004E8469 = {
- fileRef = 19713BF7072E828E004E8469;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19713C7A072E82B2004E8469 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = cfs_utils.c;
- path = arch/xnu/cfs_utils.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19713C7B072E82B2004E8469 = {
- fileRef = 19713C7A072E82B2004E8469;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19713CD6072E8A56004E8469 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = cfs_module.c;
- path = arch/xnu/cfs_module.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19713CD7072E8A56004E8469 = {
- fileRef = 19713CD6072E8A56004E8469;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19713D1B072E8E39004E8469 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = cfs_fs.c;
- path = arch/xnu/cfs_fs.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19713D1C072E8E39004E8469 = {
- fileRef = 19713D1B072E8E39004E8469;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19713D60072E9109004E8469 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = xnu_sync.c;
- path = arch/xnu/xnu_sync.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19713D61072E9109004E8469 = {
- fileRef = 19713D60072E9109004E8469;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19713DC2072F994D004E8469 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = cfs_tracefile.c;
- path = arch/xnu/cfs_tracefile.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19713DC3072F994D004E8469 = {
- fileRef = 19713DC2072F994D004E8469;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19713E1C072FAFB5004E8469 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- name = cfs_debug.c;
- path = arch/xnu/cfs_debug.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19713E1D072FAFB5004E8469 = {
- fileRef = 19713E1C072FAFB5004E8469;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19C28FB6FE9D52B211CA2CBB = {
- children = (
- 32A4FEC40562C75800D090E7,
- );
- isa = PBXGroup;
- name = Products;
- refType = 4;
- sourceTree = "<group>";
- };
-//190
-//191
-//192
-//193
-//194
-//240
-//241
-//242
-//243
-//244
- 247142CAFF3F8F9811CA285C = {
- children = (
- 19713E1C072FAFB5004E8469,
- 19713DC2072F994D004E8469,
- 19713D60072E9109004E8469,
- 19713D1B072E8E39004E8469,
- 19713CD6072E8A56004E8469,
- 19713C7A072E82B2004E8469,
- 19713BF7072E828E004E8469,
- 19713BB7072E8281004E8469,
- 19713B76072E8274004E8469,
- 19444796072D08AA00DAF9BC,
- 19444794072D07AD00DAF9BC,
- 19509C03072CD5FF00A958C3,
- );
- isa = PBXGroup;
- name = Source;
- path = "";
- refType = 4;
- sourceTree = "<group>";
- };
-//240
-//241
-//242
-//243
-//244
-//320
-//321
-//322
-//323
-//324
- 32A4FEB80562C75700D090E7 = {
- buildPhases = (
- 32A4FEB90562C75700D090E7,
- 32A4FEBA0562C75700D090E7,
- 32A4FEBB0562C75700D090E7,
- 32A4FEBD0562C75700D090E7,
- 32A4FEBF0562C75700D090E7,
- 32A4FEC00562C75700D090E7,
- 32A4FEC10562C75700D090E7,
- );
- buildRules = (
- );
- buildSettings = {
- FRAMEWORK_SEARCH_PATHS = "";
- GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
- GCC_WARN_UNKNOWN_PRAGMAS = NO;
- HEADER_SEARCH_PATHS = ../include;
- INFOPLIST_FILE = Info.plist;
- INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
- LIBRARY_SEARCH_PATHS = "";
- MODULE_NAME = com.clusterfs.lustre.portals.libcfs;
- MODULE_START = libcfs_start;
- MODULE_STOP = libcfs_stop;
- MODULE_VERSION = 1.0.1;
- OTHER_CFLAGS = "-D__KERNEL__";
- OTHER_LDFLAGS = "";
- OTHER_REZFLAGS = "";
- PRODUCT_NAME = libcfs;
- SECTORDER_FLAGS = "";
- WARNING_CFLAGS = "-Wmost";
- WRAPPER_EXTENSION = kext;
- };
- dependencies = (
- );
- isa = PBXNativeTarget;
- name = libcfs;
- productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
- productName = libcfs;
- productReference = 32A4FEC40562C75800D090E7;
- productType = "com.apple.product-type.kernel-extension";
- };
- 32A4FEB90562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXShellScriptBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- shellPath = /bin/sh;
- shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
- };
- 32A4FEBA0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXHeadersBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBB0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXResourcesBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBD0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- 19509C04072CD5FF00A958C3,
- 19444795072D07AD00DAF9BC,
- 19444797072D08AA00DAF9BC,
- 19713B77072E8274004E8469,
- 19713BB8072E8281004E8469,
- 19713BF8072E828E004E8469,
- 19713C7B072E82B2004E8469,
- 19713CD7072E8A56004E8469,
- 19713D1C072E8E39004E8469,
- 19713D61072E9109004E8469,
- 19713DC3072F994D004E8469,
- 19713E1D072FAFB5004E8469,
- );
- isa = PBXSourcesBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBF0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXFrameworksBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEC00562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXRezBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEC10562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXShellScriptBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- shellPath = /bin/sh;
- shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
- };
- 32A4FEC30562C75700D090E7 = {
- isa = PBXFileReference;
- lastKnownFileType = text.plist.xml;
- path = Info.plist;
- refType = 4;
- sourceTree = "<group>";
- };
- 32A4FEC40562C75800D090E7 = {
- explicitFileType = wrapper.cfbundle;
- includeInIndex = 0;
- isa = PBXFileReference;
- path = libcfs.kext;
- refType = 3;
- sourceTree = BUILT_PRODUCTS_DIR;
- };
- };
- rootObject = 089C1669FE841209C02AAC07;
-}
+++ /dev/null
-Makefile
-Makefile.in
-*.o.cmd
+++ /dev/null
-EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \
- linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \
- linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Lustre curproc API implementation for Linux kernel
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation. Lustre is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
- * Public License for more details. You should have received a copy of the GNU
- * General Public License along with Lustre; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/sched.h>
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-/*
- * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
- * for Linux kernel.
- */
-
-uid_t cfs_curproc_uid(void)
-{
- return current->uid;
-}
-
-gid_t cfs_curproc_gid(void)
-{
- return current->gid;
-}
-
-uid_t cfs_curproc_fsuid(void)
-{
- return current->fsuid;
-}
-
-gid_t cfs_curproc_fsgid(void)
-{
- return current->fsgid;
-}
-
-pid_t cfs_curproc_pid(void)
-{
- return current->pid;
-}
-
-int cfs_curproc_groups_nr(void)
-{
- int nr;
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
- task_lock(current);
- nr = current->group_info->ngroups;
- task_unlock(current);
-#else
- nr = current->ngroups;
-#endif
- return nr;
-}
-
-void cfs_curproc_groups_dump(gid_t *array, int size)
-{
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
- task_lock(current);
- size = min_t(int, size, current->group_info->ngroups);
- memcpy(array, current->group_info->blocks[0], size * sizeof(__u32));
- task_unlock(current);
-#else
- LASSERT(size <= NGROUPS);
- size = min_t(int, size, current->ngroups);
- memcpy(array, current->groups, size * sizeof(__u32));
-#endif
-}
-
-
-int cfs_curproc_is_in_groups(gid_t gid)
-{
- return in_group_p(gid);
-}
-
-mode_t cfs_curproc_umask(void)
-{
- return current->fs->umask;
-}
-
-char *cfs_curproc_comm(void)
-{
- return current->comm;
-}
-
-cfs_kernel_cap_t cfs_curproc_cap_get(void)
-{
- return current->cap_effective;
-}
-
-void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
-{
- current->cap_effective = cap;
-}
-
-EXPORT_SYMBOL(cfs_curproc_uid);
-EXPORT_SYMBOL(cfs_curproc_pid);
-EXPORT_SYMBOL(cfs_curproc_gid);
-EXPORT_SYMBOL(cfs_curproc_fsuid);
-EXPORT_SYMBOL(cfs_curproc_fsgid);
-EXPORT_SYMBOL(cfs_curproc_umask);
-EXPORT_SYMBOL(cfs_curproc_comm);
-EXPORT_SYMBOL(cfs_curproc_groups_nr);
-EXPORT_SYMBOL(cfs_curproc_groups_dump);
-EXPORT_SYMBOL(cfs_curproc_is_in_groups);
-EXPORT_SYMBOL(cfs_curproc_cap_get);
-EXPORT_SYMBOL(cfs_curproc_cap_set);
-
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/notifier.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/completion.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <linux/miscdevice.h>
-#include <linux/version.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/kp30.h>
-#include <libcfs/linux/portals_compat25.h>
-#include <libcfs/libcfs.h>
-
-#include "tracefile.h"
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <linux/kallsyms.h>
-#endif
-
-char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall";
-
-void libcfs_run_upcall(char **argv)
-{
- int rc;
- int argc;
- char *envp[] = {
- "HOME=/",
- "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
- NULL};
- ENTRY;
-
- argv[0] = lnet_upcall;
- argc = 1;
- while (argv[argc] != NULL)
- argc++;
-
- LASSERT(argc >= 2);
-
- rc = USERMODEHELPER(argv[0], argv, envp);
- if (rc < 0 && rc != -ENOENT) {
- CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; "
- "check /proc/sys/lnet/upcall\n",
- rc, argv[0], argv[1],
- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
- argc < 6 ? "" : ",...");
- } else {
- CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n",
- argv[0], argv[1],
- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
- argc < 6 ? "" : ",...");
- }
-}
-
-void libcfs_run_lbug_upcall(char *file, const char *fn, const int line)
-{
- char *argv[6];
- char buf[32];
-
- ENTRY;
- snprintf (buf, sizeof buf, "%d", line);
-
- argv[1] = "LBUG";
- argv[2] = file;
- argv[3] = (char *)fn;
- argv[4] = buf;
- argv[5] = NULL;
-
- libcfs_run_upcall (argv);
-}
-
-#ifdef __arch_um__
-void lbug_with_loc(char *file, const char *func, const int line)
-{
- libcfs_catastrophe = 1;
- libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line,
- "LBUG - trying to dump log to %s\n", debug_file_path);
- libcfs_debug_dumplog();
- libcfs_run_lbug_upcall(file, func, line);
- asm("int $3");
- panic("LBUG");
-}
-#else
-/* coverity[+kill] */
-void lbug_with_loc(char *file, const char *func, const int line)
-{
- libcfs_catastrophe = 1;
- libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, "LBUG\n");
-
- if (in_interrupt()) {
- panic("LBUG in interrupt.\n");
- /* not reached */
- }
-
- libcfs_debug_dumpstack(NULL);
- libcfs_debug_dumplog();
- libcfs_run_lbug_upcall(file, func, line);
- if (libcfs_panic_on_lbug)
- panic("LBUG");
- set_task_state(current, TASK_UNINTERRUPTIBLE);
- while (1)
- schedule();
-}
-#endif /* __arch_um__ */
-
-#ifdef __KERNEL__
-
-void libcfs_debug_dumpstack(struct task_struct *tsk)
-{
-#if defined(__arch_um__)
- if (tsk != NULL)
- CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n",
- tsk->pid, UML_PID(tsk));
- //asm("int $3");
-#elif defined(HAVE_SHOW_TASK)
- /* this is exported by lustre kernel version 42 */
- extern void show_task(struct task_struct *);
-
- if (tsk == NULL)
- tsk = current;
- CWARN("showing stack for process %d\n", tsk->pid);
- show_task(tsk);
-#else
- if ((tsk == NULL) || (tsk == current))
- dump_stack();
- else
- CWARN("can't show stack: kernel doesn't export show_task\n");
-#endif
-}
-
-cfs_task_t *libcfs_current(void)
-{
- CWARN("current task struct is %p\n", current);
- return current;
-}
-
-static int panic_notifier(struct notifier_block *self, unsigned long unused1,
- void *unused2)
-{
- if (libcfs_panic_in_progress)
- return 0;
-
- libcfs_panic_in_progress = 1;
- mb();
-
-#ifdef LNET_DUMP_ON_PANIC
- /* This is currently disabled because it spews far too much to the
- * console on the rare cases it is ever triggered. */
-
- if (in_interrupt()) {
- trace_debug_print();
- } else {
- while (current->lock_depth >= 0)
- unlock_kernel();
-
- libcfs_debug_dumplog_internal((void *)(long)cfs_curproc_pid());
- }
-#endif
- return 0;
-}
-
-static struct notifier_block libcfs_panic_notifier = {
- notifier_call : panic_notifier,
- next : NULL,
- priority : 10000
-};
-
-void libcfs_register_panic_notifier(void)
-{
-#ifdef HAVE_ATOMIC_PANIC_NOTIFIER
- atomic_notifier_chain_register(&panic_notifier_list, &libcfs_panic_notifier);
-#else
- notifier_chain_register(&panic_notifier_list, &libcfs_panic_notifier);
-#endif
-}
-
-void libcfs_unregister_panic_notifier(void)
-{
-#ifdef HAVE_ATOMIC_PANIC_NOTIFIER
- atomic_notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier);
-#else
- notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier);
-#endif
-}
-
-EXPORT_SYMBOL(libcfs_debug_dumpstack);
-EXPORT_SYMBOL(libcfs_current);
-
-#endif /* __KERNEL__ */
-
-EXPORT_SYMBOL(libcfs_run_upcall);
-EXPORT_SYMBOL(libcfs_run_lbug_upcall);
-EXPORT_SYMBOL(lbug_with_loc);
+++ /dev/null
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/fs.h>
-#include <linux/kdev_t.h>
-#include <linux/ctype.h>
-#include <asm/uaccess.h>
-
-#include <libcfs/libcfs.h>
-
-cfs_file_t *
-cfs_filp_open (const char *name, int flags, int mode, int *err)
-{
- /* XXX
- * Maybe we need to handle flags and mode in the future
- */
- cfs_file_t *filp = NULL;
-
- filp = filp_open(name, flags, mode);
- if (IS_ERR(filp)) {
- int rc;
-
- rc = PTR_ERR(filp);
- printk(KERN_ERR "LustreError: can't open %s file: err %d\n",
- name, rc);
- if (err)
- *err = rc;
- filp = NULL;
- }
- return filp;
-}
-
-/* write a userspace buffer to disk.
- * NOTE: this returns 0 on success, not the number of bytes written. */
-ssize_t
-cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset)
-{
- mm_segment_t fs;
- ssize_t size = 0;
-
- fs = get_fs();
- set_fs(KERNEL_DS);
- while (count > 0) {
- size = filp->f_op->write(filp, (char *)buf, count, offset);
- if (size < 0)
- break;
- count -= size;
- size = 0;
- }
- set_fs(fs);
-
- return size;
-}
-
-#if !(CFS_O_CREAT == O_CREAT && CFS_O_EXCL == O_EXCL && \
- CFS_O_TRUNC == O_TRUNC && CFS_O_APPEND == O_APPEND &&\
- CFS_O_NONBLOCK == O_NONBLOCK && CFS_O_NDELAY == O_NDELAY &&\
- CFS_O_SYNC == O_SYNC && CFS_O_ASYNC == FASYNC &&\
- CFS_O_DIRECT == O_DIRECT && CFS_O_LARGEFILE == O_LARGEFILE &&\
- CFS_O_DIRECTORY == O_DIRECTORY && CFS_O_NOFOLLOW == O_NOFOLLOW)
-
-int cfs_oflags2univ(int flags)
-{
- int f;
-
- f = flags & O_ACCMODE;
- f |= (flags & O_CREAT) ? CFS_O_CREAT: 0;
- f |= (flags & O_EXCL) ? CFS_O_EXCL: 0;
- f |= (flags & O_NOCTTY) ? CFS_O_NOCTTY: 0;
- f |= (flags & O_TRUNC) ? CFS_O_TRUNC: 0;
- f |= (flags & O_APPEND) ? CFS_O_APPEND: 0;
- f |= (flags & O_NONBLOCK) ? CFS_O_NONBLOCK: 0;
- f |= (flags & O_SYNC)? CFS_O_SYNC: 0;
- f |= (flags & FASYNC)? CFS_O_ASYNC: 0;
- f |= (flags & O_DIRECTORY)? CFS_O_DIRECTORY: 0;
- f |= (flags & O_DIRECT)? CFS_O_DIRECT: 0;
- f |= (flags & O_LARGEFILE)? CFS_O_LARGEFILE: 0;
- f |= (flags & O_NOFOLLOW)? CFS_O_NOFOLLOW: 0;
- f |= (flags & O_NOATIME)? CFS_O_NOATIME: 0;
- return f;
-}
-#else
-
-int cfs_oflags2univ(int flags)
-{
- return (flags);
-}
-#endif
-
-/*
- * XXX Liang: we don't need cfs_univ2oflags() now.
- */
-int cfs_univ2oflags(int flags)
-{
- return (flags);
-}
-
-EXPORT_SYMBOL(cfs_filp_open);
-EXPORT_SYMBOL(cfs_user_write);
-EXPORT_SYMBOL(cfs_oflags2univ);
-EXPORT_SYMBOL(cfs_univ2oflags);
+++ /dev/null
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <arch-linux/cfs_lock.h>
-#include <libcfs/libcfs.h>
+++ /dev/null
-# define DEBUG_SUBSYSTEM S_LNET
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <libcfs/libcfs.h>
-
-static unsigned int cfs_alloc_flags_to_gfp(u_int32_t flags)
-{
- unsigned int mflags = 0;
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- if (flags & CFS_ALLOC_ATOMIC)
- mflags |= __GFP_HIGH;
- else if (flags & CFS_ALLOC_WAIT)
- mflags |= __GFP_WAIT;
- else
- mflags |= (__GFP_HIGH | __GFP_WAIT);
- if (flags & CFS_ALLOC_IO)
- mflags |= __GFP_IO | __GFP_HIGHIO;
-#else
- if (flags & CFS_ALLOC_ATOMIC)
- mflags |= __GFP_HIGH;
- else
- mflags |= __GFP_WAIT;
- if (flags & CFS_ALLOC_NOWARN)
- mflags |= __GFP_NOWARN;
- if (flags & CFS_ALLOC_IO)
- mflags |= __GFP_IO;
-#endif
- if (flags & CFS_ALLOC_FS)
- mflags |= __GFP_FS;
- if (flags & CFS_ALLOC_HIGH)
- mflags |= __GFP_HIGH;
- return mflags;
-}
-
-void *
-cfs_alloc(size_t nr_bytes, u_int32_t flags)
-{
- void *ptr = NULL;
-
- ptr = kmalloc(nr_bytes, cfs_alloc_flags_to_gfp(flags));
- if (ptr != NULL && (flags & CFS_ALLOC_ZERO))
- memset(ptr, 0, nr_bytes);
- return ptr;
-}
-
-void
-cfs_free(void *addr)
-{
- kfree(addr);
-}
-
-void *
-cfs_alloc_large(size_t nr_bytes)
-{
- return vmalloc(nr_bytes);
-}
-
-void
-cfs_free_large(void *addr)
-{
- vfree(addr);
-}
-
-cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order)
-{
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- return alloc_pages(cfs_alloc_flags_to_gfp(flags), order);
-}
-
-void __cfs_free_pages(cfs_page_t *page, unsigned int order)
-{
- __free_pages(page, order);
-}
-
-cfs_mem_cache_t *
-cfs_mem_cache_create (const char *name, size_t size, size_t offset,
- unsigned long flags)
-{
-#ifdef HAVE_KMEM_CACHE_CREATE_DTOR
- return kmem_cache_create(name, size, offset, flags, NULL, NULL);
-#else
- return kmem_cache_create(name, size, offset, flags, NULL);
-#endif
-}
-
-int
-cfs_mem_cache_destroy (cfs_mem_cache_t * cachep)
-{
-#ifdef HAVE_KMEM_CACHE_DESTROY_INT
- return kmem_cache_destroy(cachep);
-#else
- kmem_cache_destroy(cachep);
- return 0;
-#endif
-}
-
-void *
-cfs_mem_cache_alloc(cfs_mem_cache_t *cachep, int flags)
-{
- return kmem_cache_alloc(cachep, cfs_alloc_flags_to_gfp(flags));
-}
-
-void
-cfs_mem_cache_free(cfs_mem_cache_t *cachep, void *objp)
-{
- return kmem_cache_free(cachep, objp);
-}
-
-EXPORT_SYMBOL(cfs_alloc);
-EXPORT_SYMBOL(cfs_free);
-EXPORT_SYMBOL(cfs_alloc_large);
-EXPORT_SYMBOL(cfs_free_large);
-EXPORT_SYMBOL(cfs_alloc_pages);
-EXPORT_SYMBOL(__cfs_free_pages);
-EXPORT_SYMBOL(cfs_mem_cache_create);
-EXPORT_SYMBOL(cfs_mem_cache_destroy);
-EXPORT_SYMBOL(cfs_mem_cache_alloc);
-EXPORT_SYMBOL(cfs_mem_cache_free);
+++ /dev/null
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-#define LNET_MINOR 240
-
-int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
-{
- struct libcfs_ioctl_hdr *hdr;
- struct libcfs_ioctl_data *data;
- int err;
- ENTRY;
-
- hdr = (struct libcfs_ioctl_hdr *)buf;
- data = (struct libcfs_ioctl_data *)buf;
-
- err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
- if (err)
- RETURN(err);
-
- if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
- CERROR("PORTALS: version mismatch kernel vs application\n");
- RETURN(-EINVAL);
- }
-
- if (hdr->ioc_len + buf >= end) {
- CERROR("PORTALS: user buffer exceeds kernel buffer\n");
- RETURN(-EINVAL);
- }
-
-
- if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) {
- CERROR("PORTALS: user buffer too small for ioctl\n");
- RETURN(-EINVAL);
- }
-
- err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
- if (err)
- RETURN(err);
-
- if (libcfs_ioctl_is_invalid(data)) {
- CERROR("PORTALS: ioctl not correctly formatted\n");
- RETURN(-EINVAL);
- }
-
- if (data->ioc_inllen1)
- data->ioc_inlbuf1 = &data->ioc_bulk[0];
-
- if (data->ioc_inllen2)
- data->ioc_inlbuf2 = &data->ioc_bulk[0] +
- size_round(data->ioc_inllen1);
-
- RETURN(0);
-}
-
-int libcfs_ioctl_popdata(void *arg, void *data, int size)
-{
- if (copy_to_user((char *)arg, data, size))
- return -EFAULT;
- return 0;
-}
-
-extern struct cfs_psdev_ops libcfs_psdev_ops;
-
-static int
-libcfs_psdev_open(struct inode * inode, struct file * file)
-{
- struct libcfs_device_userstate **pdu = NULL;
- int rc = 0;
-
- if (!inode)
- return (-EINVAL);
- pdu = (struct libcfs_device_userstate **)&file->private_data;
- if (libcfs_psdev_ops.p_open != NULL)
- rc = libcfs_psdev_ops.p_open(0, (void *)pdu);
- else
- return (-EPERM);
- return rc;
-}
-
-/* called when closing /dev/device */
-static int
-libcfs_psdev_release(struct inode * inode, struct file * file)
-{
- struct libcfs_device_userstate *pdu;
- int rc = 0;
-
- if (!inode)
- return (-EINVAL);
- pdu = file->private_data;
- if (libcfs_psdev_ops.p_close != NULL)
- rc = libcfs_psdev_ops.p_close(0, (void *)pdu);
- else
- rc = -EPERM;
- return rc;
-}
-
-static int
-libcfs_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- struct cfs_psdev_file pfile;
- int rc = 0;
-
- if (current->fsuid != 0)
- return -EACCES;
-
- if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
- _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR ||
- _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) {
- CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
- _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
- return (-EINVAL);
- }
-
- /* Handle platform-dependent IOC requests */
- switch (cmd) {
- case IOC_LIBCFS_PANIC:
- if (!capable (CAP_SYS_BOOT))
- return (-EPERM);
- panic("debugctl-invoked panic");
- return (0);
- case IOC_LIBCFS_MEMHOG:
- if (!capable (CAP_SYS_ADMIN))
- return -EPERM;
- /* go thought */
- }
-
- pfile.off = 0;
- pfile.private_data = file->private_data;
- if (libcfs_psdev_ops.p_ioctl != NULL)
- rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
- else
- rc = -EPERM;
- return (rc);
-}
-
-static struct file_operations libcfs_fops = {
- ioctl: libcfs_ioctl,
- open: libcfs_psdev_open,
- release: libcfs_psdev_release
-};
-
-cfs_psdev_t libcfs_dev = {
- LNET_MINOR,
- "lnet",
- &libcfs_fops
-};
-
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <libcfs/libcfs.h>
-
-#if defined(CONFIG_KGDB)
-#include <asm/kgdb.h>
-#endif
-
-void cfs_enter_debugger(void)
-{
-#if defined(CONFIG_KGDB)
- BREAKPOINT();
-#elif defined(__arch_um__)
- asm("int $3");
-#else
- /* nothing */
-#endif
-}
-
-void cfs_daemonize(char *str) {
- unsigned long flags;
-
- lock_kernel();
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63))
- daemonize(str);
-#else
- daemonize();
- exit_files(current);
- reparent_to_init();
- snprintf (current->comm, sizeof (current->comm), "%s", str);
-#endif
- SIGNAL_MASK_LOCK(current, flags);
- sigfillset(¤t->blocked);
- RECALC_SIGPENDING;
- SIGNAL_MASK_UNLOCK(current, flags);
- unlock_kernel();
-}
-
-int cfs_daemonize_ctxt(char *str) {
- struct task_struct *tsk = current;
- struct fs_struct *fs = NULL;
-
- cfs_daemonize(str);
- fs = copy_fs_struct(tsk->fs);
- if (fs == NULL)
- return -ENOMEM;
- exit_fs(tsk);
- tsk->fs = fs;
- return 0;
-}
-
-
-sigset_t
-cfs_get_blockedsigs(void)
-{
- unsigned long flags;
- sigset_t old;
-
- SIGNAL_MASK_LOCK(current, flags);
- old = current->blocked;
- SIGNAL_MASK_UNLOCK(current, flags);
- return old;
-}
-
-sigset_t
-cfs_block_allsigs(void)
-{
- unsigned long flags;
- sigset_t old;
-
- SIGNAL_MASK_LOCK(current, flags);
- old = current->blocked;
- sigfillset(¤t->blocked);
- RECALC_SIGPENDING;
- SIGNAL_MASK_UNLOCK(current, flags);
-
- return old;
-}
-
-sigset_t
-cfs_block_sigs(sigset_t bits)
-{
- unsigned long flags;
- sigset_t old;
-
- SIGNAL_MASK_LOCK(current, flags);
- old = current->blocked;
- current->blocked = bits;
- RECALC_SIGPENDING;
- SIGNAL_MASK_UNLOCK(current, flags);
- return old;
-}
-
-void
-cfs_restore_sigs (cfs_sigset_t old)
-{
- unsigned long flags;
-
- SIGNAL_MASK_LOCK(current, flags);
- current->blocked = old;
- RECALC_SIGPENDING;
- SIGNAL_MASK_UNLOCK(current, flags);
-}
-
-int
-cfs_signal_pending(void)
-{
- return signal_pending(current);
-}
-
-void
-cfs_clear_sigpending(void)
-{
- unsigned long flags;
-
- SIGNAL_MASK_LOCK(current, flags);
- CLEAR_SIGPENDING;
- SIGNAL_MASK_UNLOCK(current, flags);
-}
-
-int
-libcfs_arch_init(void)
-{
- return 0;
-}
-
-void
-libcfs_arch_cleanup(void)
-{
- return;
-}
-
-EXPORT_SYMBOL(libcfs_arch_init);
-EXPORT_SYMBOL(libcfs_arch_cleanup);
-EXPORT_SYMBOL(cfs_daemonize);
-EXPORT_SYMBOL(cfs_daemonize_ctxt);
-EXPORT_SYMBOL(cfs_block_allsigs);
-EXPORT_SYMBOL(cfs_block_sigs);
-EXPORT_SYMBOL(cfs_get_blockedsigs);
-EXPORT_SYMBOL(cfs_restore_sigs);
-EXPORT_SYMBOL(cfs_signal_pending);
-EXPORT_SYMBOL(cfs_clear_sigpending);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <asm/uaccess.h>
-
-#include <linux/proc_fs.h>
-#include <linux/sysctl.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/kp30.h>
-#include <asm/div64.h>
-#include "tracefile.h"
-
-static cfs_sysctl_table_header_t *lnet_table_header = NULL;
-extern char lnet_upcall[1024];
-
-#define PSDEV_LNET (0x100)
-enum {
- PSDEV_DEBUG = 1, /* control debugging */
- PSDEV_SUBSYSTEM_DEBUG, /* control debugging */
- PSDEV_PRINTK, /* force all messages to console */
- PSDEV_CONSOLE_RATELIMIT, /* ratelimit console messages */
- PSDEV_CONSOLE_MAX_DELAY_CS, /* maximum delay over which we skip messages */
- PSDEV_CONSOLE_MIN_DELAY_CS, /* initial delay over which we skip messages */
- PSDEV_CONSOLE_BACKOFF, /* delay increase factor */
- PSDEV_DEBUG_PATH, /* crashdump log location */
- PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */
- PSDEV_LNET_UPCALL, /* User mode upcall script */
- PSDEV_LNET_MEMUSED, /* bytes currently PORTAL_ALLOCated */
- PSDEV_LNET_CATASTROPHE, /* if we have LBUGged or panic'd */
- PSDEV_LNET_PANIC_ON_LBUG, /* flag to panic on LBUG */
- PSDEV_LNET_DUMP_KERNEL, /* snapshot kernel debug buffer to file */
- PSDEV_LNET_DAEMON_FILE, /* spool kernel debug buffer to file */
- PSDEV_LNET_DEBUG_MB, /* size of debug buffer */
-};
-
-static int
-proc_call_handler(void *data, int write,
- loff_t *ppos, void *buffer, size_t *lenp,
- int (*handler)(void *data, int write,
- loff_t pos, void *buffer, int len))
-{
- int rc = handler(data, write, *ppos, buffer, *lenp);
-
- if (rc < 0)
- return rc;
-
- if (write) {
- *ppos += *lenp;
- } else {
- *lenp = rc;
- *ppos += rc;
- }
- return 0;
-}
-
-#define DECLARE_PROC_HANDLER(name) \
-static int \
-LL_PROC_PROTO(name) \
-{ \
- DECLARE_LL_PROC_PPOS_DECL; \
- \
- return proc_call_handler(table->data, write, \
- ppos, buffer, lenp, \
- __##name); \
-}
-
-static int __proc_dobitmasks(void *data, int write,
- loff_t pos, void *buffer, int nob)
-{
- const int tmpstrlen = 512;
- char *tmpstr;
- int rc;
- unsigned int *mask = data;
- int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
- int is_printk = (mask == &libcfs_printk) ? 1 : 0;
-
- rc = trace_allocate_string_buffer(&tmpstr, tmpstrlen);
- if (rc < 0)
- return rc;
-
- if (!write) {
- libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys);
- rc = strlen(tmpstr);
-
- if (pos >= rc) {
- rc = 0;
- } else {
- rc = trace_copyout_string(buffer, nob,
- tmpstr + pos, "\n");
- }
- } else {
- rc = trace_copyin_string(tmpstr, tmpstrlen, buffer, nob);
- if (rc < 0)
- return rc;
-
- rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys);
- /* Always print LBUG/LASSERT to console, so keep this mask */
- if (is_printk)
- *mask |= D_EMERG;
- }
-
- trace_free_string_buffer(tmpstr, tmpstrlen);
- return rc;
-}
-
-DECLARE_PROC_HANDLER(proc_dobitmasks)
-
-static int __proc_dump_kernel(void *data, int write,
- loff_t pos, void *buffer, int nob)
-{
- if (!write)
- return 0;
-
- return trace_dump_debug_buffer_usrstr(buffer, nob);
-}
-
-DECLARE_PROC_HANDLER(proc_dump_kernel)
-
-static int __proc_daemon_file(void *data, int write,
- loff_t pos, void *buffer, int nob)
-{
- if (!write) {
- int len = strlen(tracefile);
-
- if (pos >= len)
- return 0;
-
- return trace_copyout_string(buffer, nob,
- tracefile + pos, "\n");
- }
-
- return trace_daemon_command_usrstr(buffer, nob);
-}
-
-DECLARE_PROC_HANDLER(proc_daemon_file)
-
-static int __proc_debug_mb(void *data, int write,
- loff_t pos, void *buffer, int nob)
-{
- if (!write) {
- char tmpstr[32];
- int len = snprintf(tmpstr, sizeof(tmpstr), "%d",
- trace_get_debug_mb());
-
- if (pos >= len)
- return 0;
-
- return trace_copyout_string(buffer, nob, tmpstr + pos, "\n");
- }
-
- return trace_set_debug_mb_usrstr(buffer, nob);
-}
-
-DECLARE_PROC_HANDLER(proc_debug_mb)
-
-int LL_PROC_PROTO(proc_console_max_delay_cs)
-{
- int rc, max_delay_cs;
- cfs_sysctl_table_t dummy = *table;
- cfs_duration_t d;
-
- dummy.data = &max_delay_cs;
- dummy.proc_handler = &proc_dointvec;
-
- if (!write) { /* read */
- max_delay_cs = cfs_duration_sec(libcfs_console_max_delay * 100);
- rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
- return rc;
- }
-
- /* write */
- max_delay_cs = 0;
- rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
- if (rc < 0)
- return rc;
- if (max_delay_cs <= 0)
- return -EINVAL;
-
- d = cfs_time_seconds(max_delay_cs) / 100;
- if (d == 0 || d < libcfs_console_min_delay)
- return -EINVAL;
- libcfs_console_max_delay = d;
-
- return rc;
-}
-
-int LL_PROC_PROTO(proc_console_min_delay_cs)
-{
- int rc, min_delay_cs;
- cfs_sysctl_table_t dummy = *table;
- cfs_duration_t d;
-
- dummy.data = &min_delay_cs;
- dummy.proc_handler = &proc_dointvec;
-
- if (!write) { /* read */
- min_delay_cs = cfs_duration_sec(libcfs_console_min_delay * 100);
- rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
- return rc;
- }
-
- /* write */
- min_delay_cs = 0;
- rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
- if (rc < 0)
- return rc;
- if (min_delay_cs <= 0)
- return -EINVAL;
-
- d = cfs_time_seconds(min_delay_cs) / 100;
- if (d == 0 || d > libcfs_console_max_delay)
- return -EINVAL;
- libcfs_console_min_delay = d;
-
- return rc;
-}
-
-int LL_PROC_PROTO(proc_console_backoff)
-{
- int rc, backoff;
- cfs_sysctl_table_t dummy = *table;
-
- dummy.data = &backoff;
- dummy.proc_handler = &proc_dointvec;
-
- if (!write) { /* read */
- backoff= libcfs_console_backoff;
- rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
- return rc;
- }
-
- /* write */
- backoff = 0;
- rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
- if (rc < 0)
- return rc;
- if (backoff <= 0)
- return -EINVAL;
-
- libcfs_console_backoff = backoff;
-
- return rc;
-}
-
-static cfs_sysctl_table_t lnet_table[] = {
- /*
- * NB No .strategy entries have been provided since sysctl(8) prefers
- * to go via /proc for portability.
- */
- {
- .ctl_name = PSDEV_DEBUG,
- .procname = "debug",
- .data = &libcfs_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dobitmasks
- },
- {
- .ctl_name = PSDEV_SUBSYSTEM_DEBUG,
- .procname = "subsystem_debug",
- .data = &libcfs_subsystem_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dobitmasks
- },
- {
- .ctl_name = PSDEV_PRINTK,
- .procname = "printk",
- .data = &libcfs_printk,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dobitmasks
- },
- {
- .ctl_name = PSDEV_CONSOLE_RATELIMIT,
- .procname = "console_ratelimit",
- .data = &libcfs_console_ratelimit,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = PSDEV_CONSOLE_MAX_DELAY_CS,
- .procname = "console_max_delay_centisecs",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_console_max_delay_cs
- },
- {
- .ctl_name = PSDEV_CONSOLE_MIN_DELAY_CS,
- .procname = "console_min_delay_centisecs",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_console_min_delay_cs
- },
- {
- .ctl_name = PSDEV_CONSOLE_BACKOFF,
- .procname = "console_backoff",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_console_backoff
- },
-
- {
- .ctl_name = PSDEV_DEBUG_PATH,
- .procname = "debug_path",
- .data = debug_file_path,
- .maxlen = sizeof(debug_file_path),
- .mode = 0644,
- .proc_handler = &proc_dostring,
- },
-
- {
- .ctl_name = PSDEV_LNET_UPCALL,
- .procname = "upcall",
- .data = lnet_upcall,
- .maxlen = sizeof(lnet_upcall),
- .mode = 0644,
- .proc_handler = &proc_dostring,
- },
- {
- .ctl_name = PSDEV_LNET_MEMUSED,
- .procname = "memused",
- .data = (int *)&libcfs_kmemory.counter,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = PSDEV_LNET_CATASTROPHE,
- .procname = "catastrophe",
- .data = &libcfs_catastrophe,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = PSDEV_LNET_PANIC_ON_LBUG,
- .procname = "panic_on_lbug",
- .data = &libcfs_panic_on_lbug,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = PSDEV_LNET_DUMP_KERNEL,
- .procname = "dump_kernel",
- .mode = 0200,
- .proc_handler = &proc_dump_kernel,
- },
- {
- .ctl_name = PSDEV_LNET_DAEMON_FILE,
- .procname = "daemon_file",
- .mode = 0644,
- .proc_handler = &proc_daemon_file,
- },
- {
- .ctl_name = PSDEV_LNET_DEBUG_MB,
- .procname = "debug_mb",
- .mode = 0644,
- .proc_handler = &proc_debug_mb,
- },
- {0}
-};
-
-static cfs_sysctl_table_t top_table[2] = {
- {
- .ctl_name = PSDEV_LNET,
- .procname = "lnet",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = lnet_table
- },
- {0}
-};
-
-int insert_proc(void)
-{
-#ifdef CONFIG_SYSCTL
- if (lnet_table_header == NULL)
- lnet_table_header = cfs_register_sysctl_table(top_table, 0);
-#endif
- return 0;
-}
-
-void remove_proc(void)
-{
-#ifdef CONFIG_SYSCTL
- if (lnet_table_header != NULL)
- cfs_unregister_sysctl_table(lnet_table_header);
-
- lnet_table_header = NULL;
-#endif
-}
+++ /dev/null
-# define DEBUG_SUBSYSTEM S_LNET
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/kp30.h>
-#include <libcfs/libcfs.h>
-
-#include <linux/if.h>
-#include <linux/in.h>
-#include <linux/file.h>
-/* For sys_open & sys_close */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-#include <linux/syscalls.h>
-#else
-#include <linux/fs.h>
-#endif
-
-int
-libcfs_sock_ioctl(int cmd, unsigned long arg)
-{
- mm_segment_t oldmm = get_fs();
- struct socket *sock;
- int fd;
- int rc;
- struct file *sock_filp;
-
- rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return rc;
- }
-
- fd = sock_map_fd(sock);
- if (fd < 0) {
- rc = fd;
- sock_release(sock);
- goto out;
- }
-
- sock_filp = fget(fd);
- if (!sock_filp) {
- rc = -ENOMEM;
- goto out_fd;
- }
-
- set_fs(KERNEL_DS);
-#ifdef HAVE_UNLOCKED_IOCTL
- if (sock_filp->f_op->unlocked_ioctl)
- rc = sock_filp->f_op->unlocked_ioctl(sock_filp, cmd, arg);
- else
-#endif
- {
- lock_kernel();
- rc =sock_filp->f_op->ioctl(sock_filp->f_dentry->d_inode,
- sock_filp, cmd, arg);
- unlock_kernel();
- }
- set_fs(oldmm);
-
- fput(sock_filp);
-
- out_fd:
- sys_close(fd);
- out:
- return rc;
-}
-
-int
-libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
-{
- struct ifreq ifr;
- int nob;
- int rc;
- __u32 val;
-
- nob = strnlen(name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- CERROR("Interface name %s too long\n", name);
- return -EINVAL;
- }
-
- CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
-
- strcpy(ifr.ifr_name, name);
- rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
-
- if (rc != 0) {
- CERROR("Can't get flags for interface %s\n", name);
- return rc;
- }
-
- if ((ifr.ifr_flags & IFF_UP) == 0) {
- CDEBUG(D_NET, "Interface %s down\n", name);
- *up = 0;
- *ip = *mask = 0;
- return 0;
- }
-
- *up = 1;
-
- strcpy(ifr.ifr_name, name);
- ifr.ifr_addr.sa_family = AF_INET;
- rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
-
- if (rc != 0) {
- CERROR("Can't get IP address for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *ip = ntohl(val);
-
- strcpy(ifr.ifr_name, name);
- ifr.ifr_addr.sa_family = AF_INET;
- rc = libcfs_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
-
- if (rc != 0) {
- CERROR("Can't get netmask for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
- *mask = ntohl(val);
-
- return 0;
-}
-
-EXPORT_SYMBOL(libcfs_ipif_query);
-
-int
-libcfs_ipif_enumerate (char ***namesp)
-{
- /* Allocate and fill in 'names', returning # interfaces/error */
- char **names;
- int toobig;
- int nalloc;
- int nfound;
- struct ifreq *ifr;
- struct ifconf ifc;
- int rc;
- int nob;
- int i;
-
-
- nalloc = 16; /* first guess at max interfaces */
- toobig = 0;
- for (;;) {
- if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) {
- toobig = 1;
- nalloc = CFS_PAGE_SIZE/sizeof(*ifr);
- CWARN("Too many interfaces: only enumerating first %d\n",
- nalloc);
- }
-
- LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
- if (ifr == NULL) {
- CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
- rc = -ENOMEM;
- goto out0;
- }
-
- ifc.ifc_buf = (char *)ifr;
- ifc.ifc_len = nalloc * sizeof(*ifr);
-
- rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
-
- if (rc < 0) {
- CERROR ("Error %d enumerating interfaces\n", rc);
- goto out1;
- }
-
- LASSERT (rc == 0);
-
- nfound = ifc.ifc_len/sizeof(*ifr);
- LASSERT (nfound <= nalloc);
-
- if (nfound < nalloc || toobig)
- break;
-
- LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
- nalloc *= 2;
- }
-
- if (nfound == 0)
- goto out1;
-
- LIBCFS_ALLOC(names, nfound * sizeof(*names));
- if (names == NULL) {
- rc = -ENOMEM;
- goto out1;
- }
- /* NULL out all names[i] */
- memset (names, 0, nfound * sizeof(*names));
-
- for (i = 0; i < nfound; i++) {
-
- nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- /* no space for terminating NULL */
- CERROR("interface name %.*s too long (%d max)\n",
- nob, ifr[i].ifr_name, IFNAMSIZ);
- rc = -ENAMETOOLONG;
- goto out2;
- }
-
- LIBCFS_ALLOC(names[i], IFNAMSIZ);
- if (names[i] == NULL) {
- rc = -ENOMEM;
- goto out2;
- }
-
- memcpy(names[i], ifr[i].ifr_name, nob);
- names[i][nob] = 0;
- }
-
- *namesp = names;
- rc = nfound;
-
- out2:
- if (rc < 0)
- libcfs_ipif_free_enumeration(names, nfound);
- out1:
- LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
- out0:
- return rc;
-}
-
-EXPORT_SYMBOL(libcfs_ipif_enumerate);
-
-void
-libcfs_ipif_free_enumeration (char **names, int n)
-{
- int i;
-
- LASSERT (n > 0);
-
- for (i = 0; i < n && names[i] != NULL; i++)
- LIBCFS_FREE(names[i], IFNAMSIZ);
-
- LIBCFS_FREE(names, n * sizeof(*names));
-}
-
-EXPORT_SYMBOL(libcfs_ipif_free_enumeration);
-
-int
-libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- mm_segment_t oldmm = get_fs();
- long ticks = timeout * HZ;
- unsigned long then;
- struct timeval tv;
-
- LASSERT (nob > 0);
- /* Caller may pass a zero timeout if she thinks the socket buffer is
- * empty enough to take the whole message immediately */
-
- for (;;) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0
- };
-
- if (timeout != 0) {
- /* Set send timeout to remaining time */
- tv = (struct timeval) {
- .tv_sec = ticks / HZ,
- .tv_usec = ((ticks % HZ) * 1000000) / HZ
- };
- set_fs(KERNEL_DS);
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
- (char *)&tv, sizeof(tv));
- set_fs(oldmm);
- if (rc != 0) {
- CERROR("Can't set socket send timeout "
- "%ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
- }
-
- set_fs (KERNEL_DS);
- then = jiffies;
- rc = sock_sendmsg (sock, &msg, iov.iov_len);
- ticks -= jiffies - then;
- set_fs (oldmm);
-
- if (rc == nob)
- return 0;
-
- if (rc < 0)
- return rc;
-
- if (rc == 0) {
- CERROR ("Unexpected zero rc\n");
- return (-ECONNABORTED);
- }
-
- if (ticks <= 0)
- return -EAGAIN;
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
- }
-
- return (0);
-}
-EXPORT_SYMBOL(libcfs_sock_write);
-
-int
-libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- mm_segment_t oldmm = get_fs();
- long ticks = timeout * HZ;
- unsigned long then;
- struct timeval tv;
-
- LASSERT (nob > 0);
- LASSERT (ticks > 0);
-
- for (;;) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
-
- /* Set receive timeout to remaining time */
- tv = (struct timeval) {
- .tv_sec = ticks / HZ,
- .tv_usec = ((ticks % HZ) * 1000000) / HZ
- };
- set_fs(KERNEL_DS);
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
- (char *)&tv, sizeof(tv));
- set_fs(oldmm);
- if (rc != 0) {
- CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
-
- set_fs(KERNEL_DS);
- then = jiffies;
- rc = sock_recvmsg(sock, &msg, iov.iov_len, 0);
- ticks -= jiffies - then;
- set_fs(oldmm);
-
- if (rc < 0)
- return rc;
-
- if (rc == 0)
- return -ECONNRESET;
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
-
- if (nob == 0)
- return 0;
-
- if (ticks <= 0)
- return -ETIMEDOUT;
- }
-}
-
-EXPORT_SYMBOL(libcfs_sock_read);
-
-static int
-libcfs_sock_create (struct socket **sockp, int *fatal,
- __u32 local_ip, int local_port)
-{
- struct sockaddr_in locaddr;
- struct socket *sock;
- int rc;
- int option;
- mm_segment_t oldmm = get_fs();
-
- /* All errors are fatal except bind failure if the port is in use */
- *fatal = 1;
-
- rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
- *sockp = sock;
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return (rc);
- }
-
- set_fs (KERNEL_DS);
- option = 1;
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
- goto failed;
- }
-
- if (local_ip != 0 || local_port != 0) {
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(local_port);
- locaddr.sin_addr.s_addr = (local_ip == 0) ?
- INADDR_ANY : htonl(local_ip);
-
- rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr,
- sizeof(locaddr));
- if (rc == -EADDRINUSE) {
- CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *fatal = 0;
- goto failed;
- }
- if (rc != 0) {
- CERROR("Error trying to bind to port %d: %d\n",
- local_port, rc);
- goto failed;
- }
- }
-
- return 0;
-
- failed:
- sock_release(sock);
- return rc;
-}
-
-int
-libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize)
-{
- mm_segment_t oldmm = get_fs();
- int option;
- int rc;
-
- if (txbufsize != 0) {
- option = txbufsize;
- set_fs (KERNEL_DS);
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set send buffer %d: %d\n",
- option, rc);
- return (rc);
- }
- }
-
- if (rxbufsize != 0) {
- option = rxbufsize;
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set receive buffer %d: %d\n",
- option, rc);
- return (rc);
- }
- }
-
- return 0;
-}
-
-EXPORT_SYMBOL(libcfs_sock_setbuf);
-
-int
-libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port)
-{
- struct sockaddr_in sin;
- int len = sizeof (sin);
- int rc;
-
- rc = sock->ops->getname (sock, (struct sockaddr *)&sin, &len,
- remote ? 2 : 0);
- if (rc != 0) {
- CERROR ("Error %d getting sock %s IP/port\n",
- rc, remote ? "peer" : "local");
- return rc;
- }
-
- if (ip != NULL)
- *ip = ntohl (sin.sin_addr.s_addr);
-
- if (port != NULL)
- *port = ntohs (sin.sin_port);
-
- return 0;
-}
-
-EXPORT_SYMBOL(libcfs_sock_getaddr);
-
-int
-libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize)
-{
-
- if (txbufsize != NULL) {
- *txbufsize = sock->sk->sk_sndbuf;
- }
-
- if (rxbufsize != NULL) {
- *rxbufsize = sock->sk->sk_rcvbuf;
- }
-
- return 0;
-}
-
-EXPORT_SYMBOL(libcfs_sock_getbuf);
-
-int
-libcfs_sock_listen (struct socket **sockp,
- __u32 local_ip, int local_port, int backlog)
-{
- int fatal;
- int rc;
-
- rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port);
- if (rc != 0) {
- if (!fatal)
- CERROR("Can't create socket: port %d already in use\n",
- local_port);
- return rc;
- }
-
- rc = (*sockp)->ops->listen(*sockp, backlog);
- if (rc == 0)
- return 0;
-
- CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
- sock_release(*sockp);
- return rc;
-}
-
-EXPORT_SYMBOL(libcfs_sock_listen);
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12)
-int sock_create_lite(int family, int type, int protocol, struct socket **res)
-{
- struct socket *sock;
-
- sock = sock_alloc();
- if (sock == NULL)
- return -ENOMEM;
-
- sock->type = type;
- *res = sock;
-
- return 0;
-}
-#endif
-
-int
-libcfs_sock_accept (struct socket **newsockp, struct socket *sock)
-{
- wait_queue_t wait;
- struct socket *newsock;
- int rc;
-
- init_waitqueue_entry(&wait, current);
-
- /* XXX this should add a ref to sock->ops->owner, if
- * TCP could be a module */
- rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
- if (rc) {
- CERROR("Can't allocate socket\n");
- return rc;
- }
-
- newsock->ops = sock->ops;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(sock->sk->sk_sleep, &wait);
-
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
- if (rc == -EAGAIN) {
- /* Nothing ready, so wait for activity */
- schedule();
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
- }
-
- remove_wait_queue(sock->sk->sk_sleep, &wait);
- set_current_state(TASK_RUNNING);
-
- if (rc != 0)
- goto failed;
-
- *newsockp = newsock;
- return 0;
-
- failed:
- sock_release(newsock);
- return rc;
-}
-
-EXPORT_SYMBOL(libcfs_sock_accept);
-
-void
-libcfs_sock_abort_accept (struct socket *sock)
-{
- wake_up_all(sock->sk->sk_sleep);
-}
-
-EXPORT_SYMBOL(libcfs_sock_abort_accept);
-
-int
-libcfs_sock_connect (struct socket **sockp, int *fatal,
- __u32 local_ip, int local_port,
- __u32 peer_ip, int peer_port)
-{
- struct sockaddr_in srvaddr;
- int rc;
-
- rc = libcfs_sock_create(sockp, fatal, local_ip, local_port);
- if (rc != 0)
- return rc;
-
- memset (&srvaddr, 0, sizeof (srvaddr));
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons(peer_port);
- srvaddr.sin_addr.s_addr = htonl(peer_ip);
-
- rc = (*sockp)->ops->connect(*sockp,
- (struct sockaddr *)&srvaddr, sizeof(srvaddr),
- 0);
- if (rc == 0)
- return 0;
-
- /* EADDRNOTAVAIL probably means we're already connected to the same
- * peer/port on the same local port on a differently typed
- * connection. Let our caller retry with a different local
- * port... */
- *fatal = !(rc == -EADDRNOTAVAIL);
-
- CDEBUG(*fatal ? D_NETERROR : D_NET,
- "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
- HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
-
- sock_release(*sockp);
- return rc;
-}
-
-EXPORT_SYMBOL(libcfs_sock_connect);
-
-void
-libcfs_sock_release (struct socket *sock)
-{
- sock_release(sock);
-}
-
-EXPORT_SYMBOL(libcfs_sock_release);
+++ /dev/null
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-#include "tracefile.h"
-
-#ifndef get_cpu
-#define get_cpu() smp_processor_id()
-#define put_cpu() do { } while (0)
-#endif
-
-/* three types of trace_data in linux */
-enum {
- TCD_TYPE_PROC = 0,
- TCD_TYPE_SOFTIRQ,
- TCD_TYPE_IRQ,
- TCD_TYPE_MAX
-};
-
-/* percents to share the total debug memory for each type */
-static unsigned int pages_factor[TCD_TYPE_MAX] = {
- 80, /* 80% pages for TCD_TYPE_PROC */
- 10, /* 10% pages for TCD_TYPE_SOFTIRQ */
- 10 /* 10% pages for TCD_TYPE_IRQ */
-};
-
-char *trace_console_buffers[NR_CPUS][3];
-
-struct rw_semaphore tracefile_sem;
-
-int tracefile_init_arch()
-{
- int i;
- int j;
- struct trace_cpu_data *tcd;
-
- init_rwsem(&tracefile_sem);
-
- /* initialize trace_data */
- memset(trace_data, 0, sizeof(trace_data));
- for (i = 0; i < TCD_TYPE_MAX; i++) {
- trace_data[i]=kmalloc(sizeof(union trace_data_union)*NR_CPUS,
- GFP_KERNEL);
- if (trace_data[i] == NULL)
- goto out;
-
- }
-
- /* arch related info initialized */
- tcd_for_each(tcd, i, j) {
- tcd->tcd_pages_factor = pages_factor[i];
- tcd->tcd_type = i;
- tcd->tcd_cpu = j;
- }
-
- for (i = 0; i < num_possible_cpus(); i++)
- for (j = 0; j < 3; j++) {
- trace_console_buffers[i][j] =
- kmalloc(TRACE_CONSOLE_BUFFER_SIZE,
- GFP_KERNEL);
-
- if (trace_console_buffers[i][j] == NULL)
- goto out;
- }
-
- return 0;
-
-out:
- tracefile_fini_arch();
- printk(KERN_ERR "lnet: No enough memory\n");
- return -ENOMEM;
-
-}
-
-void tracefile_fini_arch()
-{
- int i;
- int j;
-
- for (i = 0; i < num_possible_cpus(); i++)
- for (j = 0; j < 3; j++)
- if (trace_console_buffers[i][j] != NULL) {
- kfree(trace_console_buffers[i][j]);
- trace_console_buffers[i][j] = NULL;
- }
-
- for (i = 0; trace_data[i] != NULL; i++) {
- kfree(trace_data[i]);
- trace_data[i] = NULL;
- }
-}
-
-void tracefile_read_lock()
-{
- down_read(&tracefile_sem);
-}
-
-void tracefile_read_unlock()
-{
- up_read(&tracefile_sem);
-}
-
-void tracefile_write_lock()
-{
- down_write(&tracefile_sem);
-}
-
-void tracefile_write_unlock()
-{
- up_write(&tracefile_sem);
-}
-
-char *
-trace_get_console_buffer(void)
-{
- int cpu = get_cpu();
- int idx;
-
- if (in_irq()) {
- idx = 0;
- } else if (in_softirq()) {
- idx = 1;
- } else {
- idx = 2;
- }
-
- return trace_console_buffers[cpu][idx];
-}
-
-void
-trace_put_console_buffer(char *buffer)
-{
- put_cpu();
-}
-
-struct trace_cpu_data *
-trace_get_tcd(void)
-{
- int cpu;
-
- cpu = get_cpu();
- if (in_irq())
- return &(*trace_data[TCD_TYPE_IRQ])[cpu].tcd;
- else if (in_softirq())
- return &(*trace_data[TCD_TYPE_SOFTIRQ])[cpu].tcd;
- return &(*trace_data[TCD_TYPE_PROC])[cpu].tcd;
-}
-
-void
-trace_put_tcd (struct trace_cpu_data *tcd)
-{
- put_cpu();
-}
-
-int trace_lock_tcd(struct trace_cpu_data *tcd)
-{
- __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
- if (tcd->tcd_type == TCD_TYPE_IRQ)
- local_irq_disable();
- else if (tcd->tcd_type == TCD_TYPE_SOFTIRQ)
- local_bh_disable();
- return 1;
-}
-
-void trace_unlock_tcd(struct trace_cpu_data *tcd)
-{
- __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
- if (tcd->tcd_type == TCD_TYPE_IRQ)
- local_irq_enable();
- else if (tcd->tcd_type == TCD_TYPE_SOFTIRQ)
- local_bh_enable();
-}
-
-int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
-{
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
- return tcd->tcd_cpu == tage->cpu;
-}
-
-void
-set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
- const int line, unsigned long stack)
-{
- struct timeval tv;
-
- do_gettimeofday(&tv);
-
- header->ph_subsys = subsys;
- header->ph_mask = mask;
- header->ph_cpu_id = smp_processor_id();
- header->ph_sec = (__u32)tv.tv_sec;
- header->ph_usec = tv.tv_usec;
- header->ph_stack = stack;
- header->ph_pid = current->pid;
- header->ph_line_num = line;
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
- header->ph_extern_pid = current->thread.extern_pid;
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- header->ph_extern_pid = current->thread.mode.tt.extern_pid;
-#else
- header->ph_extern_pid = 0;
-#endif
- return;
-}
-
-void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
- int len, const char *file, const char *fn)
-{
- char *prefix = "Lustre", *ptype = NULL;
-
- if ((mask & D_EMERG) != 0) {
- prefix = "LustreError";
- ptype = KERN_EMERG;
- } else if ((mask & D_ERROR) != 0) {
- prefix = "LustreError";
- ptype = KERN_ERR;
- } else if ((mask & D_WARNING) != 0) {
- prefix = "Lustre";
- ptype = KERN_WARNING;
- } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) {
- prefix = "Lustre";
- ptype = KERN_INFO;
- }
-
- if ((mask & D_CONSOLE) != 0) {
- printk("%s%s: %.*s", ptype, prefix, len, buf);
- } else {
- printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
- hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
- }
- return;
-}
-
-int trace_max_debug_mb(void)
-{
- int total_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT));
-
- return MAX(512, (total_mb * 80)/100);
-}
-
-void
-trace_call_on_all_cpus(void (*fn)(void *arg), void *arg)
-{
- cpumask_t cpus_allowed = current->cpus_allowed;
- /* use cpus_allowed to quiet 2.4 UP kernel warning only */
- cpumask_t m = cpus_allowed;
- int cpu;
-
- /* Run the given routine on every CPU in thread context */
- for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
- if (!cpu_online(cpu))
- continue;
-
- cpus_clear(m);
- cpu_set(cpu, m);
- set_cpus_allowed(current, m);
-
- fn(arg);
-
- set_cpus_allowed(current, cpus_allowed);
- }
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-/*
- * miscellaneous libcfs stuff
- */
-#define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/types.h>
-
-/*
- * Convert server error code to client format. Error codes are from
- * Linux errno.h, so for Linux client---identity.
- */
-int convert_server_error(__u64 ecode)
-{
- return ecode;
-}
-EXPORT_SYMBOL(convert_server_error);
-
-/*
- * convert <fcntl.h> flag from client to server.
- */
-int convert_client_oflag(int cflag, int *result)
-{
- *result = cflag;
- return 0;
-}
-EXPORT_SYMBOL(convert_client_oflag);
-
-void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
-{}
-
-EXPORT_SYMBOL(cfs_stack_trace_fill);
-
-void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
-{
- return NULL;
-}
-EXPORT_SYMBOL(cfs_stack_trace_frame);
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2003 Cluster File Systems, Inc.
- * Author: Eric Barton <eeb@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/kernel.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/kp30.h>
-
-#if LWT_SUPPORT
-
-#if !KLWT_SUPPORT
-int lwt_enabled;
-lwt_cpu_t lwt_cpus[NR_CPUS];
-#endif
-
-int lwt_pages_per_cpu;
-
-/* NB only root is allowed to retrieve LWT info; it's an open door into the
- * kernel... */
-
-int
-lwt_lookup_string (int *size, char *knl_ptr,
- char *user_ptr, int user_size)
-{
- int maxsize = 128;
-
- /* knl_ptr was retrieved from an LWT snapshot and the caller wants to
- * turn it into a string. NB we can crash with an access violation
- * trying to determine the string length, so we're trusting our
- * caller... */
-
- if (!capable(CAP_SYS_ADMIN))
- return (-EPERM);
-
- if (user_size > 0 &&
- maxsize > user_size)
- maxsize = user_size;
-
- *size = strnlen (knl_ptr, maxsize - 1) + 1;
-
- if (user_ptr != NULL) {
- if (user_size < 4)
- return (-EINVAL);
-
- if (copy_to_user (user_ptr, knl_ptr, *size))
- return (-EFAULT);
-
- /* Did I truncate the string? */
- if (knl_ptr[*size - 1] != 0)
- copy_to_user (user_ptr + *size - 4, "...", 4);
- }
-
- return (0);
-}
-
-int
-lwt_control (int enable, int clear)
-{
- lwt_page_t *p;
- int i;
- int j;
-
- if (!capable(CAP_SYS_ADMIN))
- return (-EPERM);
-
- if (!enable) {
- LWT_EVENT(0,0,0,0);
- lwt_enabled = 0;
- mb();
- /* give people some time to stop adding traces */
- schedule_timeout(10);
- }
-
- for (i = 0; i < num_online_cpus(); i++) {
- p = lwt_cpus[i].lwtc_current_page;
-
- if (p == NULL)
- return (-ENODATA);
-
- if (!clear)
- continue;
-
- for (j = 0; j < lwt_pages_per_cpu; j++) {
- memset (p->lwtp_events, 0, CFS_PAGE_SIZE);
-
- p = list_entry (p->lwtp_list.next,
- lwt_page_t, lwtp_list);
- }
- }
-
- if (enable) {
- lwt_enabled = 1;
- mb();
- LWT_EVENT(0,0,0,0);
- }
-
- return (0);
-}
-
-int
-lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
- void *user_ptr, int user_size)
-{
- const int events_per_page = CFS_PAGE_SIZE / sizeof(lwt_event_t);
- const int bytes_per_page = events_per_page * sizeof(lwt_event_t);
- lwt_page_t *p;
- int i;
- int j;
-
- if (!capable(CAP_SYS_ADMIN))
- return (-EPERM);
-
- *ncpu = num_online_cpus();
- *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page;
- *now = get_cycles();
-
- if (user_ptr == NULL)
- return (0);
-
- for (i = 0; i < num_online_cpus(); i++) {
- p = lwt_cpus[i].lwtc_current_page;
-
- if (p == NULL)
- return (-ENODATA);
-
- for (j = 0; j < lwt_pages_per_cpu; j++) {
- if (copy_to_user(user_ptr, p->lwtp_events,
- bytes_per_page))
- return (-EFAULT);
-
- user_ptr = ((char *)user_ptr) + bytes_per_page;
- p = list_entry(p->lwtp_list.next,
- lwt_page_t, lwtp_list);
-
- }
- }
-
- return (0);
-}
-
-int
-lwt_init ()
-{
- int i;
- int j;
-
- for (i = 0; i < num_online_cpus(); i++)
- if (lwt_cpus[i].lwtc_current_page != NULL)
- return (-EALREADY);
-
- LASSERT (!lwt_enabled);
-
- /* NULL pointers, zero scalars */
- memset (lwt_cpus, 0, sizeof (lwt_cpus));
- lwt_pages_per_cpu = LWT_MEMORY / (num_online_cpus() * CFS_PAGE_SIZE);
-
- for (i = 0; i < num_online_cpus(); i++)
- for (j = 0; j < lwt_pages_per_cpu; j++) {
- struct page *page = alloc_page (GFP_KERNEL);
- lwt_page_t *lwtp;
-
- if (page == NULL) {
- CERROR ("Can't allocate page\n");
- lwt_fini ();
- return (-ENOMEM);
- }
-
- LIBCFS_ALLOC(lwtp, sizeof (*lwtp));
- if (lwtp == NULL) {
- CERROR ("Can't allocate lwtp\n");
- __free_page(page);
- lwt_fini ();
- return (-ENOMEM);
- }
-
- lwtp->lwtp_page = page;
- lwtp->lwtp_events = page_address(page);
- memset (lwtp->lwtp_events, 0, CFS_PAGE_SIZE);
-
- if (j == 0) {
- INIT_LIST_HEAD (&lwtp->lwtp_list);
- lwt_cpus[i].lwtc_current_page = lwtp;
- } else {
- list_add (&lwtp->lwtp_list,
- &lwt_cpus[i].lwtc_current_page->lwtp_list);
- }
- }
-
- lwt_enabled = 1;
- mb();
-
- LWT_EVENT(0,0,0,0);
-
- return (0);
-}
-
-void
-lwt_fini ()
-{
- int i;
-
- lwt_control(0, 0);
-
- for (i = 0; i < num_online_cpus(); i++)
- while (lwt_cpus[i].lwtc_current_page != NULL) {
- lwt_page_t *lwtp = lwt_cpus[i].lwtc_current_page;
-
- if (list_empty (&lwtp->lwtp_list)) {
- lwt_cpus[i].lwtc_current_page = NULL;
- } else {
- lwt_cpus[i].lwtc_current_page =
- list_entry (lwtp->lwtp_list.next,
- lwt_page_t, lwtp_list);
-
- list_del (&lwtp->lwtp_list);
- }
-
- __free_page (lwtp->lwtp_page);
- LIBCFS_FREE (lwtp, sizeof (*lwtp));
- }
-}
-
-EXPORT_SYMBOL(lwt_enabled);
-EXPORT_SYMBOL(lwt_cpus);
-
-EXPORT_SYMBOL(lwt_init);
-EXPORT_SYMBOL(lwt_fini);
-EXPORT_SYMBOL(lwt_lookup_string);
-EXPORT_SYMBOL(lwt_control);
-EXPORT_SYMBOL(lwt_snapshot);
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <lnet/lib-lnet.h>
-#include <lnet/lnet.h>
-#include <libcfs/kp30.h>
-#include "tracefile.h"
-
-void
-kportal_memhog_free (struct libcfs_device_userstate *ldu)
-{
- cfs_page_t **level0p = &ldu->ldu_memhog_root_page;
- cfs_page_t **level1p;
- cfs_page_t **level2p;
- int count1;
- int count2;
-
- if (*level0p != NULL) {
-
- level1p = (cfs_page_t **)cfs_page_address(*level0p);
- count1 = 0;
-
- while (count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) &&
- *level1p != NULL) {
-
- level2p = (cfs_page_t **)cfs_page_address(*level1p);
- count2 = 0;
-
- while (count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) &&
- *level2p != NULL) {
-
- cfs_free_page(*level2p);
- ldu->ldu_memhog_pages--;
- level2p++;
- count2++;
- }
-
- cfs_free_page(*level1p);
- ldu->ldu_memhog_pages--;
- level1p++;
- count1++;
- }
-
- cfs_free_page(*level0p);
- ldu->ldu_memhog_pages--;
-
- *level0p = NULL;
- }
-
- LASSERT (ldu->ldu_memhog_pages == 0);
-}
-
-int
-kportal_memhog_alloc (struct libcfs_device_userstate *ldu, int npages, int flags)
-{
- cfs_page_t **level0p;
- cfs_page_t **level1p;
- cfs_page_t **level2p;
- int count1;
- int count2;
-
- LASSERT (ldu->ldu_memhog_pages == 0);
- LASSERT (ldu->ldu_memhog_root_page == NULL);
-
- if (npages < 0)
- return -EINVAL;
-
- if (npages == 0)
- return 0;
-
- level0p = &ldu->ldu_memhog_root_page;
- *level0p = cfs_alloc_page(flags);
- if (*level0p == NULL)
- return -ENOMEM;
- ldu->ldu_memhog_pages++;
-
- level1p = (cfs_page_t **)cfs_page_address(*level0p);
- count1 = 0;
- memset(level1p, 0, CFS_PAGE_SIZE);
-
- while (ldu->ldu_memhog_pages < npages &&
- count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) {
-
- if (cfs_signal_pending())
- return (-EINTR);
-
- *level1p = cfs_alloc_page(flags);
- if (*level1p == NULL)
- return -ENOMEM;
- ldu->ldu_memhog_pages++;
-
- level2p = (cfs_page_t **)cfs_page_address(*level1p);
- count2 = 0;
- memset(level2p, 0, CFS_PAGE_SIZE);
-
- while (ldu->ldu_memhog_pages < npages &&
- count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) {
-
- if (cfs_signal_pending())
- return (-EINTR);
-
- *level2p = cfs_alloc_page(flags);
- if (*level2p == NULL)
- return (-ENOMEM);
- ldu->ldu_memhog_pages++;
-
- level2p++;
- count2++;
- }
-
- level1p++;
- count1++;
- }
-
- return 0;
-}
-
-/* called when opening /dev/device */
-static int libcfs_psdev_open(unsigned long flags, void *args)
-{
- struct libcfs_device_userstate *ldu;
- ENTRY;
-
- PORTAL_MODULE_USE;
-
- LIBCFS_ALLOC(ldu, sizeof(*ldu));
- if (ldu != NULL) {
- ldu->ldu_memhog_pages = 0;
- ldu->ldu_memhog_root_page = NULL;
- }
- *(struct libcfs_device_userstate **)args = ldu;
-
- RETURN(0);
-}
-
-/* called when closing /dev/device */
-static int libcfs_psdev_release(unsigned long flags, void *args)
-{
- struct libcfs_device_userstate *ldu;
- ENTRY;
-
- ldu = (struct libcfs_device_userstate *)args;
- if (ldu != NULL) {
- kportal_memhog_free(ldu);
- LIBCFS_FREE(ldu, sizeof(*ldu));
- }
-
- PORTAL_MODULE_UNUSE;
- RETURN(0);
-}
-
-static struct rw_semaphore ioctl_list_sem;
-static struct list_head ioctl_list;
-
-int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
-{
- int rc = 0;
-
- down_write(&ioctl_list_sem);
- if (!list_empty(&hand->item))
- rc = -EBUSY;
- else
- list_add_tail(&hand->item, &ioctl_list);
- up_write(&ioctl_list_sem);
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_register_ioctl);
-
-int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
-{
- int rc = 0;
-
- down_write(&ioctl_list_sem);
- if (list_empty(&hand->item))
- rc = -ENOENT;
- else
- list_del_init(&hand->item);
- up_write(&ioctl_list_sem);
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_deregister_ioctl);
-
-static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *arg)
-{
- char buf[1024];
- int err = -EINVAL;
- struct libcfs_ioctl_data *data;
- ENTRY;
-
- /* 'cmd' and permissions get checked in our arch-specific caller */
-
- if (libcfs_ioctl_getdata(buf, buf + 800, (void *)arg)) {
- CERROR("PORTALS ioctl: data error\n");
- RETURN(-EINVAL);
- }
- data = (struct libcfs_ioctl_data *)buf;
-
- switch (cmd) {
- case IOC_LIBCFS_CLEAR_DEBUG:
- libcfs_debug_clear_buffer();
- RETURN(0);
- /*
- * case IOC_LIBCFS_PANIC:
- * Handled in arch/cfs_module.c
- */
- case IOC_LIBCFS_MARK_DEBUG:
- if (data->ioc_inlbuf1 == NULL ||
- data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
- RETURN(-EINVAL);
- libcfs_debug_mark_buffer(data->ioc_inlbuf1);
- RETURN(0);
-#if LWT_SUPPORT
- case IOC_LIBCFS_LWT_CONTROL:
- err = lwt_control ((data->ioc_flags & 1) != 0,
- (data->ioc_flags & 2) != 0);
- break;
-
- case IOC_LIBCFS_LWT_SNAPSHOT: {
- cycles_t now;
- int ncpu;
- int total_size;
-
- err = lwt_snapshot (&now, &ncpu, &total_size,
- data->ioc_pbuf1, data->ioc_plen1);
- data->ioc_u64[0] = now;
- data->ioc_u32[0] = ncpu;
- data->ioc_u32[1] = total_size;
-
- /* Hedge against broken user/kernel typedefs (e.g. cycles_t) */
- data->ioc_u32[2] = sizeof(lwt_event_t);
- data->ioc_u32[3] = offsetof(lwt_event_t, lwte_where);
-
- if (err == 0 &&
- libcfs_ioctl_popdata(arg, data, sizeof (*data)))
- err = -EFAULT;
- break;
- }
-
- case IOC_LIBCFS_LWT_LOOKUP_STRING:
- err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1,
- data->ioc_pbuf2, data->ioc_plen2);
- if (err == 0 &&
- libcfs_ioctl_popdata(arg, data, sizeof (*data)))
- err = -EFAULT;
- break;
-#endif
- case IOC_LIBCFS_MEMHOG:
- if (pfile->private_data == NULL) {
- err = -EINVAL;
- } else {
- kportal_memhog_free(pfile->private_data);
- /* XXX The ioc_flags is not GFP flags now, need to be fixed */
- err = kportal_memhog_alloc(pfile->private_data,
- data->ioc_count,
- data->ioc_flags);
- if (err != 0)
- kportal_memhog_free(pfile->private_data);
- }
- break;
-
- case IOC_LIBCFS_PING_TEST: {
- extern void (kping_client)(struct libcfs_ioctl_data *);
- void (*ping)(struct libcfs_ioctl_data *);
-
- CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n",
- data->ioc_count, libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(data->ioc_nid));
- ping = PORTAL_SYMBOL_GET(kping_client);
- if (!ping)
- CERROR("PORTAL_SYMBOL_GET failed\n");
- else {
- ping(data);
- PORTAL_SYMBOL_PUT(kping_client);
- }
- RETURN(0);
- }
-
- default: {
- struct libcfs_ioctl_handler *hand;
- err = -EINVAL;
- down_read(&ioctl_list_sem);
- list_for_each_entry(hand, &ioctl_list, item) {
- err = hand->handle_ioctl(cmd, data);
- if (err != -EINVAL) {
- if (err == 0)
- err = libcfs_ioctl_popdata(arg,
- data, sizeof (*data));
- break;
- }
- }
- up_read(&ioctl_list_sem);
- break;
- }
- }
-
- RETURN(err);
-}
-
-struct cfs_psdev_ops libcfs_psdev_ops = {
- libcfs_psdev_open,
- libcfs_psdev_release,
- NULL,
- NULL,
- libcfs_ioctl
-};
-
-extern int insert_proc(void);
-extern void remove_proc(void);
-MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
-MODULE_DESCRIPTION("Portals v3.1");
-MODULE_LICENSE("GPL");
-
-extern cfs_psdev_t libcfs_dev;
-extern struct rw_semaphore tracefile_sem;
-extern struct semaphore trace_thread_sem;
-
-extern void libcfs_init_nidstrings(void);
-extern int libcfs_arch_init(void);
-extern void libcfs_arch_cleanup(void);
-
-static int init_libcfs_module(void)
-{
- int rc;
-
- libcfs_arch_init();
- libcfs_init_nidstrings();
- init_rwsem(&tracefile_sem);
- init_mutex(&trace_thread_sem);
- init_rwsem(&ioctl_list_sem);
- CFS_INIT_LIST_HEAD(&ioctl_list);
-
- rc = libcfs_debug_init(5 * 1024 * 1024);
- if (rc < 0) {
- printk(KERN_ERR "LustreError: libcfs_debug_init: %d\n", rc);
- return (rc);
- }
-
-#if LWT_SUPPORT
- rc = lwt_init();
- if (rc != 0) {
- CERROR("lwt_init: error %d\n", rc);
- goto cleanup_debug;
- }
-#endif
- rc = cfs_psdev_register(&libcfs_dev);
- if (rc) {
- CERROR("misc_register: error %d\n", rc);
- goto cleanup_lwt;
- }
-
- rc = insert_proc();
- if (rc) {
- CERROR("insert_proc: error %d\n", rc);
- goto cleanup_deregister;
- }
-
- CDEBUG (D_OTHER, "portals setup OK\n");
- return (0);
-
- cleanup_deregister:
- cfs_psdev_deregister(&libcfs_dev);
- cleanup_lwt:
-#if LWT_SUPPORT
- lwt_fini();
- cleanup_debug:
-#endif
- libcfs_debug_cleanup();
- return rc;
-}
-
-static void exit_libcfs_module(void)
-{
- int rc;
-
- remove_proc();
-
- CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
- atomic_read(&libcfs_kmemory));
-
- rc = cfs_psdev_deregister(&libcfs_dev);
- if (rc)
- CERROR("misc_deregister error %d\n", rc);
-
-#if LWT_SUPPORT
- lwt_fini();
-#endif
-
- if (atomic_read(&libcfs_kmemory) != 0)
- CERROR("Portals memory leaked: %d bytes\n",
- atomic_read(&libcfs_kmemory));
-
- rc = libcfs_debug_cleanup();
- if (rc)
- printk(KERN_ERR "LustreError: libcfs_debug_cleanup: %d\n", rc);
- libcfs_arch_cleanup();
-}
-
-cfs_module(libcfs, "1.0.0", init_libcfs_module, exit_libcfs_module);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <lnet/lnet.h>
-#include <libcfs/kp30.h>
-#ifndef __KERNEL__
-#ifdef HAVE_GETHOSTBYNAME
-# include <netdb.h>
-#endif
-#endif
-
-/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids
- * consistent in all conversion functions. Some code fragments are copied
- * around for the sake of clarity...
- */
-
-/* CAVEAT EMPTOR! Racey temporary buffer allocation!
- * Choose the number of nidstrings to support the MAXIMUM expected number of
- * concurrent users. If there are more, the returned string will be volatile.
- * NB this number must allow for a process to be descheduled for a timeslice
- * between getting its string and using it.
- */
-
-#define LNET_NIDSTR_COUNT 128 /* # of nidstrings */
-#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */
-
-static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE];
-static int libcfs_nidstring_idx = 0;
-
-#ifdef __KERNEL__
-static spinlock_t libcfs_nidstring_lock;
-
-void libcfs_init_nidstrings (void)
-{
- spin_lock_init(&libcfs_nidstring_lock);
-}
-
-# define NIDSTR_LOCK(f) spin_lock_irqsave(&libcfs_nidstring_lock, f)
-# define NIDSTR_UNLOCK(f) spin_unlock_irqrestore(&libcfs_nidstring_lock, f)
-#else
-# define NIDSTR_LOCK(f) (f=0) /* avoid unused var warnings */
-# define NIDSTR_UNLOCK(f) (f=0)
-#endif
-
-static char *
-libcfs_next_nidstring (void)
-{
- char *str;
- unsigned long flags;
-
- NIDSTR_LOCK(flags);
-
- str = libcfs_nidstrings[libcfs_nidstring_idx++];
- if (libcfs_nidstring_idx ==
- sizeof(libcfs_nidstrings)/sizeof(libcfs_nidstrings[0]))
- libcfs_nidstring_idx = 0;
-
- NIDSTR_UNLOCK(flags);
- return str;
-}
-
-static int libcfs_lo_str2addr(const char *str, int nob, __u32 *addr);
-static void libcfs_ip_addr2str(__u32 addr, char *str);
-static int libcfs_ip_str2addr(const char *str, int nob, __u32 *addr);
-static void libcfs_decnum_addr2str(__u32 addr, char *str);
-static void libcfs_hexnum_addr2str(__u32 addr, char *str);
-static int libcfs_num_str2addr(const char *str, int nob, __u32 *addr);
-
-struct netstrfns {
- int nf_type;
- char *nf_name;
- char *nf_modname;
- void (*nf_addr2str)(__u32 addr, char *str);
- int (*nf_str2addr)(const char *str, int nob, __u32 *addr);
-};
-
-static struct netstrfns libcfs_netstrfns[] = {
- {/* .nf_type */ LOLND,
- /* .nf_name */ "lo",
- /* .nf_modname */ "klolnd",
- /* .nf_addr2str */ libcfs_decnum_addr2str,
- /* .nf_str2addr */ libcfs_lo_str2addr},
- {/* .nf_type */ SOCKLND,
- /* .nf_name */ "tcp",
- /* .nf_modname */ "ksocklnd",
- /* .nf_addr2str */ libcfs_ip_addr2str,
- /* .nf_str2addr */ libcfs_ip_str2addr},
- {/* .nf_type */ O2IBLND,
- /* .nf_name */ "o2ib",
- /* .nf_modname */ "ko2iblnd",
- /* .nf_addr2str */ libcfs_ip_addr2str,
- /* .nf_str2addr */ libcfs_ip_str2addr},
- {/* .nf_type */ CIBLND,
- /* .nf_name */ "cib",
- /* .nf_modname */ "kciblnd",
- /* .nf_addr2str */ libcfs_ip_addr2str,
- /* .nf_str2addr */ libcfs_ip_str2addr},
- {/* .nf_type */ OPENIBLND,
- /* .nf_name */ "openib",
- /* .nf_modname */ "kopeniblnd",
- /* .nf_addr2str */ libcfs_ip_addr2str,
- /* .nf_str2addr */ libcfs_ip_str2addr},
- {/* .nf_type */ IIBLND,
- /* .nf_name */ "iib",
- /* .nf_modname */ "kiiblnd",
- /* .nf_addr2str */ libcfs_ip_addr2str,
- /* .nf_str2addr */ libcfs_ip_str2addr},
- {/* .nf_type */ VIBLND,
- /* .nf_name */ "vib",
- /* .nf_modname */ "kviblnd",
- /* .nf_addr2str */ libcfs_ip_addr2str,
- /* .nf_str2addr */ libcfs_ip_str2addr},
- {/* .nf_type */ RALND,
- /* .nf_name */ "ra",
- /* .nf_modname */ "kralnd",
- /* .nf_addr2str */ libcfs_ip_addr2str,
- /* .nf_str2addr */ libcfs_ip_str2addr},
- {/* .nf_type */ QSWLND,
- /* .nf_name */ "elan",
- /* .nf_modname */ "kqswlnd",
- /* .nf_addr2str */ libcfs_decnum_addr2str,
- /* .nf_str2addr */ libcfs_num_str2addr},
- {/* .nf_type */ GMLND,
- /* .nf_name */ "gm",
- /* .nf_modname */ "kgmlnd",
- /* .nf_addr2str */ libcfs_hexnum_addr2str,
- /* .nf_str2addr */ libcfs_num_str2addr},
- {/* .nf_type */ MXLND,
- /* .nf_name */ "mx",
- /* .nf_modname */ "kmxlnd",
- /* .nf_addr2str */ libcfs_ip_addr2str,
- /* .nf_str2addr */ libcfs_ip_str2addr},
- {/* .nf_type */ PTLLND,
- /* .nf_name */ "ptl",
- /* .nf_modname */ "kptllnd",
- /* .nf_addr2str */ libcfs_decnum_addr2str,
- /* .nf_str2addr */ libcfs_num_str2addr},
- /* placeholder for net0 alias. It MUST BE THE LAST ENTRY */
- {/* .nf_type */ -1},
-};
-
-const int libcfs_nnetstrfns = sizeof(libcfs_netstrfns)/sizeof(libcfs_netstrfns[0]);
-
-int
-libcfs_lo_str2addr(const char *str, int nob, __u32 *addr)
-{
- *addr = 0;
- return 1;
-}
-
-void
-libcfs_ip_addr2str(__u32 addr, char *str)
-{
-#if 0 /* never lookup */
-#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME
- __u32 netip = htonl(addr);
- struct hostent *he = gethostbyaddr(&netip, sizeof(netip), AF_INET);
-
- if (he != NULL) {
- snprintf(str, LNET_NIDSTR_SIZE, "%s", he->h_name);
- return;
- }
-#endif
-#endif
- snprintf(str, LNET_NIDSTR_SIZE, "%u.%u.%u.%u",
- (addr >> 24) & 0xff, (addr >> 16) & 0xff,
- (addr >> 8) & 0xff, addr & 0xff);
-}
-
-/* CAVEAT EMPTOR XscanfX
- * I use "%n" at the end of a sscanf format to detect trailing junk. However
- * sscanf may return immediately if it sees the terminating '0' in a string, so
- * I initialise the %n variable to the expected length. If sscanf sets it;
- * fine, if it doesn't, then the scan ended at the end of the string, which is
- * fine too :) */
-
-int
-libcfs_ip_str2addr(const char *str, int nob, __u32 *addr)
-{
- int a;
- int b;
- int c;
- int d;
- int n = nob; /* XscanfX */
-
- /* numeric IP? */
- if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
- n == nob &&
- (a & ~0xff) == 0 && (b & ~0xff) == 0 &&
- (c & ~0xff) == 0 && (d & ~0xff) == 0) {
- *addr = ((a<<24)|(b<<16)|(c<<8)|d);
- return 1;
- }
-
-#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME
- /* known hostname? */
- if (('a' <= str[0] && str[0] <= 'z') ||
- ('A' <= str[0] && str[0] <= 'Z')) {
- char *tmp;
-
- LIBCFS_ALLOC(tmp, nob + 1);
- if (tmp != NULL) {
- struct hostent *he;
-
- memcpy(tmp, str, nob);
- tmp[nob] = 0;
-
- he = gethostbyname(tmp);
-
- LIBCFS_FREE(tmp, nob);
-
- if (he != NULL) {
- __u32 ip = *(__u32 *)he->h_addr;
-
- *addr = ntohl(ip);
- return 1;
- }
- }
- }
-#endif
- return 0;
-}
-
-void
-libcfs_decnum_addr2str(__u32 addr, char *str)
-{
- snprintf(str, LNET_NIDSTR_SIZE, "%u", addr);
-}
-
-void
-libcfs_hexnum_addr2str(__u32 addr, char *str)
-{
- snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr);
-}
-
-int
-libcfs_num_str2addr(const char *str, int nob, __u32 *addr)
-{
- int n;
-
- n = nob;
- if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- n = nob;
- if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- n = nob;
- if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- return 0;
-}
-
-struct netstrfns *
-libcfs_lnd2netstrfns(int lnd)
-{
- int i;
-
- if (lnd >= 0)
- for (i = 0; i < libcfs_nnetstrfns; i++)
- if (lnd == libcfs_netstrfns[i].nf_type)
- return &libcfs_netstrfns[i];
-
- return NULL;
-}
-
-struct netstrfns *
-libcfs_name2netstrfns(const char *name)
-{
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++)
- if (libcfs_netstrfns[i].nf_type >= 0 &&
- !strcmp(libcfs_netstrfns[i].nf_name, name))
- return &libcfs_netstrfns[i];
-
- return NULL;
-}
-
-int
-libcfs_isknown_lnd(int type)
-{
- return libcfs_lnd2netstrfns(type) != NULL;
-}
-
-char *
-libcfs_lnd2modname(int lnd)
-{
- struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
-
- return (nf == NULL) ? NULL : nf->nf_modname;
-}
-
-char *
-libcfs_lnd2str(int lnd)
-{
- char *str;
- struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
-
- if (nf != NULL)
- return nf->nf_name;
-
- str = libcfs_next_nidstring();
- snprintf(str, LNET_NIDSTR_SIZE, "?%u?", lnd);
- return str;
-}
-
-int
-libcfs_str2lnd(const char *str)
-{
- struct netstrfns *nf = libcfs_name2netstrfns(str);
-
- if (nf != NULL)
- return nf->nf_type;
-
- return -1;
-}
-
-char *
-libcfs_net2str(__u32 net)
-{
- int lnd = LNET_NETTYP(net);
- int num = LNET_NETNUM(net);
- struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
- char *str = libcfs_next_nidstring();
-
- if (nf == NULL)
- snprintf(str, LNET_NIDSTR_SIZE, "<%u:%u>", lnd, num);
- else if (num == 0)
- snprintf(str, LNET_NIDSTR_SIZE, "%s", nf->nf_name);
- else
- snprintf(str, LNET_NIDSTR_SIZE, "%s%u", nf->nf_name, num);
-
- return str;
-}
-
-char *
-libcfs_nid2str(lnet_nid_t nid)
-{
- __u32 addr = LNET_NIDADDR(nid);
- __u32 net = LNET_NIDNET(nid);
- int lnd = LNET_NETTYP(net);
- int nnum = LNET_NETNUM(net);
- struct netstrfns *nf;
- char *str;
- int nob;
-
- if (nid == LNET_NID_ANY)
- return "LNET_NID_ANY";
-
- nf = libcfs_lnd2netstrfns(lnd);
- str = libcfs_next_nidstring();
-
- if (nf == NULL)
- snprintf(str, LNET_NIDSTR_SIZE, "%x@<%u:%u>", addr, lnd, nnum);
- else {
- nf->nf_addr2str(addr, str);
- nob = strlen(str);
- if (nnum == 0)
- snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s",
- nf->nf_name);
- else
- snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s%u",
- nf->nf_name, nnum);
- }
-
- return str;
-}
-
-static struct netstrfns *
-libcfs_str2net_internal(const char *str, __u32 *net)
-{
- struct netstrfns *nf;
- int nob;
- int netnum;
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++) {
- nf = &libcfs_netstrfns[i];
- if (nf->nf_type >= 0 &&
- !strncmp(str, nf->nf_name, strlen(nf->nf_name)))
- break;
- }
-
- if (i == libcfs_nnetstrfns)
- return NULL;
-
- nob = strlen(nf->nf_name);
-
- if (strlen(str) == (unsigned int)nob) {
- netnum = 0;
- } else {
- if (nf->nf_type == LOLND) /* net number not allowed */
- return NULL;
-
- str += nob;
- i = strlen(str);
- if (sscanf(str, "%u%n", &netnum, &i) < 1 ||
- i != (int)strlen(str))
- return NULL;
- }
-
- *net = LNET_MKNET(nf->nf_type, netnum);
- return nf;
-}
-
-__u32
-libcfs_str2net(const char *str)
-{
- __u32 net;
-
- if (libcfs_str2net_internal(str, &net) != NULL)
- return net;
-
- return LNET_NIDNET(LNET_NID_ANY);
-}
-
-lnet_nid_t
-libcfs_str2nid(const char *str)
-{
- const char *sep = strchr(str, '@');
- struct netstrfns *nf;
- __u32 net;
- __u32 addr;
-
- if (sep != NULL) {
- nf = libcfs_str2net_internal(sep + 1, &net);
- if (nf == NULL)
- return LNET_NID_ANY;
- } else {
- sep = str + strlen(str);
- net = LNET_MKNET(SOCKLND, 0);
- nf = libcfs_lnd2netstrfns(SOCKLND);
- LASSERT (nf != NULL);
- }
-
- if (!nf->nf_str2addr(str, sep - str, &addr))
- return LNET_NID_ANY;
-
- return LNET_MKNID(net, addr);
-}
-
-char *
-libcfs_id2str(lnet_process_id_t id)
-{
- char *str = libcfs_next_nidstring();
-
- if (id.pid == LNET_PID_ANY) {
- snprintf(str, LNET_NIDSTR_SIZE,
- "LNET_PID_ANY-%s", libcfs_nid2str(id.nid));
- return str;
- }
-
- snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s",
- ((id.pid & LNET_PID_USERFLAG) != 0) ? "U" : "",
- (id.pid & ~LNET_PID_USERFLAG), libcfs_nid2str(id.nid));
- return str;
-}
-
-int
-libcfs_str2anynid(lnet_nid_t *nidp, const char *str)
-{
- if (!strcmp(str, "*")) {
- *nidp = LNET_NID_ANY;
- return 1;
- }
-
- *nidp = libcfs_str2nid(str);
- return *nidp != LNET_NID_ANY;
-}
-
-#ifdef __KERNEL__
-void
-libcfs_setnet0alias(int lnd)
-{
- struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
- struct netstrfns *nf0 = &libcfs_netstrfns[libcfs_nnetstrfns - 1];
-
- /* Ghastly hack to allow LNET to inter-operate with portals.
- * NET type 0 becomes an alias for whatever local network we have, and
- * this assignment here means we can parse and print its NIDs */
-
- LASSERT (nf != NULL);
- LASSERT (nf0->nf_type < 0);
-
- nf0->nf_name = "zero";//nf->nf_name;
- nf0->nf_modname = nf->nf_modname;
- nf0->nf_addr2str = nf->nf_addr2str;
- nf0->nf_str2addr = nf->nf_str2addr;
- mb();
- nf0->nf_type = 0;
-}
-
-EXPORT_SYMBOL(libcfs_isknown_lnd);
-EXPORT_SYMBOL(libcfs_lnd2modname);
-EXPORT_SYMBOL(libcfs_lnd2str);
-EXPORT_SYMBOL(libcfs_str2lnd);
-EXPORT_SYMBOL(libcfs_net2str);
-EXPORT_SYMBOL(libcfs_nid2str);
-EXPORT_SYMBOL(libcfs_str2net);
-EXPORT_SYMBOL(libcfs_str2nid);
-EXPORT_SYMBOL(libcfs_id2str);
-EXPORT_SYMBOL(libcfs_str2anynid);
-EXPORT_SYMBOL(libcfs_setnet0alias);
-#else /* __KERNEL__ */
-void
-libcfs_setnet0alias(int lnd)
-{
- LCONSOLE_ERROR_MSG(0x125, "Liblustre cannot interoperate with old "
- "Portals.\nportals_compatibility must be set to "
- "'none'.\n");
-}
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-#include "tracefile.h"
-
-#include <libcfs/kp30.h>
-#include <libcfs/libcfs.h>
-
-/* XXX move things up to the top, comment */
-union trace_data_union (*trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
-
-char tracefile[TRACEFILE_NAME_SIZE];
-long long tracefile_size = TRACEFILE_SIZE;
-static struct tracefiled_ctl trace_tctl;
-struct semaphore trace_thread_sem;
-static int thread_running = 0;
-
-atomic_t tage_allocated = ATOMIC_INIT(0);
-
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
- struct trace_cpu_data *tcd);
-
-static inline struct trace_page *tage_from_list(struct list_head *list)
-{
- return list_entry(list, struct trace_page, linkage);
-}
-
-static struct trace_page *tage_alloc(int gfp)
-{
- cfs_page_t *page;
- struct trace_page *tage;
-
- /*
- * Don't spam console with allocation failures: they will be reported
- * by upper layer anyway.
- */
- gfp |= CFS_ALLOC_NOWARN;
- page = cfs_alloc_page(gfp);
- if (page == NULL)
- return NULL;
-
- tage = cfs_alloc(sizeof(*tage), gfp);
- if (tage == NULL) {
- cfs_free_page(page);
- return NULL;
- }
-
- tage->page = page;
- atomic_inc(&tage_allocated);
- return tage;
-}
-
-static void tage_free(struct trace_page *tage)
-{
- __LASSERT(tage != NULL);
- __LASSERT(tage->page != NULL);
-
- cfs_free_page(tage->page);
- cfs_free(tage);
- atomic_dec(&tage_allocated);
-}
-
-static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
-{
- __LASSERT(tage != NULL);
- __LASSERT(queue != NULL);
-
- list_move_tail(&tage->linkage, queue);
-}
-
-int trace_refill_stock(struct trace_cpu_data *tcd, int gfp,
- struct list_head *stock)
-{
- int i;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
- struct trace_page *tage;
-
- tage = tage_alloc(gfp);
- if (tage == NULL)
- break;
- list_add_tail(&tage->linkage, stock);
- }
- return i;
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct trace_page *trace_get_tage_try(struct trace_cpu_data *tcd,
- unsigned long len)
-{
- struct trace_page *tage;
-
- if (tcd->tcd_cur_pages > 0) {
- __LASSERT(!list_empty(&tcd->tcd_pages));
- tage = tage_from_list(tcd->tcd_pages.prev);
- if (tage->used + len <= CFS_PAGE_SIZE)
- return tage;
- }
-
- if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
- if (tcd->tcd_cur_stock_pages > 0) {
- tage = tage_from_list(tcd->tcd_stock_pages.prev);
- -- tcd->tcd_cur_stock_pages;
- list_del_init(&tage->linkage);
- } else {
- tage = tage_alloc(CFS_ALLOC_ATOMIC);
- if (tage == NULL) {
- printk(KERN_WARNING
- "failure to allocate a tage (%ld)\n",
- tcd->tcd_cur_pages);
- return NULL;
- }
- }
-
- tage->used = 0;
- tage->cpu = smp_processor_id();
- tage->type = tcd->tcd_type;
- list_add_tail(&tage->linkage, &tcd->tcd_pages);
- tcd->tcd_cur_pages++;
-
- if (tcd->tcd_cur_pages > 8 && thread_running) {
- struct tracefiled_ctl *tctl = &trace_tctl;
- /*
- * wake up tracefiled to process some pages.
- */
- cfs_waitq_signal(&tctl->tctl_waitq);
- }
- return tage;
- }
- return NULL;
-}
-
-static void tcd_shrink(struct trace_cpu_data *tcd)
-{
- int pgcount = tcd->tcd_cur_pages / 10;
- struct page_collection pc;
- struct trace_page *tage;
- struct trace_page *tmp;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
- " 10%% of pages (%d of %ld)\n", pgcount + 1, tcd->tcd_cur_pages);
-
- CFS_INIT_LIST_HEAD(&pc.pc_pages);
- spin_lock_init(&pc.pc_lock);
-
- list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
- if (pgcount-- == 0)
- break;
-
- list_move_tail(&tage->linkage, &pc.pc_pages);
- tcd->tcd_cur_pages--;
- }
- put_pages_on_tcd_daemon_list(&pc, tcd);
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
- unsigned long len)
-{
- struct trace_page *tage;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- if (len > CFS_PAGE_SIZE) {
- printk(KERN_ERR
- "cowardly refusing to write %lu bytes in a page\n", len);
- return NULL;
- }
-
- tage = trace_get_tage_try(tcd, len);
- if (tage != NULL)
- return tage;
- if (thread_running)
- tcd_shrink(tcd);
- if (tcd->tcd_cur_pages > 0) {
- tage = tage_from_list(tcd->tcd_pages.next);
- tage->used = 0;
- tage_to_tail(tage, &tcd->tcd_pages);
- }
- return tage;
-}
-
-int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask,
- const char *file, const char *fn, const int line,
- const char *format1, va_list args,
- const char *format2, ...)
-{
- struct trace_cpu_data *tcd = NULL;
- struct ptldebug_header header;
- struct trace_page *tage;
- /* string_buf is used only if tcd != NULL, and is always set then */
- char *string_buf = NULL;
- char *debug_buf;
- int known_size;
- int needed = 85; /* average message length */
- int max_nob;
- va_list ap;
- int depth;
- int i;
- int remain;
-
- if (strchr(file, '/'))
- file = strrchr(file, '/') + 1;
-
-
- set_ptldebug_header(&header, subsys, mask, line, CDEBUG_STACK());
-
- tcd = trace_get_tcd();
- if (tcd == NULL) /* arch may not log in IRQ context */
- goto console;
-
- if (tcd->tcd_shutting_down) {
- trace_put_tcd(tcd);
- tcd = NULL;
- goto console;
- }
-
- depth = __current_nesting_level();
- known_size = strlen(file) + 1 + depth;
- if (fn)
- known_size += strlen(fn) + 1;
-
- if (libcfs_debug_binary)
- known_size += sizeof(header);
-
- /*/
- * '2' used because vsnprintf return real size required for output
- * _without_ terminating NULL.
- * if needed is to small for this format.
- */
- for (i=0;i<2;i++) {
- tage = trace_get_tage(tcd, needed + known_size + 1);
- if (tage == NULL) {
- if (needed + known_size > CFS_PAGE_SIZE)
- mask |= D_ERROR;
-
- trace_put_tcd(tcd);
- tcd = NULL;
- goto console;
- }
-
- string_buf = (char *)cfs_page_address(tage->page)+tage->used+known_size;
-
- max_nob = CFS_PAGE_SIZE - tage->used - known_size;
- if (max_nob <= 0) {
- printk(KERN_EMERG "negative max_nob: %i\n", max_nob);
- mask |= D_ERROR;
- trace_put_tcd(tcd);
- tcd = NULL;
- goto console;
- }
-
- needed = 0;
- if (format1) {
- va_copy(ap, args);
- needed = vsnprintf(string_buf, max_nob, format1, ap);
- va_end(ap);
- }
-
-
- if (format2) {
- remain = max_nob - needed;
- if (remain < 0)
- remain = 0;
-
- va_start(ap, format2);
- needed += vsnprintf(string_buf+needed, remain, format2, ap);
- va_end(ap);
- }
-
- if (needed < max_nob) /* well. printing ok.. */
- break;
- }
-
- if (*(string_buf+needed-1) != '\n')
- printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
- file, line, fn);
-
- header.ph_len = known_size + needed;
- debug_buf = (char *)cfs_page_address(tage->page) + tage->used;
-
- if (libcfs_debug_binary) {
- memcpy(debug_buf, &header, sizeof(header));
- tage->used += sizeof(header);
- debug_buf += sizeof(header);
- }
-
- /* indent message according to the nesting level */
- while (depth-- > 0) {
- *(debug_buf++) = '.';
- ++ tage->used;
- }
-
- strcpy(debug_buf, file);
- tage->used += strlen(file) + 1;
- debug_buf += strlen(file) + 1;
-
- if (fn) {
- strcpy(debug_buf, fn);
- tage->used += strlen(fn) + 1;
- debug_buf += strlen(fn) + 1;
- }
-
- __LASSERT(debug_buf == string_buf);
-
- tage->used += needed;
- __LASSERT (tage->used <= CFS_PAGE_SIZE);
-
-console:
- if ((mask & libcfs_printk) == 0) {
- /* no console output requested */
- if (tcd != NULL)
- trace_put_tcd(tcd);
- return 1;
- }
-
- if (cdls != NULL) {
- if (libcfs_console_ratelimit &&
- cdls->cdls_next != 0 && /* not first time ever */
- !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
- /* skipping a console message */
- cdls->cdls_count++;
- if (tcd != NULL)
- trace_put_tcd(tcd);
- return 1;
- }
-
- if (cfs_time_after(cfs_time_current(), cdls->cdls_next +
- libcfs_console_max_delay
- + cfs_time_seconds(10))) {
- /* last timeout was a long time ago */
- cdls->cdls_delay /= libcfs_console_backoff * 4;
- } else {
- cdls->cdls_delay *= libcfs_console_backoff;
-
- if (cdls->cdls_delay < libcfs_console_min_delay)
- cdls->cdls_delay = libcfs_console_min_delay;
- else if (cdls->cdls_delay > libcfs_console_max_delay)
- cdls->cdls_delay = libcfs_console_max_delay;
- }
-
- /* ensure cdls_next is never zero after it's been seen */
- cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
- }
-
- if (tcd != NULL) {
- print_to_console(&header, mask, string_buf, needed, file, fn);
- trace_put_tcd(tcd);
- } else {
- string_buf = trace_get_console_buffer();
-
- needed = 0;
- if (format1 != NULL) {
- va_copy(ap, args);
- needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap);
- va_end(ap);
- }
- if (format2 != NULL) {
- remain = TRACE_CONSOLE_BUFFER_SIZE - needed;
- if (remain > 0) {
- va_start(ap, format2);
- needed += vsnprintf(string_buf+needed, remain, format2, ap);
- va_end(ap);
- }
- }
- print_to_console(&header, mask,
- string_buf, needed, file, fn);
-
- trace_put_console_buffer(string_buf);
- }
-
- if (cdls != NULL && cdls->cdls_count != 0) {
- string_buf = trace_get_console_buffer();
-
- needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE,
- "Skipped %d previous similar message%s\n",
- cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : "");
-
- print_to_console(&header, mask,
- string_buf, needed, file, fn);
-
- trace_put_console_buffer(string_buf);
- cdls->cdls_count = 0;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(libcfs_debug_vmsg2);
-
-void
-libcfs_assertion_failed(const char *expr, const char *file,
- const char *func, const int line)
-{
- libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line,
- "ASSERTION(%s) failed\n", expr);
- LBUG();
-}
-EXPORT_SYMBOL(libcfs_assertion_failed);
-
-void
-trace_assertion_failed(const char *str,
- const char *fn, const char *file, int line)
-{
- struct ptldebug_header hdr;
-
- libcfs_panic_in_progress = 1;
- libcfs_catastrophe = 1;
- mb();
-
- set_ptldebug_header(&hdr, DEBUG_SUBSYSTEM, D_EMERG, line,
- CDEBUG_STACK());
-
- print_to_console(&hdr, D_EMERG, str, strlen(str), file, fn);
-
- LIBCFS_PANIC("Lustre debug assertion failure\n");
-
- /* not reached */
-}
-
-static void
-panic_collect_pages(struct page_collection *pc)
-{
- /* Do the collect_pages job on a single CPU: assumes that all other
- * CPUs have been stopped during a panic. If this isn't true for some
- * arch, this will have to be implemented separately in each arch. */
- int i;
- int j;
- struct trace_cpu_data *tcd;
-
- CFS_INIT_LIST_HEAD(&pc->pc_pages);
-
- tcd_for_each(tcd, i, j) {
- list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
- tcd->tcd_cur_pages = 0;
-
- if (pc->pc_want_daemon_pages) {
- list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
- tcd->tcd_cur_daemon_pages = 0;
- }
- }
-}
-
-static void collect_pages_on_cpu(void *info)
-{
- struct trace_cpu_data *tcd;
- struct page_collection *pc = info;
- int i;
-
- spin_lock(&pc->pc_lock);
- tcd_for_each_type_lock(tcd, i) {
- list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
- tcd->tcd_cur_pages = 0;
- if (pc->pc_want_daemon_pages) {
- list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
- tcd->tcd_cur_daemon_pages = 0;
- }
- }
- spin_unlock(&pc->pc_lock);
-}
-
-static void collect_pages(struct page_collection *pc)
-{
- CFS_INIT_LIST_HEAD(&pc->pc_pages);
-
- if (libcfs_panic_in_progress)
- panic_collect_pages(pc);
- else
- trace_call_on_all_cpus(collect_pages_on_cpu, pc);
-}
-
-static void put_pages_back_on_cpu(void *info)
-{
- struct page_collection *pc = info;
- struct trace_cpu_data *tcd;
- struct list_head *cur_head;
- struct trace_page *tage;
- struct trace_page *tmp;
- int i;
-
- spin_lock(&pc->pc_lock);
- tcd_for_each_type_lock(tcd, i) {
- cur_head = tcd->tcd_pages.next;
-
- list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (tage->cpu != smp_processor_id() || tage->type != i)
- continue;
-
- tage_to_tail(tage, cur_head);
- tcd->tcd_cur_pages++;
- }
- }
- spin_unlock(&pc->pc_lock);
-}
-
-static void put_pages_back(struct page_collection *pc)
-{
- if (!libcfs_panic_in_progress)
- trace_call_on_all_cpus(put_pages_back_on_cpu, pc);
-}
-
-/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
- * we have a good amount of data at all times for dumping during an LBUG, even
- * if we have been steadily writing (and otherwise discarding) pages via the
- * debug daemon. */
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
- struct trace_cpu_data *tcd)
-{
- struct trace_page *tage;
- struct trace_page *tmp;
-
- spin_lock(&pc->pc_lock);
- list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (tage->cpu != smp_processor_id() ||
- tage->type != tcd->tcd_type)
- continue;
-
- tage_to_tail(tage, &tcd->tcd_daemon_pages);
- tcd->tcd_cur_daemon_pages++;
-
- if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
- struct trace_page *victim;
-
- __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
- victim = tage_from_list(tcd->tcd_daemon_pages.next);
-
- __LASSERT_TAGE_INVARIANT(victim);
-
- list_del(&victim->linkage);
- tage_free(victim);
- tcd->tcd_cur_daemon_pages--;
- }
- }
- spin_unlock(&pc->pc_lock);
-}
-
-static void put_pages_on_daemon_list_on_cpu(void *info)
-{
- struct trace_cpu_data *tcd;
- int i;
-
- tcd_for_each_type_lock(tcd, i)
- put_pages_on_tcd_daemon_list(info, tcd);
-}
-
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
- trace_call_on_all_cpus(put_pages_on_daemon_list_on_cpu, pc);
-}
-
-void trace_debug_print(void)
-{
- struct page_collection pc;
- struct trace_page *tage;
- struct trace_page *tmp;
-
- spin_lock_init(&pc.pc_lock);
-
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- char *p, *file, *fn;
- cfs_page_t *page;
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- page = tage->page;
- p = cfs_page_address(page);
- while (p < ((char *)cfs_page_address(page) + tage->used)) {
- struct ptldebug_header *hdr;
- int len;
- hdr = (void *)p;
- p += sizeof(*hdr);
- file = p;
- p += strlen(file) + 1;
- fn = p;
- p += strlen(fn) + 1;
- len = hdr->ph_len - (p - (char *)hdr);
-
- print_to_console(hdr, D_EMERG, p, len, file, fn);
-
- p += len;
- }
-
- list_del(&tage->linkage);
- tage_free(tage);
- }
-}
-
-int tracefile_dump_all_pages(char *filename)
-{
- struct page_collection pc;
- cfs_file_t *filp;
- struct trace_page *tage;
- struct trace_page *tmp;
- int rc;
-
- CFS_DECL_MMSPACE;
-
- tracefile_write_lock();
-
- filp = cfs_filp_open(filename,
- O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
- if (!filp) {
- if (rc != -EEXIST)
- printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
- filename, rc);
- goto out;
- }
-
- spin_lock_init(&pc.pc_lock);
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
- if (list_empty(&pc.pc_pages)) {
- rc = 0;
- goto close;
- }
-
- /* ok, for now, just write the pages. in the future we'll be building
- * iobufs with the pages and calling generic_direct_IO */
- CFS_MMSPACE_OPEN;
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- rc = cfs_filp_write(filp, cfs_page_address(tage->page),
- tage->used, cfs_filp_poff(filp));
- if (rc != (int)tage->used) {
- printk(KERN_WARNING "wanted to write %u but wrote "
- "%d\n", tage->used, rc);
- put_pages_back(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- break;
- }
- list_del(&tage->linkage);
- tage_free(tage);
- }
- CFS_MMSPACE_CLOSE;
- rc = cfs_filp_fsync(filp);
- if (rc)
- printk(KERN_ERR "sync returns %d\n", rc);
- close:
- cfs_filp_close(filp);
- out:
- tracefile_write_unlock();
- return rc;
-}
-
-void trace_flush_pages(void)
-{
- struct page_collection pc;
- struct trace_page *tage;
- struct trace_page *tmp;
-
- spin_lock_init(&pc.pc_lock);
-
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- list_del(&tage->linkage);
- tage_free(tage);
- }
-}
-
-int trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char *usr_buffer, int usr_buffer_nob)
-{
- int nob;
-
- if (usr_buffer_nob > knl_buffer_nob)
- return -EOVERFLOW;
-
- if (copy_from_user((void *)knl_buffer,
- (void *)usr_buffer, usr_buffer_nob))
- return -EFAULT;
-
- nob = strnlen(knl_buffer, usr_buffer_nob);
- while (nob-- >= 0) /* strip trailing whitespace */
- if (!isspace(knl_buffer[nob]))
- break;
-
- if (nob < 0) /* empty string */
- return -EINVAL;
-
- if (nob == knl_buffer_nob) /* no space to terminate */
- return -EOVERFLOW;
-
- knl_buffer[nob + 1] = 0; /* terminate */
- return 0;
-}
-
-int trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
- const char *knl_buffer, char *append)
-{
- /* NB if 'append' != NULL, it's a single character to append to the
- * copied out string - usually "\n", for /proc entries and "" (i.e. a
- * terminating zero byte) for sysctl entries */
- int nob = strlen(knl_buffer);
-
- if (nob > usr_buffer_nob)
- nob = usr_buffer_nob;
-
- if (copy_to_user(usr_buffer, knl_buffer, nob))
- return -EFAULT;
-
- if (append != NULL && nob < usr_buffer_nob) {
- if (copy_to_user(usr_buffer + nob, append, 1))
- return -EFAULT;
-
- nob++;
- }
-
- return nob;
-}
-
-int trace_allocate_string_buffer(char **str, int nob)
-{
- if (nob > 2 * CFS_PAGE_SIZE) /* string must be "sensible" */
- return -EINVAL;
-
- *str = cfs_alloc(nob, CFS_ALLOC_STD | CFS_ALLOC_ZERO);
- if (*str == NULL)
- return -ENOMEM;
-
- return 0;
-}
-
-void trace_free_string_buffer(char *str, int nob)
-{
- cfs_free(str);
-}
-
-int trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob)
-{
- char *str;
- int rc;
-
- rc = trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc != 0)
- return rc;
-
- rc = trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (rc != 0)
- goto out;
-
-#if !defined(__WINNT__)
- if (str[0] != '/') {
- rc = -EINVAL;
- goto out;
- }
-#endif
- rc = tracefile_dump_all_pages(str);
-out:
- trace_free_string_buffer(str, usr_str_nob + 1);
- return rc;
-}
-
-int trace_daemon_command(char *str)
-{
- int rc = 0;
-
- tracefile_write_lock();
-
- if (strcmp(str, "stop") == 0) {
- trace_stop_thread();
- memset(tracefile, 0, sizeof(tracefile));
-
- } else if (strncmp(str, "size=", 5) == 0) {
- tracefile_size = simple_strtoul(str + 5, NULL, 0);
- if (tracefile_size < 10 || tracefile_size > 20480)
- tracefile_size = TRACEFILE_SIZE;
- else
- tracefile_size <<= 20;
-
- } else if (strlen(str) >= sizeof(tracefile)) {
- rc = -ENAMETOOLONG;
-#ifndef __WINNT__
- } else if (str[0] != '/') {
- rc = -EINVAL;
-#endif
- } else {
- strcpy(tracefile, str);
-
- printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
- "to %s (%lukB max)\n", tracefile,
- (long)(tracefile_size >> 10));
-
- trace_start_thread();
- }
-
- tracefile_write_unlock();
- return rc;
-}
-
-int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob)
-{
- char *str;
- int rc;
-
- rc = trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc != 0)
- return rc;
-
- rc = trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (rc == 0)
- rc = trace_daemon_command(str);
-
- trace_free_string_buffer(str, usr_str_nob + 1);
- return rc;
-}
-
-int trace_set_debug_mb(int mb)
-{
- int i;
- int j;
- int pages;
- int limit = trace_max_debug_mb();
- struct trace_cpu_data *tcd;
-
- if (mb < num_possible_cpus())
- return -EINVAL;
-
- if (mb > limit) {
- printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
- "%dMB - limit is %d\n", mb, limit);
- return -EINVAL;
- }
-
- mb /= num_possible_cpus();
- pages = mb << (20 - CFS_PAGE_SHIFT);
-
- tracefile_write_lock();
-
- tcd_for_each(tcd, i, j)
- tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
-
- tracefile_write_unlock();
-
- return 0;
-}
-
-int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob)
-{
- char str[32];
- int rc;
-
- rc = trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob);
- if (rc < 0)
- return rc;
-
- return trace_set_debug_mb(simple_strtoul(str, NULL, 0));
-}
-
-int trace_get_debug_mb(void)
-{
- int i;
- int j;
- struct trace_cpu_data *tcd;
- int total_pages = 0;
-
- tracefile_read_lock();
-
- tcd_for_each(tcd, i, j)
- total_pages += tcd->tcd_max_pages;
-
- tracefile_read_unlock();
-
- return (total_pages >> (20 - CFS_PAGE_SHIFT)) + 1;
-}
-
-static int tracefiled(void *arg)
-{
- struct page_collection pc;
- struct tracefiled_ctl *tctl = arg;
- struct trace_page *tage;
- struct trace_page *tmp;
- struct ptldebug_header *hdr;
- cfs_file_t *filp;
- int rc;
-
- CFS_DECL_MMSPACE;
-
- /* we're started late enough that we pick up init's fs context */
- /* this is so broken in uml? what on earth is going on? */
- cfs_daemonize("ktracefiled");
-
- spin_lock_init(&pc.pc_lock);
- complete(&tctl->tctl_start);
-
- while (1) {
- cfs_waitlink_t __wait;
-
- cfs_waitlink_init(&__wait);
- cfs_waitq_add(&tctl->tctl_waitq, &__wait);
- set_current_state(TASK_INTERRUPTIBLE);
- cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE,
- cfs_time_seconds(1));
- cfs_waitq_del(&tctl->tctl_waitq, &__wait);
-
- if (atomic_read(&tctl->tctl_shutdown))
- break;
-
- pc.pc_want_daemon_pages = 0;
- collect_pages(&pc);
- if (list_empty(&pc.pc_pages))
- continue;
-
- filp = NULL;
- tracefile_read_lock();
- if (tracefile[0] != 0) {
- filp = cfs_filp_open(tracefile,
- O_CREAT | O_RDWR | O_LARGEFILE,
- 0600, &rc);
- if (!(filp))
- printk(KERN_WARNING "couldn't open %s: %d\n",
- tracefile, rc);
- }
- tracefile_read_unlock();
- if (filp == NULL) {
- put_pages_on_daemon_list(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- continue;
- }
-
- CFS_MMSPACE_OPEN;
-
- /* mark the first header, so we can sort in chunks */
- tage = tage_from_list(pc.pc_pages.next);
- __LASSERT_TAGE_INVARIANT(tage);
-
- hdr = cfs_page_address(tage->page);
- hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
-
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- static loff_t f_pos;
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (f_pos >= (off_t)tracefile_size)
- f_pos = 0;
- else if (f_pos > cfs_filp_size(filp))
- f_pos = cfs_filp_size(filp);
-
- rc = cfs_filp_write(filp, cfs_page_address(tage->page),
- tage->used, &f_pos);
- if (rc != (int)tage->used) {
- printk(KERN_WARNING "wanted to write %u but "
- "wrote %d\n", tage->used, rc);
- put_pages_back(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- }
- }
- CFS_MMSPACE_CLOSE;
-
- cfs_filp_close(filp);
- put_pages_on_daemon_list(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- }
- complete(&tctl->tctl_stop);
- return 0;
-}
-
-int trace_start_thread(void)
-{
- struct tracefiled_ctl *tctl = &trace_tctl;
- int rc = 0;
-
- mutex_down(&trace_thread_sem);
- if (thread_running)
- goto out;
-
- init_completion(&tctl->tctl_start);
- init_completion(&tctl->tctl_stop);
- cfs_waitq_init(&tctl->tctl_waitq);
- atomic_set(&tctl->tctl_shutdown, 0);
-
- if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
- rc = -ECHILD;
- goto out;
- }
-
- wait_for_completion(&tctl->tctl_start);
- thread_running = 1;
-out:
- mutex_up(&trace_thread_sem);
- return rc;
-}
-
-void trace_stop_thread(void)
-{
- struct tracefiled_ctl *tctl = &trace_tctl;
-
- mutex_down(&trace_thread_sem);
- if (thread_running) {
- printk(KERN_INFO "Lustre: shutting down debug daemon thread...\n");
- atomic_set(&tctl->tctl_shutdown, 1);
- wait_for_completion(&tctl->tctl_stop);
- thread_running = 0;
- }
- mutex_up(&trace_thread_sem);
-}
-
-int tracefile_init(int max_pages)
-{
- struct trace_cpu_data *tcd;
- int i;
- int j;
- int rc;
- int factor;
-
- rc = tracefile_init_arch();
- if (rc != 0)
- return rc;
-
- tcd_for_each(tcd, i, j) {
- /* tcd_pages_factor is initialized int tracefile_init_arch. */
- factor = tcd->tcd_pages_factor;
- CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
- CFS_INIT_LIST_HEAD(&tcd->tcd_stock_pages);
- CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
- tcd->tcd_cur_pages = 0;
- tcd->tcd_cur_stock_pages = 0;
- tcd->tcd_cur_daemon_pages = 0;
- tcd->tcd_max_pages = (max_pages * factor) / 100;
- LASSERT(tcd->tcd_max_pages > 0);
- tcd->tcd_shutting_down = 0;
- }
-
- return 0;
-}
-
-static void trace_cleanup_on_cpu(void *info)
-{
- struct trace_cpu_data *tcd;
- struct trace_page *tage;
- struct trace_page *tmp;
- int i;
-
- tcd_for_each_type_lock(tcd, i) {
- tcd->tcd_shutting_down = 1;
-
- list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- list_del(&tage->linkage);
- tage_free(tage);
- }
- tcd->tcd_cur_pages = 0;
- }
-}
-
-static void trace_cleanup(void)
-{
- struct page_collection pc;
-
- CFS_INIT_LIST_HEAD(&pc.pc_pages);
- spin_lock_init(&pc.pc_lock);
-
- trace_call_on_all_cpus(trace_cleanup_on_cpu, &pc);
-
- tracefile_fini_arch();
-}
-
-void tracefile_exit(void)
-{
- trace_stop_thread();
- trace_cleanup();
-}
+++ /dev/null
-#ifndef __LIBCFS_TRACEFILE_H__
-#define __LIBCFS_TRACEFILE_H__
-
-#include <libcfs/libcfs.h>
-
-/* trace file lock routines */
-
-#define TRACEFILE_NAME_SIZE 1024
-extern char tracefile[TRACEFILE_NAME_SIZE];
-extern long long tracefile_size;
-
-int tracefile_init_arch(void);
-void tracefile_fini_arch(void);
-
-void tracefile_read_lock(void);
-void tracefile_read_unlock(void);
-void tracefile_write_lock(void);
-void tracefile_write_unlock(void);
-
-int tracefile_dump_all_pages(char *filename);
-void trace_debug_print(void);
-void trace_flush_pages(void);
-int trace_start_thread(void);
-void trace_stop_thread(void);
-int tracefile_init(int max_pages);
-void tracefile_exit(void);
-
-
-
-int trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char *usr_buffer, int usr_buffer_nob);
-int trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
- const char *knl_str, char *append);
-int trace_allocate_string_buffer(char **str, int nob);
-void trace_free_string_buffer(char *str, int nob);
-int trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob);
-int trace_daemon_command(char *str);
-int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob);
-int trace_set_debug_mb(int mb);
-int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob);
-int trace_get_debug_mb(void);
-
-extern void libcfs_debug_dumplog_internal(void *arg);
-extern void libcfs_register_panic_notifier(void);
-extern void libcfs_unregister_panic_notifier(void);
-extern int libcfs_panic_in_progress;
-extern int trace_max_debug_mb(void);
-
-#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-#define TRACEFILE_SIZE (500 << 20)
-
-#ifdef LUSTRE_TRACEFILE_PRIVATE
-
-/*
- * Private declare for tracefile
- */
-#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-
-#define TRACEFILE_SIZE (500 << 20)
-
-/* Size of a buffer for sprinting console messages if we can't get a page
- * from system */
-#define TRACE_CONSOLE_BUFFER_SIZE 1024
-
-union trace_data_union {
- struct trace_cpu_data {
- /*
- * pages with trace records not yet processed by tracefiled.
- */
- struct list_head tcd_pages;
- /* number of pages on ->tcd_pages */
- unsigned long tcd_cur_pages;
-
- /*
- * pages with trace records already processed by
- * tracefiled. These pages are kept in memory, so that some
- * portion of log can be written in the event of LBUG. This
- * list is maintained in LRU order.
- *
- * Pages are moved to ->tcd_daemon_pages by tracefiled()
- * (put_pages_on_daemon_list()). LRU pages from this list are
- * discarded when list grows too large.
- */
- struct list_head tcd_daemon_pages;
- /* number of pages on ->tcd_daemon_pages */
- unsigned long tcd_cur_daemon_pages;
-
- /*
- * Maximal number of pages allowed on ->tcd_pages and
- * ->tcd_daemon_pages each.
- * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
- * implementation.
- */
- unsigned long tcd_max_pages;
-
- /*
- * preallocated pages to write trace records into. Pages from
- * ->tcd_stock_pages are moved to ->tcd_pages by
- * portals_debug_msg().
- *
- * This list is necessary, because on some platforms it's
- * impossible to perform efficient atomic page allocation in a
- * non-blockable context.
- *
- * Such platforms fill ->tcd_stock_pages "on occasion", when
- * tracing code is entered in blockable context.
- *
- * trace_get_tage_try() tries to get a page from
- * ->tcd_stock_pages first and resorts to atomic page
- * allocation only if this queue is empty. ->tcd_stock_pages
- * is replenished when tracing code is entered in blocking
- * context (darwin-tracefile.c:trace_get_tcd()). We try to
- * maintain TCD_STOCK_PAGES (40 by default) pages in this
- * queue. Atomic allocation is only required if more than
- * TCD_STOCK_PAGES pagesful are consumed by trace records all
- * emitted in non-blocking contexts. Which is quite unlikely.
- */
- struct list_head tcd_stock_pages;
- /* number of pages on ->tcd_stock_pages */
- unsigned long tcd_cur_stock_pages;
-
- unsigned short tcd_shutting_down;
- unsigned short tcd_cpu;
- unsigned short tcd_type;
- /* The factors to share debug memory. */
- unsigned short tcd_pages_factor;
- } tcd;
- char __pad[L1_CACHE_ALIGN(sizeof(struct trace_cpu_data))];
-};
-
-#define TCD_MAX_TYPES 8
-extern union trace_data_union (*trace_data[TCD_MAX_TYPES])[NR_CPUS];
-
-#define tcd_for_each(tcd, i, j) \
- for (i = 0; trace_data[i] != NULL; i++) \
- for (j = 0, ((tcd) = &(*trace_data[i])[j].tcd); \
- j < num_possible_cpus(); j++, (tcd) = &(*trace_data[i])[j].tcd)
-
-#define tcd_for_each_type_lock(tcd, i) \
- for (i = 0; trace_data[i] && \
- (tcd = &(*trace_data[i])[smp_processor_id()].tcd) && \
- trace_lock_tcd(tcd); trace_unlock_tcd(tcd), i++)
-
-/* XXX nikita: this declaration is internal to tracefile.c and should probably
- * be moved there */
-struct page_collection {
- struct list_head pc_pages;
- /*
- * spin-lock protecting ->pc_pages. It is taken by smp_call_function()
- * call-back functions. XXX nikita: Which is horrible: all processors
- * receive NMI at the same time only to be serialized by this
- * lock. Probably ->pc_pages should be replaced with an array of
- * NR_CPUS elements accessed locklessly.
- */
- spinlock_t pc_lock;
- /*
- * if this flag is set, collect_pages() will spill both
- * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
- * only ->tcd_pages are spilled.
- */
- int pc_want_daemon_pages;
-};
-
-/* XXX nikita: this declaration is internal to tracefile.c and should probably
- * be moved there */
-struct tracefiled_ctl {
- struct completion tctl_start;
- struct completion tctl_stop;
- cfs_waitq_t tctl_waitq;
- pid_t tctl_pid;
- atomic_t tctl_shutdown;
-};
-
-/*
- * small data-structure for each page owned by tracefiled.
- */
-/* XXX nikita: this declaration is internal to tracefile.c and should probably
- * be moved there */
-struct trace_page {
- /*
- * page itself
- */
- cfs_page_t *page;
- /*
- * linkage into one of the lists in trace_data_union or
- * page_collection
- */
- struct list_head linkage;
- /*
- * number of bytes used within this page
- */
- unsigned int used;
- /*
- * cpu that owns this page
- */
- unsigned short cpu;
- /*
- * type(context) of this page
- */
- unsigned short type;
-};
-
-extern void set_ptldebug_header(struct ptldebug_header *header,
- int subsys, int mask, const int line,
- unsigned long stack);
-extern void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
- int len, const char *file, const char *fn);
-
-extern struct trace_cpu_data *trace_get_tcd(void);
-extern void trace_put_tcd(struct trace_cpu_data *tcd);
-extern int trace_lock_tcd(struct trace_cpu_data *tcd);
-extern void trace_unlock_tcd(struct trace_cpu_data *tcd);
-extern char *trace_get_console_buffer(void);
-extern void trace_put_console_buffer(char *buffer);
-
-extern void trace_call_on_all_cpus(void (*fn)(void *arg), void *arg);
-
-int trace_refill_stock(struct trace_cpu_data *tcd, int gfp,
- struct list_head *stock);
-
-
-int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage);
-
-extern void trace_assertion_failed(const char *str, const char *fn,
- const char *file, int line);
-
-/* ASSERTION that is safe to use within the debug system */
-#define __LASSERT(cond) \
-({ \
- if (unlikely(!(cond))) { \
- trace_assertion_failed("ASSERTION("#cond") failed", \
- __FUNCTION__, __FILE__, __LINE__); \
- } \
-})
-
-#define __LASSERT_TAGE_INVARIANT(tage) \
-({ \
- __LASSERT(tage != NULL); \
- __LASSERT(tage->page != NULL); \
- __LASSERT(tage->used <= CFS_PAGE_SIZE); \
- __LASSERT(cfs_page_count(tage->page) > 0); \
-})
-
-#endif /* LUSTRE_TRACEFILE_PRIVATE */
-
-#endif /* __LIBCFS_TRACEFILE_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2007 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- */
-#ifndef __KERNEL__
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-#include <libcfs/user-bitops.h>
-
-#define OFF_BY_START(start) ((start)/BITS_PER_LONG)
-
-unsigned long find_next_bit(unsigned long *addr,
- unsigned long size, unsigned long offset)
-{
- unsigned long *word, *last;
- unsigned long first_bit, bit, base;
-
- word = addr + OFF_BY_START(offset);
- last = addr + OFF_BY_START(size-1);
- first_bit = offset % BITS_PER_LONG;
- base = offset - first_bit;
-
- if (offset >= size)
- return size;
- if (first_bit != 0) {
- int tmp = (*word++) & (~0UL << first_bit);
- bit = __ffs(tmp);
- if (bit < BITS_PER_LONG)
- goto found;
- word++;
- base += BITS_PER_LONG;
- }
- while (word <= last) {
- if (*word != 0UL) {
- bit = __ffs(*word);
- goto found;
- }
- word++;
- base += BITS_PER_LONG;
- }
- return size;
-found:
- return base + bit;
-}
-
-unsigned long find_next_zero_bit(unsigned long *addr,
- unsigned long size, unsigned long offset)
-{
- unsigned long *word, *last;
- unsigned long first_bit, bit, base;
-
- word = addr + OFF_BY_START(offset);
- last = addr + OFF_BY_START(size-1);
- first_bit = offset % BITS_PER_LONG;
- base = offset - first_bit;
-
- if (offset >= size)
- return size;
- if (first_bit != 0) {
- int tmp = (*word++) & (~0UL << first_bit);
- bit = __ffz(tmp);
- if (bit < BITS_PER_LONG)
- goto found;
- word++;
- base += BITS_PER_LONG;
- }
- while (word <= last) {
- if (*word != ~0UL) {
- bit = __ffz(*word);
- goto found;
- }
- word++;
- base += BITS_PER_LONG;
- }
- return size;
-found:
- return base + bit;
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable time API for user-level.
- *
- */
-
-/* Implementations of portable synchronization APIs for liblustre */
-
-/*
- * liblustre is single-threaded, so most "synchronization" APIs are trivial.
- *
- * XXX Liang: There are several branches share lnet with b_hd_newconfig,
- * if we define lock APIs at here, there will be conflict with liblustre
- * in other branches.
- */
-
-#ifndef __KERNEL__
-
-#include <stdlib.h>
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-/*
- * Optional debugging (magic stamping and checking ownership) can be added.
- */
-
-#if 0
-/*
- * spin_lock
- *
- * - spin_lock_init(x)
- * - spin_lock(x)
- * - spin_unlock(x)
- * - spin_trylock(x)
- *
- * - spin_lock_irqsave(x, f)
- * - spin_unlock_irqrestore(x, f)
- *
- * No-op implementation.
- */
-
-void spin_lock_init(spinlock_t *lock)
-{
- LASSERT(lock != NULL);
- (void)lock;
-}
-
-void spin_lock(spinlock_t *lock)
-{
- (void)lock;
-}
-
-void spin_unlock(spinlock_t *lock)
-{
- (void)lock;
-}
-
-int spin_trylock(spinlock_t *lock)
-{
- (void)lock;
- return 1;
-}
-
-void spin_lock_bh_init(spinlock_t *lock)
-{
- LASSERT(lock != NULL);
- (void)lock;
-}
-
-void spin_lock_bh(spinlock_t *lock)
-{
- LASSERT(lock != NULL);
- (void)lock;
-}
-
-void spin_unlock_bh(spinlock_t *lock)
-{
- LASSERT(lock != NULL);
- (void)lock;
-}
-
-/*
- * Semaphore
- *
- * - sema_init(x, v)
- * - __down(x)
- * - __up(x)
- */
-struct semaphore {};
-
-void sema_init(struct semaphore *s, int val)
-{
- LASSERT(s != NULL);
- (void)s;
- (void)val;
-}
-
-void __down(struct semaphore *s)
-{
- LASSERT(s != NULL);
- (void)s;
-}
-
-void __up(struct semaphore *s)
-{
- LASSERT(s != NULL);
- (void)s;
-}
-
-/*
- * Mutex:
- *
- * - init_mutex(x)
- * - init_mutex_locked(x)
- * - mutex_up(x)
- * - mutex_down(x)
- */
-
-#define mutex_up(s) __up(s)
-#define mutex_down(s) __down(s)
-
-#define init_mutex(x) sema_init(x, 1)
-#define init_mutex_locked(x) sema_init(x, 0)
-
-/*
- * Completion:
- *
- * - init_completion(c)
- * - complete(c)
- * - wait_for_completion(c)
- */
-struct completion {};
-
-void init_completion(struct completion *c)
-{
- LASSERT(c != NULL);
- (void)c;
-}
-
-void complete(struct completion *c)
-{
- LASSERT(c != NULL);
- (void)c;
-}
-
-void wait_for_completion(struct completion *c)
-{
- LASSERT(c != NULL);
- (void)c;
-}
-
-/*
- * rw_semaphore:
- *
- * - DECLARE_RWSEM(x)
- * - init_rwsem(x)
- * - down_read(x)
- * - up_read(x)
- * - down_write(x)
- * - up_write(x)
- */
-struct rw_semaphore {};
-
-void init_rwsem(struct rw_semaphore *s)
-{
- LASSERT(s != NULL);
- (void)s;
-}
-
-void down_read(struct rw_semaphore *s)
-{
- LASSERT(s != NULL);
- (void)s;
-}
-
-int down_read_trylock(struct rw_semaphore *s)
-{
- LASSERT(s != NULL);
- (void)s;
- return 1;
-}
-
-void down_write(struct rw_semaphore *s)
-{
- LASSERT(s != NULL);
- (void)s;
-}
-
-int down_write_trylock(struct rw_semaphore *s)
-{
- LASSERT(s != NULL);
- (void)s;
- return 1;
-}
-
-void up_read(struct rw_semaphore *s)
-{
- LASSERT(s != NULL);
- (void)s;
-}
-
-void up_write(struct rw_semaphore *s)
-{
- LASSERT(s != NULL);
- (void)s;
-}
-#endif
-
-#ifdef HAVE_LIBPTHREAD
-
-/*
- * Completion
- */
-
-void cfs_init_completion(struct cfs_completion *c)
-{
- LASSERT(c != NULL);
- c->c_done = 0;
- pthread_mutex_init(&c->c_mut, NULL);
- pthread_cond_init(&c->c_cond, NULL);
-}
-
-void cfs_fini_completion(struct cfs_completion *c)
-{
- LASSERT(c != NULL);
- pthread_mutex_destroy(&c->c_mut);
- pthread_cond_destroy(&c->c_cond);
-}
-
-void cfs_complete(struct cfs_completion *c)
-{
- LASSERT(c != NULL);
- pthread_mutex_lock(&c->c_mut);
- c->c_done++;
- pthread_cond_signal(&c->c_cond);
- pthread_mutex_unlock(&c->c_mut);
-}
-
-void cfs_wait_for_completion(struct cfs_completion *c)
-{
- LASSERT(c != NULL);
- pthread_mutex_lock(&c->c_mut);
- while (c->c_done == 0)
- pthread_cond_wait(&c->c_cond, &c->c_mut);
- c->c_done--;
- pthread_mutex_unlock(&c->c_mut);
-}
-
-/*
- * atomic primitives
- */
-
-static pthread_mutex_t atomic_guard_lock = PTHREAD_MUTEX_INITIALIZER;
-
-int cfs_atomic_read(cfs_atomic_t *a)
-{
- int r;
-
- pthread_mutex_lock(&atomic_guard_lock);
- r = a->counter;
- pthread_mutex_unlock(&atomic_guard_lock);
- return r;
-}
-
-void cfs_atomic_set(cfs_atomic_t *a, int b)
-{
- pthread_mutex_lock(&atomic_guard_lock);
- a->counter = b;
- pthread_mutex_unlock(&atomic_guard_lock);
-}
-
-int cfs_atomic_dec_and_test(cfs_atomic_t *a)
-{
- int r;
-
- pthread_mutex_lock(&atomic_guard_lock);
- r = --a->counter;
- pthread_mutex_unlock(&atomic_guard_lock);
- return (r == 0);
-}
-
-void cfs_atomic_inc(cfs_atomic_t *a)
-{
- pthread_mutex_lock(&atomic_guard_lock);
- ++a->counter;
- pthread_mutex_unlock(&atomic_guard_lock);
-}
-
-void cfs_atomic_dec(cfs_atomic_t *a)
-{
- pthread_mutex_lock(&atomic_guard_lock);
- --a->counter;
- pthread_mutex_unlock(&atomic_guard_lock);
-}
-void cfs_atomic_add(int b, cfs_atomic_t *a)
-
-{
- pthread_mutex_lock(&atomic_guard_lock);
- a->counter += b;
- pthread_mutex_unlock(&atomic_guard_lock);
-}
-
-void cfs_atomic_sub(int b, cfs_atomic_t *a)
-{
- pthread_mutex_lock(&atomic_guard_lock);
- a->counter -= b;
- pthread_mutex_unlock(&atomic_guard_lock);
-}
-
-#endif /* HAVE_LIBPTHREAD */
-
-
-/* !__KERNEL__ */
-#endif
-
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Nikita Danilov <nikita@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under the
- * terms of version 2 of the GNU General Public License as published by the
- * Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along
- * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
- * Ave, Cambridge, MA 02139, USA.
- *
- * Implementation of portable APIs for user-level.
- *
- */
-
-/* Implementations of portable APIs for liblustre */
-
-/*
- * liblustre is single-threaded, so most "synchronization" APIs are trivial.
- */
-
-#ifndef __KERNEL__
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-#include <sys/mman.h>
-#ifndef __CYGWIN__
-#include <stdint.h>
-#ifdef HAVE_ASM_PAGE_H
-#include <asm/page.h>
-#endif
-#ifdef HAVE_SYS_USER_H
-#include <sys/user.h>
-#endif
-#else
-#include <sys/types.h>
-#endif
-#include <stdlib.h>
-#include <string.h>
-#include <signal.h>
-#include <errno.h>
-#include <sys/stat.h>
-#ifdef HAVE_SYS_VFS_H
-#include <sys/vfs.h>
-#endif
-
-/*
- * Sleep channel. No-op implementation.
- */
-
-void cfs_waitq_init(struct cfs_waitq *waitq)
-{
- LASSERT(waitq != NULL);
- (void)waitq;
-}
-
-void cfs_waitlink_init(struct cfs_waitlink *link)
-{
- LASSERT(link != NULL);
- (void)link;
-}
-
-void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link)
-{
- LASSERT(waitq != NULL);
- LASSERT(link != NULL);
- (void)waitq;
- (void)link;
-}
-
-void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, struct cfs_waitlink *link)
-{
- LASSERT(waitq != NULL);
- LASSERT(link != NULL);
- (void)waitq;
- (void)link;
-}
-
-void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq)
-{
- LASSERT(waitq != NULL);
- LASSERT(link != NULL);
- (void)waitq;
- (void)link;
-}
-
-void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link)
-{
- LASSERT(waitq != NULL);
- LASSERT(link != NULL);
- (void)waitq;
- (void)link;
-}
-
-int cfs_waitq_active(struct cfs_waitq *waitq)
-{
- LASSERT(waitq != NULL);
- (void)waitq;
- return 0;
-}
-
-void cfs_waitq_signal(struct cfs_waitq *waitq)
-{
- LASSERT(waitq != NULL);
- (void)waitq;
-}
-
-void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr)
-{
- LASSERT(waitq != NULL);
- (void)waitq;
-}
-
-void cfs_waitq_broadcast(struct cfs_waitq *waitq)
-{
- LASSERT(waitq != NULL);
- (void)waitq;
-}
-
-void cfs_waitq_wait(struct cfs_waitlink *link, int state)
-{
- LASSERT(link != NULL);
- (void)link;
-}
-
-int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout)
-{
- LASSERT(link != NULL);
- (void)link;
- return 0;
-}
-
-#ifdef HAVE_LIBPTHREAD
-
-/*
- * Threads
- */
-
-struct lustre_thread_arg {
- cfs_thread_t f;
- void *arg;
-};
-static void *cfs_thread_helper(void *data)
-{
- struct lustre_thread_arg *targ = data;
- cfs_thread_t f = targ->f;
- void *arg = targ->arg;
-
- free(targ);
-
- (void)f(arg);
- return NULL;
-}
-int cfs_create_thread(cfs_thread_t func, void *arg)
-{
- pthread_t tid;
- pthread_attr_t tattr;
- int rc;
- struct lustre_thread_arg *targ_p = malloc(sizeof(struct lustre_thread_arg));
-
- if ( targ_p == NULL )
- return -ENOMEM;
-
- targ_p->f = func;
- targ_p->arg = arg;
-
- pthread_attr_init(&tattr);
- pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED);
- rc = pthread_create(&tid, &tattr, cfs_thread_helper, targ_p);
- pthread_attr_destroy(&tattr);
- return -rc;
-}
-#endif
-
-uid_t cfs_curproc_uid(void)
-{
- return getuid();
-}
-
-int cfs_parse_int_tunable(int *value, char *name)
-{
- char *env = getenv(name);
- char *end;
-
- if (env == NULL)
- return 0;
-
- *value = strtoull(env, &end, 0);
- if (*end == 0)
- return 0;
-
- CERROR("Can't parse tunable %s=%s\n", name, env);
- return -EINVAL;
-}
-
-/*
- * Allocator
- */
-
-cfs_page_t *cfs_alloc_page(unsigned int flags)
-{
- cfs_page_t *pg = malloc(sizeof(*pg));
-
- if (!pg)
- return NULL;
- pg->addr = malloc(CFS_PAGE_SIZE);
-
- if (!pg->addr) {
- free(pg);
- return NULL;
- }
- return pg;
-}
-
-void cfs_free_page(cfs_page_t *pg)
-{
- free(pg->addr);
- free(pg);
-}
-
-void *cfs_page_address(cfs_page_t *pg)
-{
- return pg->addr;
-}
-
-void *cfs_kmap(cfs_page_t *pg)
-{
- return pg->addr;
-}
-
-void cfs_kunmap(cfs_page_t *pg)
-{
-}
-
-/*
- * SLAB allocator
- */
-
-cfs_mem_cache_t *
-cfs_mem_cache_create(const char *name, size_t objsize, size_t off, unsigned long flags)
-{
- cfs_mem_cache_t *c;
-
- c = malloc(sizeof(*c));
- if (!c)
- return NULL;
- c->size = objsize;
- CDEBUG(D_MALLOC, "alloc slab cache %s at %p, objsize %d\n",
- name, c, (int)objsize);
- return c;
-}
-
-int cfs_mem_cache_destroy(cfs_mem_cache_t *c)
-{
- CDEBUG(D_MALLOC, "destroy slab cache %p, objsize %u\n", c, c->size);
- free(c);
- return 0;
-}
-
-void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp)
-{
- return cfs_alloc(c->size, gfp);
-}
-
-void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr)
-{
- cfs_free(addr);
-}
-
-void cfs_enter_debugger(void)
-{
- /*
- * nothing for now.
- */
-}
-
-void cfs_daemonize(char *str)
-{
- return;
-}
-
-int cfs_daemonize_ctxt(char *str)
-{
- return 0;
-}
-
-cfs_sigset_t cfs_block_allsigs(void)
-{
- cfs_sigset_t all;
- cfs_sigset_t old;
- int rc;
-
- sigfillset(&all);
- rc = sigprocmask(SIG_SETMASK, &all, &old);
- LASSERT(rc == 0);
-
- return old;
-}
-
-cfs_sigset_t cfs_block_sigs(cfs_sigset_t blocks)
-{
- cfs_sigset_t old;
- int rc;
-
- rc = sigprocmask(SIG_SETMASK, &blocks, &old);
- LASSERT (rc == 0);
-
- return old;
-}
-
-void cfs_restore_sigs(cfs_sigset_t old)
-{
- int rc = sigprocmask(SIG_SETMASK, &old, NULL);
-
- LASSERT (rc == 0);
-}
-
-int cfs_signal_pending(void)
-{
- cfs_sigset_t empty;
- cfs_sigset_t set;
- int rc;
-
- rc = sigpending(&set);
- LASSERT (rc == 0);
-
- sigemptyset(&empty);
-
- return !memcmp(&empty, &set, sizeof(set));
-}
-
-void cfs_clear_sigpending(void)
-{
- return;
-}
-
-#ifdef __linux__
-
-/*
- * In glibc (NOT in Linux, so check above is not right), implement
- * stack-back-tracing through backtrace() function.
- */
-#include <execinfo.h>
-
-void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
-{
- backtrace(trace->frame, sizeof_array(trace->frame));
-}
-
-void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
-{
- if (0 <= frame_no && frame_no < sizeof_array(trace->frame))
- return trace->frame[frame_no];
- else
- return NULL;
-}
-
-#else
-
-void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
-{}
-void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
-{
- return NULL;
-}
-
-/* __linux__ */
-#endif
-
-void lbug_with_loc(char *file, const char *func, const int line)
-{
- /* No libcfs_catastrophe in userspace! */
- libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, "LBUG\n");
- abort();
-}
-
-/* !__KERNEL__ */
-#endif
-
-/*
- * Local variables:
- * c-indentation-style: "K&R"
- * c-basic-offset: 8
- * tab-width: 8
- * fill-column: 80
- * scroll-step: 1
- * End:
- */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#if !defined(__KERNEL__) || !defined(REDSTORM)
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-#include <sys/socket.h>
-#ifdef HAVE_NETINET_IN_H
-#include <netinet/in.h>
-#endif
-#include <netinet/tcp.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <string.h>
-#include <unistd.h>
-#include <poll.h>
-#include <net/if.h>
-#include <arpa/inet.h>
-#include <errno.h>
-#if defined(__sun__) || defined(__sun)
-#include <sys/sockio.h>
-#endif
-#ifndef __CYGWIN__
-#include <sys/syscall.h>
-#endif
-
-/*
- * Functions to get network interfaces info
- */
-
-int
-libcfs_sock_ioctl(int cmd, unsigned long arg)
-{
- int fd, rc;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
-
- if (fd < 0) {
- rc = -errno;
- CERROR("socket() failed: errno==%d\n", errno);
- return rc;
- }
-
- rc = ioctl(fd, cmd, arg);
-
- close(fd);
- return rc;
-}
-
-int
-libcfs_ipif_query (char *name, int *up, __u32 *ip)
-{
- struct ifreq ifr;
- int nob;
- int rc;
- __u32 val;
-
- nob = strlen(name);
- if (nob >= IFNAMSIZ) {
- CERROR("Interface name %s too long\n", name);
- return -EINVAL;
- }
-
- CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
-
- strcpy(ifr.ifr_name, name);
- rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
-
- if (rc != 0) {
- CERROR("Can't get flags for interface %s\n", name);
- return rc;
- }
-
- if ((ifr.ifr_flags & IFF_UP) == 0) {
- CDEBUG(D_NET, "Interface %s down\n", name);
- *up = 0;
- *ip = 0;
- return 0;
- }
-
- *up = 1;
-
- strcpy(ifr.ifr_name, name);
- ifr.ifr_addr.sa_family = AF_INET;
- rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
-
- if (rc != 0) {
- CERROR("Can't get IP address for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *ip = ntohl(val);
-
- return 0;
-}
-
-void
-libcfs_ipif_free_enumeration (char **names, int n)
-{
- int i;
-
- LASSERT (n > 0);
-
- for (i = 0; i < n && names[i] != NULL; i++)
- LIBCFS_FREE(names[i], IFNAMSIZ);
-
- LIBCFS_FREE(names, n * sizeof(*names));
-}
-
-int
-libcfs_ipif_enumerate (char ***namesp)
-{
- /* Allocate and fill in 'names', returning # interfaces/error */
- char **names;
- int nalloc;
- int nfound;
- struct ifreq *ifr;
- struct ifconf ifc;
- int rc;
- int nob;
- int i;
-
-
- nalloc = 16; /* first guess at max interfaces */
- for (;;) {
- LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
- if (ifr == NULL) {
- CERROR ("ENOMEM enumerating up to %d interfaces\n",
- nalloc);
- rc = -ENOMEM;
- goto out0;
- }
-
- ifc.ifc_buf = (char *)ifr;
- ifc.ifc_len = nalloc * sizeof(*ifr);
-
- rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
-
- if (rc < 0) {
- CERROR ("Error %d enumerating interfaces\n", rc);
- goto out1;
- }
-
- LASSERT (rc == 0);
-
- nfound = ifc.ifc_len/sizeof(*ifr);
- LASSERT (nfound <= nalloc);
-
- if (nfound < nalloc)
- break;
-
- LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
- nalloc *= 2;
- }
-
- if (nfound == 0)
- goto out1;
-
- LIBCFS_ALLOC(names, nfound * sizeof(*names));
- if (names == NULL) {
- rc = -ENOMEM;
- goto out1;
- }
- /* NULL out all names[i] */
- memset (names, 0, nfound * sizeof(*names));
-
- for (i = 0; i < nfound; i++) {
-
- nob = strlen (ifr[i].ifr_name);
- if (nob >= IFNAMSIZ) {
- /* no space for terminating NULL */
- CERROR("interface name %.*s too long (%d max)\n",
- nob, ifr[i].ifr_name, IFNAMSIZ);
- rc = -ENAMETOOLONG;
- goto out2;
- }
-
- LIBCFS_ALLOC(names[i], IFNAMSIZ);
- if (names[i] == NULL) {
- rc = -ENOMEM;
- goto out2;
- }
-
- memcpy(names[i], ifr[i].ifr_name, nob);
- names[i][nob] = 0;
- }
-
- *namesp = names;
- rc = nfound;
-
- out2:
- if (rc < 0)
- libcfs_ipif_free_enumeration(names, nfound);
- out1:
- LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
- out0:
- return rc;
-}
-
-/*
- * Network functions used by user-land lnet acceptor
- */
-
-int
-libcfs_sock_listen (int *sockp, __u32 local_ip, int local_port, int backlog)
-{
- int rc;
- int option;
- struct sockaddr_in locaddr;
-
- *sockp = socket(AF_INET, SOCK_STREAM, 0);
- if (*sockp < 0) {
- rc = -errno;
- CERROR("socket() failed: errno==%d\n", errno);
- return rc;
- }
-
- option = 1;
- if ( setsockopt(*sockp, SOL_SOCKET, SO_REUSEADDR,
- (char *)&option, sizeof (option)) ) {
- rc = -errno;
- CERROR("setsockopt(SO_REUSEADDR) failed: errno==%d\n", errno);
- goto failed;
- }
-
- if (local_ip != 0 || local_port != 0) {
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(local_port);
- locaddr.sin_addr.s_addr = (local_ip == 0) ?
- INADDR_ANY : htonl(local_ip);
-
- if ( bind(*sockp, (struct sockaddr *)&locaddr, sizeof(locaddr)) ) {
- rc = -errno;
- if ( errno == -EADDRINUSE )
- CDEBUG(D_NET, "Port %d already in use\n",
- local_port);
- else
- CERROR("bind() to port %d failed: errno==%d\n",
- local_port, errno);
- goto failed;
- }
- }
-
- if ( listen(*sockp, backlog) ) {
- rc = -errno;
- CERROR("listen() with backlog==%d failed: errno==%d\n",
- backlog, errno);
- goto failed;
- }
-
- return 0;
-
- failed:
- close(*sockp);
- return rc;
-}
-
-int
-libcfs_sock_accept (int *newsockp, int sock, __u32 *peer_ip, int *peer_port)
-{
- struct sockaddr_in accaddr;
- socklen_t accaddr_len = sizeof(struct sockaddr_in);
-
- *newsockp = accept(sock, (struct sockaddr *)&accaddr, &accaddr_len);
-
- if ( *newsockp < 0 ) {
- CERROR("accept() failed: errno==%d\n", errno);
- return -errno;
- }
-
- *peer_ip = ntohl(accaddr.sin_addr.s_addr);
- *peer_port = ntohs(accaddr.sin_port);
-
- return 0;
-}
-
-int
-libcfs_sock_read (int sock, void *buffer, int nob, int timeout)
-{
- int rc;
- struct pollfd pfd;
- cfs_time_t start_time = cfs_time_current();
-
- pfd.fd = sock;
- pfd.events = POLLIN;
- pfd.revents = 0;
-
- /* poll(2) measures timeout in msec */
- timeout *= 1000;
-
- while (nob != 0 && timeout > 0) {
- cfs_time_t current_time;
-
- rc = poll(&pfd, 1, timeout);
- if (rc < 0)
- return -errno;
- if (rc == 0)
- return -ETIMEDOUT;
- if ((pfd.revents & POLLIN) == 0)
- return -EIO;
-
- rc = read(sock, buffer, nob);
- if (rc < 0)
- return -errno;
- if (rc == 0)
- return -EIO;
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
-
- current_time = cfs_time_current();
- timeout -= cfs_duration_sec(cfs_time_sub(cfs_time_current(),
- start_time));
- }
-
- if (nob == 0)
- return 0;
- else
- return -ETIMEDOUT;
-}
-
-/* Just try to connect to localhost to wake up entity that are
- * sleeping in accept() */
-void
-libcfs_sock_abort_accept(__u16 port)
-{
- int fd, rc;
- struct sockaddr_in locaddr;
-
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(port);
- locaddr.sin_addr.s_addr = inet_addr("127.0.0.1");
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if ( fd < 0 ) {
- CERROR("socket() failed: errno==%d\n", errno);
- return;
- }
-
- rc = connect(fd, (struct sockaddr *)&locaddr, sizeof(locaddr));
- if ( rc != 0 ) {
- if ( errno != ECONNREFUSED )
- CERROR("connect() failed: errno==%d\n", errno);
- else
- CDEBUG(D_NET, "Nobody to wake up at %d\n", port);
- }
-
- close(fd);
-}
-
-/*
- * Network functions of common use
- */
-
-int
-libcfs_getpeername(int sock_fd, __u32 *ipaddr_p, __u16 *port_p)
-{
- int rc;
- struct sockaddr_in peer_addr;
- socklen_t peer_addr_len = sizeof(peer_addr);
-
- rc = getpeername(sock_fd, (struct sockaddr *)&peer_addr, &peer_addr_len);
- if (rc != 0)
- return -errno;
-
- if (ipaddr_p != NULL)
- *ipaddr_p = ntohl(peer_addr.sin_addr.s_addr);
- if (port_p != NULL)
- *port_p = ntohs(peer_addr.sin_port);
-
- return 0;
-}
-
-int
-libcfs_socketpair(int *fdp)
-{
- int rc, i;
-
- rc = socketpair(AF_UNIX, SOCK_STREAM, 0, fdp);
- if (rc != 0) {
- rc = -errno;
- CERROR ("Cannot create socket pair\n");
- return rc;
- }
-
- for (i = 0; i < 2; i++) {
- rc = libcfs_fcntl_nonblock(fdp[i]);
- if (rc) {
- close(fdp[0]);
- close(fdp[1]);
- return rc;
- }
- }
-
- return 0;
-}
-
-int
-libcfs_fcntl_nonblock(int fd)
-{
- int rc, flags;
-
- flags = fcntl(fd, F_GETFL, 0);
- if (flags == -1) {
- rc = -errno;
- CERROR ("Cannot get socket flags\n");
- return rc;
- }
-
- rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
- if (rc != 0) {
- rc = -errno;
- CERROR ("Cannot set socket flags\n");
- return rc;
- }
-
- return 0;
-}
-
-int
-libcfs_sock_set_nagle(int fd, int nagle)
-{
- int rc;
- int option = nagle ? 0 : 1;
-
-#if defined(__sun__) || defined(__sun)
- rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option));
-#else
- rc = setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
-#endif
-
- if (rc != 0) {
- rc = -errno;
- CERROR ("Cannot set NODELAY socket option\n");
- return rc;
- }
-
- return 0;
-}
-
-int
-libcfs_sock_set_bufsiz(int fd, int bufsiz)
-{
- int rc, option;
-
- LASSERT (bufsiz != 0);
-
- option = bufsiz;
- rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
- if (rc != 0) {
- rc = -errno;
- CERROR ("Cannot set SNDBUF socket option\n");
- return rc;
- }
-
- option = bufsiz;
- rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
- if (rc != 0) {
- rc = -errno;
- CERROR ("Cannot set RCVBUF socket option\n");
- return rc;
- }
-
- return 0;
-}
-
-int
-libcfs_sock_create(int *fdp)
-{
- int rc, fd, option;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- rc = -errno;
- CERROR ("Cannot create socket\n");
- return rc;
- }
-
- option = 1;
- rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
- &option, sizeof(option));
- if (rc != 0) {
- rc = -errno;
- CERROR ("Cannot set SO_REUSEADDR for socket\n");
- close(fd);
- return rc;
- }
-
- *fdp = fd;
- return 0;
-}
-
-int
-libcfs_sock_bind_to_port(int fd, __u16 port)
-{
- int rc;
- struct sockaddr_in locaddr;
-
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_addr.s_addr = INADDR_ANY;
- locaddr.sin_port = htons(port);
-
- rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr));
- if (rc != 0) {
- rc = -errno;
- CERROR ("Cannot bind to port %d\n", port);
- return rc;
- }
-
- return 0;
-}
-
-int
-libcfs_sock_connect(int fd, __u32 ip, __u16 port)
-{
- int rc;
- struct sockaddr_in addr;
-
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(ip);
- addr.sin_port = htons(port);
-
- rc = connect(fd, (struct sockaddr *)&addr,
- sizeof(struct sockaddr_in));
-
- if(rc != 0 && errno != EINPROGRESS) {
- rc = -errno;
- if (rc != -EADDRINUSE && rc != -EADDRNOTAVAIL)
- CERROR ("Cannot connect to %u.%u.%u.%u:%d (err=%d)\n",
- HIPQUAD(ip), port, errno);
- return rc;
- }
-
- return 0;
-}
-
-/* NB: EPIPE and ECONNRESET are considered as non-fatal
- * because:
- * 1) it still makes sense to continue reading &&
- * 2) anyway, poll() will set up POLLHUP|POLLERR flags */
-int libcfs_sock_writev(int fd, const struct iovec *vector, int count)
-{
- int rc;
-
- rc = syscall(SYS_writev, fd, vector, count);
-
- if (rc == 0) /* write nothing */
- return 0;
-
- if (rc < 0) {
- if (errno == EAGAIN || /* write nothing */
- errno == EPIPE || /* non-fatal error */
- errno == ECONNRESET) /* non-fatal error */
- return 0;
- else
- return -errno;
- }
-
- return rc;
-}
-
-int libcfs_sock_readv(int fd, const struct iovec *vector, int count)
-{
- int rc;
-
- rc = syscall(SYS_readv, fd, vector, count);
-
- if (rc == 0) /* EOF */
- return -EIO;
-
- if (rc < 0) {
- if (errno == EAGAIN) /* read nothing */
- return 0;
- else
- return -errno;
- }
-
- return rc;
-}
-
-#endif /* !__KERNEL__ || !defined(REDSTORM) */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- * Author: Jacob Berkman <jacob@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/kp30.h>
-#include <libcfs/libcfs.h>
-#include "tracefile.h"
-
-struct lc_watchdog {
- cfs_timer_t lcw_timer; /* kernel timer */
- struct list_head lcw_list;
- struct timeval lcw_last_touched;
- cfs_task_t *lcw_task;
-
- void (*lcw_callback)(pid_t, void *);
- void *lcw_data;
-
- pid_t lcw_pid;
- cfs_duration_t lcw_time; /* time until watchdog fires, jiffies */
-
- enum {
- LC_WATCHDOG_DISABLED,
- LC_WATCHDOG_ENABLED,
- LC_WATCHDOG_EXPIRED
- } lcw_state;
-};
-
-#ifdef WITH_WATCHDOG
-/*
- * The dispatcher will complete lcw_start_completion when it starts,
- * and lcw_stop_completion when it exits.
- * Wake lcw_event_waitq to signal timer callback dispatches.
- */
-static struct completion lcw_start_completion;
-static struct completion lcw_stop_completion;
-static wait_queue_head_t lcw_event_waitq;
-
-/*
- * Set this and wake lcw_event_waitq to stop the dispatcher.
- */
-enum {
- LCW_FLAG_STOP = 0
-};
-static unsigned long lcw_flags = 0;
-
-/*
- * Number of outstanding watchdogs.
- * When it hits 1, we start the dispatcher.
- * When it hits 0, we stop the distpatcher.
- */
-static __u32 lcw_refcount = 0;
-static DECLARE_MUTEX(lcw_refcount_sem);
-
-/*
- * List of timers that have fired that need their callbacks run by the
- * dispatcher.
- */
-static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; /* BH lock! */
-static struct list_head lcw_pending_timers = \
- LIST_HEAD_INIT(lcw_pending_timers);
-
-#ifdef HAVE_TASKLIST_LOCK
-static void
-lcw_dump(struct lc_watchdog *lcw)
-{
- cfs_task_t *tsk;
- ENTRY;
-
- read_lock(&tasklist_lock);
- tsk = find_task_by_pid(lcw->lcw_pid);
-
- if (tsk == NULL) {
- CWARN("Process %d was not found in the task list; "
- "watchdog callback may be incomplete\n", (int)lcw->lcw_pid);
- } else if (tsk != lcw->lcw_task) {
- CWARN("The current process %d did not set the watchdog; "
- "watchdog callback may be incomplete\n", (int)lcw->lcw_pid);
- } else {
- libcfs_debug_dumpstack(tsk);
- }
-
- read_unlock(&tasklist_lock);
- EXIT;
-}
-#else
-static void
-lcw_dump(struct lc_watchdog *lcw)
-{
- CERROR("unable to dump stack because of missing export\n");
-}
-#endif
-
-static void lcw_cb(unsigned long data)
-{
- struct lc_watchdog *lcw = (struct lc_watchdog *)data;
-
- ENTRY;
-
- if (lcw->lcw_state != LC_WATCHDOG_ENABLED) {
- EXIT;
- return;
- }
-
- lcw->lcw_state = LC_WATCHDOG_EXPIRED;
-
- /* NB this warning should appear on the console, but may not get into
- * the logs since we're running in a softirq handler */
-
- CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n",
- (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time));
- lcw_dump(lcw);
-
- spin_lock_bh(&lcw_pending_timers_lock);
-
- if (list_empty(&lcw->lcw_list)) {
- list_add(&lcw->lcw_list, &lcw_pending_timers);
- wake_up(&lcw_event_waitq);
- }
-
- spin_unlock_bh(&lcw_pending_timers_lock);
-
- EXIT;
-}
-
-static int is_watchdog_fired(void)
-{
- int rc;
-
- if (test_bit(LCW_FLAG_STOP, &lcw_flags))
- return 1;
-
- spin_lock_bh(&lcw_pending_timers_lock);
- rc = !list_empty(&lcw_pending_timers);
- spin_unlock_bh(&lcw_pending_timers_lock);
- return rc;
-}
-
-static int lcw_dispatch_main(void *data)
-{
- int rc = 0;
- unsigned long flags;
- struct lc_watchdog *lcw;
-
- ENTRY;
-
- cfs_daemonize("lc_watchdogd");
-
- SIGNAL_MASK_LOCK(current, flags);
- sigfillset(¤t->blocked);
- RECALC_SIGPENDING;
- SIGNAL_MASK_UNLOCK(current, flags);
-
- complete(&lcw_start_completion);
-
- while (1) {
- wait_event_interruptible(lcw_event_waitq, is_watchdog_fired());
- CDEBUG(D_INFO, "Watchdog got woken up...\n");
- if (test_bit(LCW_FLAG_STOP, &lcw_flags)) {
- CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");
-
- spin_lock_bh(&lcw_pending_timers_lock);
- rc = !list_empty(&lcw_pending_timers);
- spin_unlock_bh(&lcw_pending_timers_lock);
- if (rc) {
- CERROR("pending timers list was not empty at "
- "time of watchdog dispatch shutdown\n");
- }
- break;
- }
-
- spin_lock_bh(&lcw_pending_timers_lock);
- while (!list_empty(&lcw_pending_timers)) {
-
- lcw = list_entry(lcw_pending_timers.next,
- struct lc_watchdog,
- lcw_list);
- list_del_init(&lcw->lcw_list);
- spin_unlock_bh(&lcw_pending_timers_lock);
-
- CDEBUG(D_INFO, "found lcw for pid %d: inactive for "
- "%lds\n", (int)lcw->lcw_pid,
- cfs_duration_sec(lcw->lcw_time));
-
- if (lcw->lcw_state != LC_WATCHDOG_DISABLED)
- lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
-
- spin_lock_bh(&lcw_pending_timers_lock);
- }
- spin_unlock_bh(&lcw_pending_timers_lock);
- }
-
- complete(&lcw_stop_completion);
-
- RETURN(rc);
-}
-
-static void lcw_dispatch_start(void)
-{
- int rc;
-
- ENTRY;
- LASSERT(lcw_refcount == 1);
-
- init_completion(&lcw_stop_completion);
- init_completion(&lcw_start_completion);
- init_waitqueue_head(&lcw_event_waitq);
-
- CDEBUG(D_INFO, "starting dispatch thread\n");
- rc = kernel_thread(lcw_dispatch_main, NULL, 0);
- if (rc < 0) {
- CERROR("error spawning watchdog dispatch thread: %d\n", rc);
- EXIT;
- return;
- }
- wait_for_completion(&lcw_start_completion);
- CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
-
- EXIT;
-}
-
-static void lcw_dispatch_stop(void)
-{
- ENTRY;
- LASSERT(lcw_refcount == 0);
-
- CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
-
- set_bit(LCW_FLAG_STOP, &lcw_flags);
- wake_up(&lcw_event_waitq);
-
- wait_for_completion(&lcw_stop_completion);
-
- CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
-
- EXIT;
-}
-
-struct lc_watchdog *lc_watchdog_add(int timeout_ms,
- void (*callback)(pid_t, void *),
- void *data)
-{
- struct lc_watchdog *lcw = NULL;
- ENTRY;
-
- LIBCFS_ALLOC(lcw, sizeof(*lcw));
- if (lcw == NULL) {
- CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
- RETURN(ERR_PTR(-ENOMEM));
- }
-
- lcw->lcw_task = cfs_current();
- lcw->lcw_pid = cfs_curproc_pid();
- lcw->lcw_time = cfs_time_seconds(timeout_ms) / 1000;
- lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
- lcw->lcw_data = data;
- lcw->lcw_state = LC_WATCHDOG_DISABLED;
-
- INIT_LIST_HEAD(&lcw->lcw_list);
-
- lcw->lcw_timer.function = lcw_cb;
- lcw->lcw_timer.data = (unsigned long)lcw;
- lcw->lcw_timer.expires = jiffies + lcw->lcw_time;
- init_timer(&lcw->lcw_timer);
-
- down(&lcw_refcount_sem);
- if (++lcw_refcount == 1)
- lcw_dispatch_start();
- up(&lcw_refcount_sem);
-
- /* Keep this working in case we enable them by default */
- if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
- do_gettimeofday(&lcw->lcw_last_touched);
- add_timer(&lcw->lcw_timer);
- }
-
- RETURN(lcw);
-}
-EXPORT_SYMBOL(lc_watchdog_add);
-
-static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
-{
- struct timeval newtime;
- struct timeval timediff;
-
- do_gettimeofday(&newtime);
- if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
- cfs_timeval_sub(&newtime, &lcw->lcw_last_touched, &timediff);
- CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n",
- lcw->lcw_pid,
- message,
- timediff.tv_sec,
- timediff.tv_usec / 100);
- }
- lcw->lcw_last_touched = newtime;
-}
-
-void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms)
-{
- ENTRY;
- LASSERT(lcw != NULL);
-
- spin_lock_bh(&lcw_pending_timers_lock);
- list_del_init(&lcw->lcw_list);
- spin_unlock_bh(&lcw_pending_timers_lock);
-
- lcw_update_time(lcw, "touched");
- lcw->lcw_state = LC_WATCHDOG_ENABLED;
-
- mod_timer(&lcw->lcw_timer, jiffies +
- cfs_time_seconds(timeout_ms) / 1000);
-
- EXIT;
-}
-EXPORT_SYMBOL(lc_watchdog_touch_ms);
-
-/* deprecated - use above instead */
-void lc_watchdog_touch(struct lc_watchdog *lcw)
-{
- lc_watchdog_touch_ms(lcw, cfs_duration_sec(lcw->lcw_time) * 1000);
-}
-EXPORT_SYMBOL(lc_watchdog_touch);
-
-void lc_watchdog_disable(struct lc_watchdog *lcw)
-{
- ENTRY;
- LASSERT(lcw != NULL);
-
- spin_lock_bh(&lcw_pending_timers_lock);
- if (!list_empty(&lcw->lcw_list))
- list_del_init(&lcw->lcw_list);
- spin_unlock_bh(&lcw_pending_timers_lock);
-
- lcw_update_time(lcw, "disabled");
- lcw->lcw_state = LC_WATCHDOG_DISABLED;
-
- EXIT;
-}
-EXPORT_SYMBOL(lc_watchdog_disable);
-
-void lc_watchdog_delete(struct lc_watchdog *lcw)
-{
- ENTRY;
- LASSERT(lcw != NULL);
-
- del_timer(&lcw->lcw_timer);
-
- lcw_update_time(lcw, "deleted");
-
- spin_lock_bh(&lcw_pending_timers_lock);
- if (!list_empty(&lcw->lcw_list))
- list_del_init(&lcw->lcw_list);
- spin_unlock_bh(&lcw_pending_timers_lock);
-
- down(&lcw_refcount_sem);
- if (--lcw_refcount == 0)
- lcw_dispatch_stop();
- up(&lcw_refcount_sem);
-
- LIBCFS_FREE(lcw, sizeof(*lcw));
-
- EXIT;
-}
-EXPORT_SYMBOL(lc_watchdog_delete);
-
-/*
- * Provided watchdog handlers
- */
-
-void lc_watchdog_dumplog(pid_t pid, void *data)
-{
- libcfs_debug_dumplog_internal((void *)((unsigned long)pid));
-}
-EXPORT_SYMBOL(lc_watchdog_dumplog);
-
-#else /* !defined(WITH_WATCHDOG) */
-
-struct lc_watchdog *lc_watchdog_add(int timeout_ms,
- void (*callback)(pid_t pid, void *),
- void *data)
-{
- static struct lc_watchdog watchdog;
- return &watchdog;
-}
-EXPORT_SYMBOL(lc_watchdog_add);
-
-void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms)
-{
-}
-EXPORT_SYMBOL(lc_watchdog_touch_ms);
-
-void lc_watchdog_touch(struct lc_watchdog *lcw)
-{
-}
-EXPORT_SYMBOL(lc_watchdog_touch);
-
-void lc_watchdog_disable(struct lc_watchdog *lcw)
-{
-}
-EXPORT_SYMBOL(lc_watchdog_disable);
-
-void lc_watchdog_delete(struct lc_watchdog *lcw)
-{
-}
-EXPORT_SYMBOL(lc_watchdog_delete);
-
-#endif
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- *
- * Impletion of winnt curproc routines.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-
-/*
- * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
- * for Linux kernel.
- */
-
-cfs_task_t this_task =
- { 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 1, 0, 0, 0, 0,
- "sysetm\0" };
-
-
-uid_t cfs_curproc_uid(void)
-{
- return this_task.uid;
-}
-
-gid_t cfs_curproc_gid(void)
-{
- return this_task.gid;
-}
-
-uid_t cfs_curproc_fsuid(void)
-{
- return this_task.fsuid;
-}
-
-gid_t cfs_curproc_fsgid(void)
-{
- return this_task.fsgid;
-}
-
-pid_t cfs_curproc_pid(void)
-{
- return cfs_current()->pid;
-}
-
-int cfs_curproc_groups_nr(void)
-{
- return this_task.ngroups;
-}
-
-void cfs_curproc_groups_dump(gid_t *array, int size)
-{
- LASSERT(size <= NGROUPS);
- size = min_t(int, size, this_task.ngroups);
- memcpy(array, this_task.groups, size * sizeof(__u32));
-}
-
-int cfs_curproc_is_in_groups(gid_t gid)
-{
- return in_group_p(gid);
-}
-
-mode_t cfs_curproc_umask(void)
-{
- return this_task.umask;
-}
-
-char *cfs_curproc_comm(void)
-{
- return this_task.comm;
-}
-
-cfs_kernel_cap_t cfs_curproc_cap_get(void)
-{
- return this_task.cap_effective;
-}
-
-void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
-{
- this_task.cap_effective = cap;
-}
-
-
-/*
- * Implementation of linux task management routines
- */
-
-
-/* global of the task manager structure */
-
-TASK_MAN TaskMan;
-
-
-/*
- * task slot routiens
- */
-
-PTASK_SLOT
-alloc_task_slot()
-{
- PTASK_SLOT task = NULL;
-
- if (TaskMan.slab) {
- task = cfs_mem_cache_alloc(TaskMan.slab, 0);
- } else {
- task = cfs_alloc(sizeof(TASK_SLOT), 0);
- }
-
- return task;
-}
-
-void
-init_task_slot(PTASK_SLOT task)
-{
- memset(task, 0, sizeof(TASK_SLOT));
- task->Magic = TASKSLT_MAGIC;
- task->task = this_task;
- task->task.pid = (pid_t)PsGetCurrentThreadId();
- cfs_init_event(&task->Event, TRUE, FALSE);
-}
-
-
-void
-cleanup_task_slot(PTASK_SLOT task)
-{
- if (TaskMan.slab) {
- cfs_mem_cache_free(TaskMan.slab, task);
- } else {
- cfs_free(task);
- }
-}
-
-/*
- * task manager related routines
- */
-
-VOID
-task_manager_notify(
- IN HANDLE ProcessId,
- IN HANDLE ThreadId,
- IN BOOLEAN Create
- )
-{
- PLIST_ENTRY ListEntry = NULL;
- PTASK_SLOT TaskSlot = NULL;
-
- spin_lock(&(TaskMan.Lock));
-
- ListEntry = TaskMan.TaskList.Flink;
-
- while (ListEntry != (&(TaskMan.TaskList))) {
-
- TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
-
- if (TaskSlot->Pid == ProcessId && TaskSlot->Tid == ThreadId) {
-
- if (Create) {
-/*
- DbgPrint("task_manager_notify: Pid=%xh Tid %xh resued (TaskSlot->Tet = %xh)...\n",
- ProcessId, ThreadId, TaskSlot->Tet);
-*/
- } else {
- /* remove the taskslot */
- RemoveEntryList(&(TaskSlot->Link));
- TaskMan.NumOfTasks--;
-
- /* now free the task slot */
- cleanup_task_slot(TaskSlot);
- }
- }
-
- ListEntry = ListEntry->Flink;
- }
-
- spin_unlock(&(TaskMan.Lock));
-}
-
-int
-init_task_manager()
-{
- NTSTATUS status;
-
- /* initialize the content and magic */
- memset(&TaskMan, 0, sizeof(TASK_MAN));
- TaskMan.Magic = TASKMAN_MAGIC;
-
- /* initialize the spinlock protection */
- spin_lock_init(&TaskMan.Lock);
-
- /* create slab memory cache */
- TaskMan.slab = cfs_mem_cache_create(
- "TSLT", sizeof(TASK_SLOT), 0, 0);
-
- /* intialize the list header */
- InitializeListHead(&(TaskMan.TaskList));
-
- /* set the thread creation/destruction notify routine */
- status = PsSetCreateThreadNotifyRoutine(task_manager_notify);
-
- if (!NT_SUCCESS(status)) {
- cfs_enter_debugger();
- }
-
- return 0;
-}
-
-void
-cleanup_task_manager()
-{
- PLIST_ENTRY ListEntry = NULL;
- PTASK_SLOT TaskSlot = NULL;
-
- /* we must stay in system since we succeed to register the
- CreateThreadNotifyRoutine: task_manager_notify */
- cfs_enter_debugger();
-
-
- /* cleanup all the taskslots attached to the list */
- spin_lock(&(TaskMan.Lock));
-
- while (!IsListEmpty(&(TaskMan.TaskList))) {
-
- ListEntry = TaskMan.TaskList.Flink;
- TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
-
- RemoveEntryList(ListEntry);
- cleanup_task_slot(TaskSlot);
- }
-
- spin_unlock(&TaskMan.Lock);
-
- /* destroy the taskslot cache slab */
- cfs_mem_cache_destroy(TaskMan.slab);
- memset(&TaskMan, 0, sizeof(TASK_MAN));
-}
-
-
-/*
- * schedule routines (task slot list)
- */
-
-
-cfs_task_t *
-cfs_current()
-{
- HANDLE Pid = PsGetCurrentProcessId();
- HANDLE Tid = PsGetCurrentThreadId();
- PETHREAD Tet = PsGetCurrentThread();
-
- PLIST_ENTRY ListEntry = NULL;
- PTASK_SLOT TaskSlot = NULL;
-
- spin_lock(&(TaskMan.Lock));
-
- ListEntry = TaskMan.TaskList.Flink;
-
- while (ListEntry != (&(TaskMan.TaskList))) {
-
- TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
-
- if (TaskSlot->Pid == Pid && TaskSlot->Tid == Tid) {
- if (TaskSlot->Tet != Tet) {
-
-/*
- DbgPrint("cfs_current: Pid=%xh Tid %xh Tet = %xh resued (TaskSlot->Tet = %xh)...\n",
- Pid, Tid, Tet, TaskSlot->Tet);
-*/
- //
- // The old thread was already exit. This must be a
- // new thread which get the same Tid to the previous.
- //
-
- TaskSlot->Tet = Tet;
- }
- break;
-
- } else {
-
- if ((ULONG)TaskSlot->Pid > (ULONG)Pid) {
- TaskSlot = NULL;
- break;
- } else if ((ULONG)TaskSlot->Pid == (ULONG)Pid) {
- if ((ULONG)TaskSlot->Tid > (ULONG)Tid) {
- TaskSlot = NULL;
- break;
- }
- }
-
- TaskSlot = NULL;
- }
-
- ListEntry = ListEntry->Flink;
- }
-
- if (!TaskSlot) {
-
- TaskSlot = alloc_task_slot();
-
- if (!TaskSlot) {
- cfs_enter_debugger();
- goto errorout;
- }
-
- init_task_slot(TaskSlot);
-
- TaskSlot->Pid = Pid;
- TaskSlot->Tid = Tid;
- TaskSlot->Tet = Tet;
-
- if (ListEntry == (&(TaskMan.TaskList))) {
- //
- // Empty case or the biggest case, put it to the tail.
- //
- InsertTailList(&(TaskMan.TaskList), &(TaskSlot->Link));
- } else {
- //
- // Get a slot and smaller than it's tid, put it just before.
- //
- InsertHeadList(ListEntry->Blink, &(TaskSlot->Link));
- }
-
- TaskMan.NumOfTasks++;
- }
-
- //
- // To Check whether he task structures are arranged in the expected order ?
- //
-
- {
- PTASK_SLOT Prev = NULL, Curr = NULL;
-
- ListEntry = TaskMan.TaskList.Flink;
-
- while (ListEntry != (&(TaskMan.TaskList))) {
-
- Curr = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
- ListEntry = ListEntry->Flink;
-
- if (Prev) {
- if ((ULONG)Prev->Pid > (ULONG)Curr->Pid) {
- cfs_enter_debugger();
- } else if ((ULONG)Prev->Pid == (ULONG)Curr->Pid) {
- if ((ULONG)Prev->Tid > (ULONG)Curr->Tid) {
- cfs_enter_debugger();
- }
- }
- }
-
- Prev = Curr;
- }
- }
-
-errorout:
-
- spin_unlock(&(TaskMan.Lock));
-
- if (!TaskSlot) {
- cfs_enter_debugger();
- return NULL;
- }
-
- return (&(TaskSlot->task));
-}
-
-int
-schedule_timeout(int64_t time)
-{
- cfs_task_t * task = cfs_current();
- PTASK_SLOT slot = NULL;
-
- if (!task) {
- cfs_enter_debugger();
- return 0;
- }
-
- slot = CONTAINING_RECORD(task, TASK_SLOT, task);
- cfs_assert(slot->Magic == TASKSLT_MAGIC);
-
- if (time == MAX_SCHEDULE_TIMEOUT) {
- time = 0;
- }
-
- return (cfs_wait_event(&(slot->Event), time) != 0);
-}
-
-int
-schedule()
-{
- return schedule_timeout(0);
-}
-
-int
-wake_up_process(
- cfs_task_t * task
- )
-{
- PTASK_SLOT slot = NULL;
-
- if (!task) {
- cfs_enter_debugger();
- return 0;
- }
-
- slot = CONTAINING_RECORD(task, TASK_SLOT, task);
- cfs_assert(slot->Magic == TASKSLT_MAGIC);
-
- cfs_wake_event(&(slot->Event));
-
- return TRUE;
-}
-
-void
-sleep_on(
- cfs_waitq_t *waitq
- )
-{
- cfs_waitlink_t link;
-
- cfs_waitlink_init(&link);
- cfs_waitq_add(waitq, &link);
- cfs_waitq_wait(&link, CFS_TASK_INTERRUPTIBLE);
- cfs_waitq_del(waitq, &link);
-}
-
-EXPORT_SYMBOL(cfs_curproc_uid);
-EXPORT_SYMBOL(cfs_curproc_pid);
-EXPORT_SYMBOL(cfs_curproc_gid);
-EXPORT_SYMBOL(cfs_curproc_fsuid);
-EXPORT_SYMBOL(cfs_curproc_fsgid);
-EXPORT_SYMBOL(cfs_curproc_umask);
-EXPORT_SYMBOL(cfs_curproc_comm);
-EXPORT_SYMBOL(cfs_curproc_groups_nr);
-EXPORT_SYMBOL(cfs_curproc_groups_dump);
-EXPORT_SYMBOL(cfs_curproc_is_in_groups);
-EXPORT_SYMBOL(cfs_curproc_cap_get);
-EXPORT_SYMBOL(cfs_curproc_cap_set);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/kp30.h>
-#include <libcfs/libcfs.h>
-#include "tracefile.h"
-
-void lnet_debug_dumpstack(cfs_task_t *tsk)
-{
- return;
-}
-
-cfs_task_t *lnet_current(void)
-{
- return cfs_current();
-}
-
-int lnet_arch_debug_init(unsigned long bufsize)
-{
- return 0;
-}
-
-int lnet_arch_debug_cleanup(void)
-{
- return 0;
-}
-
-void lnet_run_lbug_upcall(char *file, const char *fn, const int line)
-{
-}
-
-void lbug_with_loc(char *file, const char *func, const int line)
-{
- libcfs_catastrophe = 1;
- CEMERG("LBUG: pid: %u thread: %#x\n",
- (unsigned)cfs_curproc_pid(), (unsigned)PsGetCurrentThread());
- // portals_debug_dumplog();
- // portals_run_lbug_upcall(file, func, line);
-}
-
-#if TDI_LIBCFS_DBG
-
-/*
- * Definitions
- */
-
-LONG KsDebugLevel = 0x5;
-
-
-/*
- * Routines
- */
-
-
-/*
- * KsNtStatusToString
- * Get the error message for a specified nt status
- *
- * Arguments:
- * Status - nt status code
- *
- * Return Value:
- * PUCHAR - message string for the status code
- *
- * NOTES:
- * N/A
- */
-
-PUCHAR
-KsNtStatusToString (IN NTSTATUS Status)
-{
- switch (Status) {
-
- case 0x00000000: return "STATUS_SUCCESS";
- case 0x00000001: return "STATUS_WAIT_1";
- case 0x00000002: return "STATUS_WAIT_2";
- case 0x00000003: return "STATUS_WAIT_3";
- case 0x0000003F: return "STATUS_WAIT_63";
- case 0x00000080: return "STATUS_ABANDONED_WAIT_0";
- case 0x000000BF: return "STATUS_ABANDONED_WAIT_63";
- case 0x000000C0: return "STATUS_USER_APC";
- case 0x00000100: return "STATUS_KERNEL_APC";
- case 0x00000101: return "STATUS_ALERTED";
- case 0x00000102: return "STATUS_TIMEOUT";
- case 0x00000103: return "STATUS_PENDING";
- case 0x00000104: return "STATUS_REPARSE";
- case 0x00000105: return "STATUS_MORE_ENTRIES";
- case 0x00000106: return "STATUS_NOT_ALL_ASSIGNED";
- case 0x00000107: return "STATUS_SOME_NOT_MAPPED";
- case 0x00000108: return "STATUS_OPLOCK_BREAK_IN_PROGRESS";
- case 0x00000109: return "STATUS_VOLUME_MOUNTED";
- case 0x0000010A: return "STATUS_RXACT_COMMITTED";
- case 0x0000010B: return "STATUS_NOTIFY_CLEANUP";
- case 0x0000010C: return "STATUS_NOTIFY_ENUM_DIR";
- case 0x0000010D: return "STATUS_NO_QUOTAS_FOR_ACCOUNT";
- case 0x0000010E: return "STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED";
- case 0x00000110: return "STATUS_PAGE_FAULT_TRANSITION";
- case 0x00000111: return "STATUS_PAGE_FAULT_DEMAND_ZERO";
- case 0x00000112: return "STATUS_PAGE_FAULT_COPY_ON_WRITE";
- case 0x00000113: return "STATUS_PAGE_FAULT_GUARD_PAGE";
- case 0x00000114: return "STATUS_PAGE_FAULT_PAGING_FILE";
- case 0x00000115: return "STATUS_CACHE_PAGE_LOCKED";
- case 0x00000116: return "STATUS_CRASH_DUMP";
- case 0x00000117: return "STATUS_BUFFER_ALL_ZEROS";
- case 0x00000118: return "STATUS_REPARSE_OBJECT";
- case 0x00000119: return "STATUS_RESOURCE_REQUIREMENTS_CHANGED";
- case 0x00000120: return "STATUS_TRANSLATION_COMPLETE";
- case 0x00000121: return "STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY";
- case 0x00010001: return "DBG_EXCEPTION_HANDLED";
- case 0x00010002: return "DBG_CONTINUE";
- case 0x40000000: return "STATUS_OBJECT_NAME_EXISTS";
- case 0x40000001: return "STATUS_THREAD_WAS_SUSPENDED";
- case 0x40000002: return "STATUS_WORKING_SET_LIMIT_RANGE";
- case 0x40000003: return "STATUS_IMAGE_NOT_AT_BASE";
- case 0x40000004: return "STATUS_RXACT_STATE_CREATED";
- case 0x40000005: return "STATUS_SEGMENT_NOTIFICATION";
- case 0x40000006: return "STATUS_LOCAL_USER_SESSION_KEY";
- case 0x40000007: return "STATUS_BAD_CURRENT_DIRECTORY";
- case 0x40000008: return "STATUS_SERIAL_MORE_WRITES";
- case 0x40000009: return "STATUS_REGISTRY_RECOVERED";
- case 0x4000000A: return "STATUS_FT_READ_RECOVERY_FROM_BACKUP";
- case 0x4000000B: return "STATUS_FT_WRITE_RECOVERY";
- case 0x4000000C: return "STATUS_SERIAL_COUNTER_TIMEOUT";
- case 0x4000000D: return "STATUS_NULL_LM_PASSWORD";
- case 0x4000000E: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH";
- case 0x4000000F: return "STATUS_RECEIVE_PARTIAL";
- case 0x40000010: return "STATUS_RECEIVE_EXPEDITED";
- case 0x40000011: return "STATUS_RECEIVE_PARTIAL_EXPEDITED";
- case 0x40000012: return "STATUS_EVENT_DONE";
- case 0x40000013: return "STATUS_EVENT_PENDING";
- case 0x40000014: return "STATUS_CHECKING_FILE_SYSTEM";
- case 0x40000015: return "STATUS_FATAL_APP_EXIT";
- case 0x40000016: return "STATUS_PREDEFINED_HANDLE";
- case 0x40000017: return "STATUS_WAS_UNLOCKED";
- case 0x40000018: return "STATUS_SERVICE_NOTIFICATION";
- case 0x40000019: return "STATUS_WAS_LOCKED";
- case 0x4000001A: return "STATUS_LOG_HARD_ERROR";
- case 0x4000001B: return "STATUS_ALREADY_WIN32";
- case 0x4000001C: return "STATUS_WX86_UNSIMULATE";
- case 0x4000001D: return "STATUS_WX86_CONTINUE";
- case 0x4000001E: return "STATUS_WX86_SINGLE_STEP";
- case 0x4000001F: return "STATUS_WX86_BREAKPOINT";
- case 0x40000020: return "STATUS_WX86_EXCEPTION_CONTINUE";
- case 0x40000021: return "STATUS_WX86_EXCEPTION_LASTCHANCE";
- case 0x40000022: return "STATUS_WX86_EXCEPTION_CHAIN";
- case 0x40000023: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE";
- case 0x40000024: return "STATUS_NO_YIELD_PERFORMED";
- case 0x40000025: return "STATUS_TIMER_RESUME_IGNORED";
- case 0x40000026: return "STATUS_ARBITRATION_UNHANDLED";
- case 0x40000027: return "STATUS_CARDBUS_NOT_SUPPORTED";
- case 0x40000028: return "STATUS_WX86_CREATEWX86TIB";
- case 0x40000029: return "STATUS_MP_PROCESSOR_MISMATCH";
- case 0x40010001: return "DBG_REPLY_LATER";
- case 0x40010002: return "DBG_UNABLE_TO_PROVIDE_HANDLE";
- case 0x40010003: return "DBG_TERMINATE_THREAD";
- case 0x40010004: return "DBG_TERMINATE_PROCESS";
- case 0x40010005: return "DBG_CONTROL_C";
- case 0x40010006: return "DBG_PRINTEXCEPTION_C";
- case 0x40010007: return "DBG_RIPEXCEPTION";
- case 0x40010008: return "DBG_CONTROL_BREAK";
- case 0x80000001: return "STATUS_GUARD_PAGE_VIOLATION";
- case 0x80000002: return "STATUS_DATATYPE_MISALIGNMENT";
- case 0x80000003: return "STATUS_BREAKPOINT";
- case 0x80000004: return "STATUS_SINGLE_STEP";
- case 0x80000005: return "STATUS_BUFFER_OVERFLOW";
- case 0x80000006: return "STATUS_NO_MORE_FILES";
- case 0x80000007: return "STATUS_WAKE_SYSTEM_DEBUGGER";
- case 0x8000000A: return "STATUS_HANDLES_CLOSED";
- case 0x8000000B: return "STATUS_NO_INHERITANCE";
- case 0x8000000C: return "STATUS_GUID_SUBSTITUTION_MADE";
- case 0x8000000D: return "STATUS_PARTIAL_COPY";
- case 0x8000000E: return "STATUS_DEVICE_PAPER_EMPTY";
- case 0x8000000F: return "STATUS_DEVICE_POWERED_OFF";
- case 0x80000010: return "STATUS_DEVICE_OFF_LINE";
- case 0x80000011: return "STATUS_DEVICE_BUSY";
- case 0x80000012: return "STATUS_NO_MORE_EAS";
- case 0x80000013: return "STATUS_INVALID_EA_NAME";
- case 0x80000014: return "STATUS_EA_LIST_INCONSISTENT";
- case 0x80000015: return "STATUS_INVALID_EA_FLAG";
- case 0x80000016: return "STATUS_VERIFY_REQUIRED";
- case 0x80000017: return "STATUS_EXTRANEOUS_INFORMATION";
- case 0x80000018: return "STATUS_RXACT_COMMIT_NECESSARY";
- case 0x8000001A: return "STATUS_NO_MORE_ENTRIES";
- case 0x8000001B: return "STATUS_FILEMARK_DETECTED";
- case 0x8000001C: return "STATUS_MEDIA_CHANGED";
- case 0x8000001D: return "STATUS_BUS_RESET";
- case 0x8000001E: return "STATUS_END_OF_MEDIA";
- case 0x8000001F: return "STATUS_BEGINNING_OF_MEDIA";
- case 0x80000020: return "STATUS_MEDIA_CHECK";
- case 0x80000021: return "STATUS_SETMARK_DETECTED";
- case 0x80000022: return "STATUS_NO_DATA_DETECTED";
- case 0x80000023: return "STATUS_REDIRECTOR_HAS_OPEN_HANDLES";
- case 0x80000024: return "STATUS_SERVER_HAS_OPEN_HANDLES";
- case 0x80000025: return "STATUS_ALREADY_DISCONNECTED";
- case 0x80000026: return "STATUS_LONGJUMP";
- case 0x80010001: return "DBG_EXCEPTION_NOT_HANDLED";
- case 0xC0000001: return "STATUS_UNSUCCESSFUL";
- case 0xC0000002: return "STATUS_NOT_IMPLEMENTED";
- case 0xC0000003: return "STATUS_INVALID_INFO_CLASS";
- case 0xC0000004: return "STATUS_INFO_LENGTH_MISMATCH";
- case 0xC0000005: return "STATUS_ACCESS_VIOLATION";
- case 0xC0000006: return "STATUS_IN_PAGE_ERROR";
- case 0xC0000007: return "STATUS_PAGEFILE_QUOTA";
- case 0xC0000008: return "STATUS_INVALID_HANDLE";
- case 0xC0000009: return "STATUS_BAD_INITIAL_STACK";
- case 0xC000000A: return "STATUS_BAD_INITIAL_PC";
- case 0xC000000B: return "STATUS_INVALID_CID";
- case 0xC000000C: return "STATUS_TIMER_NOT_CANCELED";
- case 0xC000000D: return "STATUS_INVALID_PARAMETER";
- case 0xC000000E: return "STATUS_NO_SUCH_DEVICE";
- case 0xC000000F: return "STATUS_NO_SUCH_FILE";
- case 0xC0000010: return "STATUS_INVALID_DEVICE_REQUEST";
- case 0xC0000011: return "STATUS_END_OF_FILE";
- case 0xC0000012: return "STATUS_WRONG_VOLUME";
- case 0xC0000013: return "STATUS_NO_MEDIA_IN_DEVICE";
- case 0xC0000014: return "STATUS_UNRECOGNIZED_MEDIA";
- case 0xC0000015: return "STATUS_NONEXISTENT_SECTOR";
- case 0xC0000016: return "STATUS_MORE_PROCESSING_REQUIRED";
- case 0xC0000017: return "STATUS_NO_MEMORY";
- case 0xC0000018: return "STATUS_CONFLICTING_ADDRESSES";
- case 0xC0000019: return "STATUS_NOT_MAPPED_VIEW";
- case 0xC000001A: return "STATUS_UNABLE_TO_FREE_VM";
- case 0xC000001B: return "STATUS_UNABLE_TO_DELETE_SECTION";
- case 0xC000001C: return "STATUS_INVALID_SYSTEM_SERVICE";
- case 0xC000001D: return "STATUS_ILLEGAL_INSTRUCTION";
- case 0xC000001E: return "STATUS_INVALID_LOCK_SEQUENCE";
- case 0xC000001F: return "STATUS_INVALID_VIEW_SIZE";
- case 0xC0000020: return "STATUS_INVALID_FILE_FOR_SECTION";
- case 0xC0000021: return "STATUS_ALREADY_COMMITTED";
- case 0xC0000022: return "STATUS_ACCESS_DENIED";
- case 0xC0000023: return "STATUS_BUFFER_TOO_SMALL";
- case 0xC0000024: return "STATUS_OBJECT_TYPE_MISMATCH";
- case 0xC0000025: return "STATUS_NONCONTINUABLE_EXCEPTION";
- case 0xC0000026: return "STATUS_INVALID_DISPOSITION";
- case 0xC0000027: return "STATUS_UNWIND";
- case 0xC0000028: return "STATUS_BAD_STACK";
- case 0xC0000029: return "STATUS_INVALID_UNWIND_TARGET";
- case 0xC000002A: return "STATUS_NOT_LOCKED";
- case 0xC000002B: return "STATUS_PARITY_ERROR";
- case 0xC000002C: return "STATUS_UNABLE_TO_DECOMMIT_VM";
- case 0xC000002D: return "STATUS_NOT_COMMITTED";
- case 0xC000002E: return "STATUS_INVALID_PORT_ATTRIBUTES";
- case 0xC000002F: return "STATUS_PORT_MESSAGE_TOO_LONG";
- case 0xC0000030: return "STATUS_INVALID_PARAMETER_MIX";
- case 0xC0000031: return "STATUS_INVALID_QUOTA_LOWER";
- case 0xC0000032: return "STATUS_DISK_CORRUPT_ERROR";
- case 0xC0000033: return "STATUS_OBJECT_NAME_INVALID";
- case 0xC0000034: return "STATUS_OBJECT_NAME_NOT_FOUND";
- case 0xC0000035: return "STATUS_OBJECT_NAME_COLLISION";
- case 0xC0000037: return "STATUS_PORT_DISCONNECTED";
- case 0xC0000038: return "STATUS_DEVICE_ALREADY_ATTACHED";
- case 0xC0000039: return "STATUS_OBJECT_PATH_INVALID";
- case 0xC000003A: return "STATUS_OBJECT_PATH_NOT_FOUND";
- case 0xC000003B: return "STATUS_OBJECT_PATH_SYNTAX_BAD";
- case 0xC000003C: return "STATUS_DATA_OVERRUN";
- case 0xC000003D: return "STATUS_DATA_LATE_ERROR";
- case 0xC000003E: return "STATUS_DATA_ERROR";
- case 0xC000003F: return "STATUS_CRC_ERROR";
- case 0xC0000040: return "STATUS_SECTION_TOO_BIG";
- case 0xC0000041: return "STATUS_PORT_CONNECTION_REFUSED";
- case 0xC0000042: return "STATUS_INVALID_PORT_HANDLE";
- case 0xC0000043: return "STATUS_SHARING_VIOLATION";
- case 0xC0000044: return "STATUS_QUOTA_EXCEEDED";
- case 0xC0000045: return "STATUS_INVALID_PAGE_PROTECTION";
- case 0xC0000046: return "STATUS_MUTANT_NOT_OWNED";
- case 0xC0000047: return "STATUS_SEMAPHORE_LIMIT_EXCEEDED";
- case 0xC0000048: return "STATUS_PORT_ALREADY_SET";
- case 0xC0000049: return "STATUS_SECTION_NOT_IMAGE";
- case 0xC000004A: return "STATUS_SUSPEND_COUNT_EXCEEDED";
- case 0xC000004B: return "STATUS_THREAD_IS_TERMINATING";
- case 0xC000004C: return "STATUS_BAD_WORKING_SET_LIMIT";
- case 0xC000004D: return "STATUS_INCOMPATIBLE_FILE_MAP";
- case 0xC000004E: return "STATUS_SECTION_PROTECTION";
- case 0xC000004F: return "STATUS_EAS_NOT_SUPPORTED";
- case 0xC0000050: return "STATUS_EA_TOO_LARGE";
- case 0xC0000051: return "STATUS_NONEXISTENT_EA_ENTRY";
- case 0xC0000052: return "STATUS_NO_EAS_ON_FILE";
- case 0xC0000053: return "STATUS_EA_CORRUPT_ERROR";
- case 0xC0000054: return "STATUS_FILE_LOCK_CONFLICT";
- case 0xC0000055: return "STATUS_LOCK_NOT_GRANTED";
- case 0xC0000056: return "STATUS_DELETE_PENDING";
- case 0xC0000057: return "STATUS_CTL_FILE_NOT_SUPPORTED";
- case 0xC0000058: return "STATUS_UNKNOWN_REVISION";
- case 0xC0000059: return "STATUS_REVISION_MISMATCH";
- case 0xC000005A: return "STATUS_INVALID_OWNER";
- case 0xC000005B: return "STATUS_INVALID_PRIMARY_GROUP";
- case 0xC000005C: return "STATUS_NO_IMPERSONATION_TOKEN";
- case 0xC000005D: return "STATUS_CANT_DISABLE_MANDATORY";
- case 0xC000005E: return "STATUS_NO_LOGON_SERVERS";
- case 0xC000005F: return "STATUS_NO_SUCH_LOGON_SESSION";
- case 0xC0000060: return "STATUS_NO_SUCH_PRIVILEGE";
- case 0xC0000061: return "STATUS_PRIVILEGE_NOT_HELD";
- case 0xC0000062: return "STATUS_INVALID_ACCOUNT_NAME";
- case 0xC0000063: return "STATUS_USER_EXISTS";
- case 0xC0000064: return "STATUS_NO_SUCH_USER";
- case 0xC0000065: return "STATUS_GROUP_EXISTS";
- case 0xC0000066: return "STATUS_NO_SUCH_GROUP";
- case 0xC0000067: return "STATUS_MEMBER_IN_GROUP";
- case 0xC0000068: return "STATUS_MEMBER_NOT_IN_GROUP";
- case 0xC0000069: return "STATUS_LAST_ADMIN";
- case 0xC000006A: return "STATUS_WRONG_PASSWORD";
- case 0xC000006B: return "STATUS_ILL_FORMED_PASSWORD";
- case 0xC000006C: return "STATUS_PASSWORD_RESTRICTION";
- case 0xC000006D: return "STATUS_LOGON_FAILURE";
- case 0xC000006E: return "STATUS_ACCOUNT_RESTRICTION";
- case 0xC000006F: return "STATUS_INVALID_LOGON_HOURS";
- case 0xC0000070: return "STATUS_INVALID_WORKSTATION";
- case 0xC0000071: return "STATUS_PASSWORD_EXPIRED";
- case 0xC0000072: return "STATUS_ACCOUNT_DISABLED";
- case 0xC0000073: return "STATUS_NONE_MAPPED";
- case 0xC0000074: return "STATUS_TOO_MANY_LUIDS_REQUESTED";
- case 0xC0000075: return "STATUS_LUIDS_EXHAUSTED";
- case 0xC0000076: return "STATUS_INVALID_SUB_AUTHORITY";
- case 0xC0000077: return "STATUS_INVALID_ACL";
- case 0xC0000078: return "STATUS_INVALID_SID";
- case 0xC0000079: return "STATUS_INVALID_SECURITY_DESCR";
- case 0xC000007A: return "STATUS_PROCEDURE_NOT_FOUND";
- case 0xC000007B: return "STATUS_INVALID_IMAGE_FORMAT";
- case 0xC000007C: return "STATUS_NO_TOKEN";
- case 0xC000007D: return "STATUS_BAD_INHERITANCE_ACL";
- case 0xC000007E: return "STATUS_RANGE_NOT_LOCKED";
- case 0xC000007F: return "STATUS_DISK_FULL";
- case 0xC0000080: return "STATUS_SERVER_DISABLED";
- case 0xC0000081: return "STATUS_SERVER_NOT_DISABLED";
- case 0xC0000082: return "STATUS_TOO_MANY_GUIDS_REQUESTED";
- case 0xC0000083: return "STATUS_GUIDS_EXHAUSTED";
- case 0xC0000084: return "STATUS_INVALID_ID_AUTHORITY";
- case 0xC0000085: return "STATUS_AGENTS_EXHAUSTED";
- case 0xC0000086: return "STATUS_INVALID_VOLUME_LABEL";
- case 0xC0000087: return "STATUS_SECTION_NOT_EXTENDED";
- case 0xC0000088: return "STATUS_NOT_MAPPED_DATA";
- case 0xC0000089: return "STATUS_RESOURCE_DATA_NOT_FOUND";
- case 0xC000008A: return "STATUS_RESOURCE_TYPE_NOT_FOUND";
- case 0xC000008B: return "STATUS_RESOURCE_NAME_NOT_FOUND";
- case 0xC000008C: return "STATUS_ARRAY_BOUNDS_EXCEEDED";
- case 0xC000008D: return "STATUS_FLOAT_DENORMAL_OPERAND";
- case 0xC000008E: return "STATUS_FLOAT_DIVIDE_BY_ZERO";
- case 0xC000008F: return "STATUS_FLOAT_INEXACT_RESULT";
- case 0xC0000090: return "STATUS_FLOAT_INVALID_OPERATION";
- case 0xC0000091: return "STATUS_FLOAT_OVERFLOW";
- case 0xC0000092: return "STATUS_FLOAT_STACK_CHECK";
- case 0xC0000093: return "STATUS_FLOAT_UNDERFLOW";
- case 0xC0000094: return "STATUS_INTEGER_DIVIDE_BY_ZERO";
- case 0xC0000095: return "STATUS_INTEGER_OVERFLOW";
- case 0xC0000096: return "STATUS_PRIVILEGED_INSTRUCTION";
- case 0xC0000097: return "STATUS_TOO_MANY_PAGING_FILES";
- case 0xC0000098: return "STATUS_FILE_INVALID";
- case 0xC0000099: return "STATUS_ALLOTTED_SPACE_EXCEEDED";
- case 0xC000009A: return "STATUS_INSUFFICIENT_RESOURCES";
- case 0xC000009B: return "STATUS_DFS_EXIT_PATH_FOUND";
- case 0xC000009C: return "STATUS_DEVICE_DATA_ERROR";
- case 0xC000009D: return "STATUS_DEVICE_NOT_CONNECTED";
- case 0xC000009E: return "STATUS_DEVICE_POWER_FAILURE";
- case 0xC000009F: return "STATUS_FREE_VM_NOT_AT_BASE";
- case 0xC00000A0: return "STATUS_MEMORY_NOT_ALLOCATED";
- case 0xC00000A1: return "STATUS_WORKING_SET_QUOTA";
- case 0xC00000A2: return "STATUS_MEDIA_WRITE_PROTECTED";
- case 0xC00000A3: return "STATUS_DEVICE_NOT_READY";
- case 0xC00000A4: return "STATUS_INVALID_GROUP_ATTRIBUTES";
- case 0xC00000A5: return "STATUS_BAD_IMPERSONATION_LEVEL";
- case 0xC00000A6: return "STATUS_CANT_OPEN_ANONYMOUS";
- case 0xC00000A7: return "STATUS_BAD_VALIDATION_CLASS";
- case 0xC00000A8: return "STATUS_BAD_TOKEN_TYPE";
- case 0xC00000A9: return "STATUS_BAD_MASTER_BOOT_RECORD";
- case 0xC00000AA: return "STATUS_INSTRUCTION_MISALIGNMENT";
- case 0xC00000AB: return "STATUS_INSTANCE_NOT_AVAILABLE";
- case 0xC00000AC: return "STATUS_PIPE_NOT_AVAILABLE";
- case 0xC00000AD: return "STATUS_INVALID_PIPE_STATE";
- case 0xC00000AE: return "STATUS_PIPE_BUSY";
- case 0xC00000AF: return "STATUS_ILLEGAL_FUNCTION";
- case 0xC00000B0: return "STATUS_PIPE_DISCONNECTED";
- case 0xC00000B1: return "STATUS_PIPE_CLOSING";
- case 0xC00000B2: return "STATUS_PIPE_CONNECTED";
- case 0xC00000B3: return "STATUS_PIPE_LISTENING";
- case 0xC00000B4: return "STATUS_INVALID_READ_MODE";
- case 0xC00000B5: return "STATUS_IO_TIMEOUT";
- case 0xC00000B6: return "STATUS_FILE_FORCED_CLOSED";
- case 0xC00000B7: return "STATUS_PROFILING_NOT_STARTED";
- case 0xC00000B8: return "STATUS_PROFILING_NOT_STOPPED";
- case 0xC00000B9: return "STATUS_COULD_NOT_INTERPRET";
- case 0xC00000BA: return "STATUS_FILE_IS_A_DIRECTORY";
- case 0xC00000BB: return "STATUS_NOT_SUPPORTED";
- case 0xC00000BC: return "STATUS_REMOTE_NOT_LISTENING";
- case 0xC00000BD: return "STATUS_DUPLICATE_NAME";
- case 0xC00000BE: return "STATUS_BAD_NETWORK_PATH";
- case 0xC00000BF: return "STATUS_NETWORK_BUSY";
- case 0xC00000C0: return "STATUS_DEVICE_DOES_NOT_EXIST";
- case 0xC00000C1: return "STATUS_TOO_MANY_COMMANDS";
- case 0xC00000C2: return "STATUS_ADAPTER_HARDWARE_ERROR";
- case 0xC00000C3: return "STATUS_INVALID_NETWORK_RESPONSE";
- case 0xC00000C4: return "STATUS_UNEXPECTED_NETWORK_ERROR";
- case 0xC00000C5: return "STATUS_BAD_REMOTE_ADAPTER";
- case 0xC00000C6: return "STATUS_PRINT_QUEUE_FULL";
- case 0xC00000C7: return "STATUS_NO_SPOOL_SPACE";
- case 0xC00000C8: return "STATUS_PRINT_CANCELLED";
- case 0xC00000C9: return "STATUS_NETWORK_NAME_DELETED";
- case 0xC00000CA: return "STATUS_NETWORK_ACCESS_DENIED";
- case 0xC00000CB: return "STATUS_BAD_DEVICE_TYPE";
- case 0xC00000CC: return "STATUS_BAD_NETWORK_NAME";
- case 0xC00000CD: return "STATUS_TOO_MANY_NAMES";
- case 0xC00000CE: return "STATUS_TOO_MANY_SESSIONS";
- case 0xC00000CF: return "STATUS_SHARING_PAUSED";
- case 0xC00000D0: return "STATUS_REQUEST_NOT_ACCEPTED";
- case 0xC00000D1: return "STATUS_REDIRECTOR_PAUSED";
- case 0xC00000D2: return "STATUS_NET_WRITE_FAULT";
- case 0xC00000D3: return "STATUS_PROFILING_AT_LIMIT";
- case 0xC00000D4: return "STATUS_NOT_SAME_DEVICE";
- case 0xC00000D5: return "STATUS_FILE_RENAMED";
- case 0xC00000D6: return "STATUS_VIRTUAL_CIRCUIT_CLOSED";
- case 0xC00000D7: return "STATUS_NO_SECURITY_ON_OBJECT";
- case 0xC00000D8: return "STATUS_CANT_WAIT";
- case 0xC00000D9: return "STATUS_PIPE_EMPTY";
- case 0xC00000DA: return "STATUS_CANT_ACCESS_DOMAIN_INFO";
- case 0xC00000DB: return "STATUS_CANT_TERMINATE_SELF";
- case 0xC00000DC: return "STATUS_INVALID_SERVER_STATE";
- case 0xC00000DD: return "STATUS_INVALID_DOMAIN_STATE";
- case 0xC00000DE: return "STATUS_INVALID_DOMAIN_ROLE";
- case 0xC00000DF: return "STATUS_NO_SUCH_DOMAIN";
- case 0xC00000E0: return "STATUS_DOMAIN_EXISTS";
- case 0xC00000E1: return "STATUS_DOMAIN_LIMIT_EXCEEDED";
- case 0xC00000E2: return "STATUS_OPLOCK_NOT_GRANTED";
- case 0xC00000E3: return "STATUS_INVALID_OPLOCK_PROTOCOL";
- case 0xC00000E4: return "STATUS_INTERNAL_DB_CORRUPTION";
- case 0xC00000E5: return "STATUS_INTERNAL_ERROR";
- case 0xC00000E6: return "STATUS_GENERIC_NOT_MAPPED";
- case 0xC00000E7: return "STATUS_BAD_DESCRIPTOR_FORMAT";
- case 0xC00000E8: return "STATUS_INVALID_USER_BUFFER";
- case 0xC00000E9: return "STATUS_UNEXPECTED_IO_ERROR";
- case 0xC00000EA: return "STATUS_UNEXPECTED_MM_CREATE_ERR";
- case 0xC00000EB: return "STATUS_UNEXPECTED_MM_MAP_ERROR";
- case 0xC00000EC: return "STATUS_UNEXPECTED_MM_EXTEND_ERR";
- case 0xC00000ED: return "STATUS_NOT_LOGON_PROCESS";
- case 0xC00000EE: return "STATUS_LOGON_SESSION_EXISTS";
- case 0xC00000EF: return "STATUS_INVALID_PARAMETER_1";
- case 0xC00000F0: return "STATUS_INVALID_PARAMETER_2";
- case 0xC00000F1: return "STATUS_INVALID_PARAMETER_3";
- case 0xC00000F2: return "STATUS_INVALID_PARAMETER_4";
- case 0xC00000F3: return "STATUS_INVALID_PARAMETER_5";
- case 0xC00000F4: return "STATUS_INVALID_PARAMETER_6";
- case 0xC00000F5: return "STATUS_INVALID_PARAMETER_7";
- case 0xC00000F6: return "STATUS_INVALID_PARAMETER_8";
- case 0xC00000F7: return "STATUS_INVALID_PARAMETER_9";
- case 0xC00000F8: return "STATUS_INVALID_PARAMETER_10";
- case 0xC00000F9: return "STATUS_INVALID_PARAMETER_11";
- case 0xC00000FA: return "STATUS_INVALID_PARAMETER_12";
- case 0xC00000FB: return "STATUS_REDIRECTOR_NOT_STARTED";
- case 0xC00000FC: return "STATUS_REDIRECTOR_STARTED";
- case 0xC00000FD: return "STATUS_STACK_OVERFLOW";
- case 0xC00000FE: return "STATUS_NO_SUCH_PACKAGE";
- case 0xC00000FF: return "STATUS_BAD_FUNCTION_TABLE";
- case 0xC0000100: return "STATUS_VARIABLE_NOT_FOUND";
- case 0xC0000101: return "STATUS_DIRECTORY_NOT_EMPTY";
- case 0xC0000102: return "STATUS_FILE_CORRUPT_ERROR";
- case 0xC0000103: return "STATUS_NOT_A_DIRECTORY";
- case 0xC0000104: return "STATUS_BAD_LOGON_SESSION_STATE";
- case 0xC0000105: return "STATUS_LOGON_SESSION_COLLISION";
- case 0xC0000106: return "STATUS_NAME_TOO_LONG";
- case 0xC0000107: return "STATUS_FILES_OPEN";
- case 0xC0000108: return "STATUS_CONNECTION_IN_USE";
- case 0xC0000109: return "STATUS_MESSAGE_NOT_FOUND";
- case 0xC000010A: return "STATUS_PROCESS_IS_TERMINATING";
- case 0xC000010B: return "STATUS_INVALID_LOGON_TYPE";
- case 0xC000010C: return "STATUS_NO_GUID_TRANSLATION";
- case 0xC000010D: return "STATUS_CANNOT_IMPERSONATE";
- case 0xC000010E: return "STATUS_IMAGE_ALREADY_LOADED";
- case 0xC000010F: return "STATUS_ABIOS_NOT_PRESENT";
- case 0xC0000110: return "STATUS_ABIOS_LID_NOT_EXIST";
- case 0xC0000111: return "STATUS_ABIOS_LID_ALREADY_OWNED";
- case 0xC0000112: return "STATUS_ABIOS_NOT_LID_OWNER";
- case 0xC0000113: return "STATUS_ABIOS_INVALID_COMMAND";
- case 0xC0000114: return "STATUS_ABIOS_INVALID_LID";
- case 0xC0000115: return "STATUS_ABIOS_SELECTOR_NOT_AVAILABLE";
- case 0xC0000116: return "STATUS_ABIOS_INVALID_SELECTOR";
- case 0xC0000117: return "STATUS_NO_LDT";
- case 0xC0000118: return "STATUS_INVALID_LDT_SIZE";
- case 0xC0000119: return "STATUS_INVALID_LDT_OFFSET";
- case 0xC000011A: return "STATUS_INVALID_LDT_DESCRIPTOR";
- case 0xC000011B: return "STATUS_INVALID_IMAGE_NE_FORMAT";
- case 0xC000011C: return "STATUS_RXACT_INVALID_STATE";
- case 0xC000011D: return "STATUS_RXACT_COMMIT_FAILURE";
- case 0xC000011E: return "STATUS_MAPPED_FILE_SIZE_ZERO";
- case 0xC000011F: return "STATUS_TOO_MANY_OPENED_FILES";
- case 0xC0000120: return "STATUS_CANCELLED";
- case 0xC0000121: return "STATUS_CANNOT_DELETE";
- case 0xC0000122: return "STATUS_INVALID_COMPUTER_NAME";
- case 0xC0000123: return "STATUS_FILE_DELETED";
- case 0xC0000124: return "STATUS_SPECIAL_ACCOUNT";
- case 0xC0000125: return "STATUS_SPECIAL_GROUP";
- case 0xC0000126: return "STATUS_SPECIAL_USER";
- case 0xC0000127: return "STATUS_MEMBERS_PRIMARY_GROUP";
- case 0xC0000128: return "STATUS_FILE_CLOSED";
- case 0xC0000129: return "STATUS_TOO_MANY_THREADS";
- case 0xC000012A: return "STATUS_THREAD_NOT_IN_PROCESS";
- case 0xC000012B: return "STATUS_TOKEN_ALREADY_IN_USE";
- case 0xC000012C: return "STATUS_PAGEFILE_QUOTA_EXCEEDED";
- case 0xC000012D: return "STATUS_COMMITMENT_LIMIT";
- case 0xC000012E: return "STATUS_INVALID_IMAGE_LE_FORMAT";
- case 0xC000012F: return "STATUS_INVALID_IMAGE_NOT_MZ";
- case 0xC0000130: return "STATUS_INVALID_IMAGE_PROTECT";
- case 0xC0000131: return "STATUS_INVALID_IMAGE_WIN_16";
- case 0xC0000132: return "STATUS_LOGON_SERVER_CONFLICT";
- case 0xC0000133: return "STATUS_TIME_DIFFERENCE_AT_DC";
- case 0xC0000134: return "STATUS_SYNCHRONIZATION_REQUIRED";
- case 0xC0000135: return "STATUS_DLL_NOT_FOUND";
- case 0xC0000136: return "STATUS_OPEN_FAILED";
- case 0xC0000137: return "STATUS_IO_PRIVILEGE_FAILED";
- case 0xC0000138: return "STATUS_ORDINAL_NOT_FOUND";
- case 0xC0000139: return "STATUS_ENTRYPOINT_NOT_FOUND";
- case 0xC000013A: return "STATUS_CONTROL_C_EXIT";
- case 0xC000013B: return "STATUS_LOCAL_DISCONNECT";
- case 0xC000013C: return "STATUS_REMOTE_DISCONNECT";
- case 0xC000013D: return "STATUS_REMOTE_RESOURCES";
- case 0xC000013E: return "STATUS_LINK_FAILED";
- case 0xC000013F: return "STATUS_LINK_TIMEOUT";
- case 0xC0000140: return "STATUS_INVALID_CONNECTION";
- case 0xC0000141: return "STATUS_INVALID_ADDRESS";
- case 0xC0000142: return "STATUS_DLL_INIT_FAILED";
- case 0xC0000143: return "STATUS_MISSING_SYSTEMFILE";
- case 0xC0000144: return "STATUS_UNHANDLED_EXCEPTION";
- case 0xC0000145: return "STATUS_APP_INIT_FAILURE";
- case 0xC0000146: return "STATUS_PAGEFILE_CREATE_FAILED";
- case 0xC0000147: return "STATUS_NO_PAGEFILE";
- case 0xC0000148: return "STATUS_INVALID_LEVEL";
- case 0xC0000149: return "STATUS_WRONG_PASSWORD_CORE";
- case 0xC000014A: return "STATUS_ILLEGAL_FLOAT_CONTEXT";
- case 0xC000014B: return "STATUS_PIPE_BROKEN";
- case 0xC000014C: return "STATUS_REGISTRY_CORRUPT";
- case 0xC000014D: return "STATUS_REGISTRY_IO_FAILED";
- case 0xC000014E: return "STATUS_NO_EVENT_PAIR";
- case 0xC000014F: return "STATUS_UNRECOGNIZED_VOLUME";
- case 0xC0000150: return "STATUS_SERIAL_NO_DEVICE_INITED";
- case 0xC0000151: return "STATUS_NO_SUCH_ALIAS";
- case 0xC0000152: return "STATUS_MEMBER_NOT_IN_ALIAS";
- case 0xC0000153: return "STATUS_MEMBER_IN_ALIAS";
- case 0xC0000154: return "STATUS_ALIAS_EXISTS";
- case 0xC0000155: return "STATUS_LOGON_NOT_GRANTED";
- case 0xC0000156: return "STATUS_TOO_MANY_SECRETS";
- case 0xC0000157: return "STATUS_SECRET_TOO_LONG";
- case 0xC0000158: return "STATUS_INTERNAL_DB_ERROR";
- case 0xC0000159: return "STATUS_FULLSCREEN_MODE";
- case 0xC000015A: return "STATUS_TOO_MANY_CONTEXT_IDS";
- case 0xC000015B: return "STATUS_LOGON_TYPE_NOT_GRANTED";
- case 0xC000015C: return "STATUS_NOT_REGISTRY_FILE";
- case 0xC000015D: return "STATUS_NT_CROSS_ENCRYPTION_REQUIRED";
- case 0xC000015E: return "STATUS_DOMAIN_CTRLR_CONFIG_ERROR";
- case 0xC000015F: return "STATUS_FT_MISSING_MEMBER";
- case 0xC0000160: return "STATUS_ILL_FORMED_SERVICE_ENTRY";
- case 0xC0000161: return "STATUS_ILLEGAL_CHARACTER";
- case 0xC0000162: return "STATUS_UNMAPPABLE_CHARACTER";
- case 0xC0000163: return "STATUS_UNDEFINED_CHARACTER";
- case 0xC0000164: return "STATUS_FLOPPY_VOLUME";
- case 0xC0000165: return "STATUS_FLOPPY_ID_MARK_NOT_FOUND";
- case 0xC0000166: return "STATUS_FLOPPY_WRONG_CYLINDER";
- case 0xC0000167: return "STATUS_FLOPPY_UNKNOWN_ERROR";
- case 0xC0000168: return "STATUS_FLOPPY_BAD_REGISTERS";
- case 0xC0000169: return "STATUS_DISK_RECALIBRATE_FAILED";
- case 0xC000016A: return "STATUS_DISK_OPERATION_FAILED";
- case 0xC000016B: return "STATUS_DISK_RESET_FAILED";
- case 0xC000016C: return "STATUS_SHARED_IRQ_BUSY";
- case 0xC000016D: return "STATUS_FT_ORPHANING";
- case 0xC000016E: return "STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT";
- case 0xC0000172: return "STATUS_PARTITION_FAILURE";
- case 0xC0000173: return "STATUS_INVALID_BLOCK_LENGTH";
- case 0xC0000174: return "STATUS_DEVICE_NOT_PARTITIONED";
- case 0xC0000175: return "STATUS_UNABLE_TO_LOCK_MEDIA";
- case 0xC0000176: return "STATUS_UNABLE_TO_UNLOAD_MEDIA";
- case 0xC0000177: return "STATUS_EOM_OVERFLOW";
- case 0xC0000178: return "STATUS_NO_MEDIA";
- case 0xC000017A: return "STATUS_NO_SUCH_MEMBER";
- case 0xC000017B: return "STATUS_INVALID_MEMBER";
- case 0xC000017C: return "STATUS_KEY_DELETED";
- case 0xC000017D: return "STATUS_NO_LOG_SPACE";
- case 0xC000017E: return "STATUS_TOO_MANY_SIDS";
- case 0xC000017F: return "STATUS_LM_CROSS_ENCRYPTION_REQUIRED";
- case 0xC0000180: return "STATUS_KEY_HAS_CHILDREN";
- case 0xC0000181: return "STATUS_CHILD_MUST_BE_VOLATILE";
- case 0xC0000182: return "STATUS_DEVICE_CONFIGURATION_ERROR";
- case 0xC0000183: return "STATUS_DRIVER_INTERNAL_ERROR";
- case 0xC0000184: return "STATUS_INVALID_DEVICE_STATE";
- case 0xC0000185: return "STATUS_IO_DEVICE_ERROR";
- case 0xC0000186: return "STATUS_DEVICE_PROTOCOL_ERROR";
- case 0xC0000187: return "STATUS_BACKUP_CONTROLLER";
- case 0xC0000188: return "STATUS_LOG_FILE_FULL";
- case 0xC0000189: return "STATUS_TOO_LATE";
- case 0xC000018A: return "STATUS_NO_TRUST_LSA_SECRET";
- case 0xC000018B: return "STATUS_NO_TRUST_SAM_ACCOUNT";
- case 0xC000018C: return "STATUS_TRUSTED_DOMAIN_FAILURE";
- case 0xC000018D: return "STATUS_TRUSTED_RELATIONSHIP_FAILURE";
- case 0xC000018E: return "STATUS_EVENTLOG_FILE_CORRUPT";
- case 0xC000018F: return "STATUS_EVENTLOG_CANT_START";
- case 0xC0000190: return "STATUS_TRUST_FAILURE";
- case 0xC0000191: return "STATUS_MUTANT_LIMIT_EXCEEDED";
- case 0xC0000192: return "STATUS_NETLOGON_NOT_STARTED";
- case 0xC0000193: return "STATUS_ACCOUNT_EXPIRED";
- case 0xC0000194: return "STATUS_POSSIBLE_DEADLOCK";
- case 0xC0000195: return "STATUS_NETWORK_CREDENTIAL_CONFLICT";
- case 0xC0000196: return "STATUS_REMOTE_SESSION_LIMIT";
- case 0xC0000197: return "STATUS_EVENTLOG_FILE_CHANGED";
- case 0xC0000198: return "STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT";
- case 0xC0000199: return "STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT";
- case 0xC000019A: return "STATUS_NOLOGON_SERVER_TRUST_ACCOUNT";
- case 0xC000019B: return "STATUS_DOMAIN_TRUST_INCONSISTENT";
- case 0xC000019C: return "STATUS_FS_DRIVER_REQUIRED";
- case 0xC0000202: return "STATUS_NO_USER_SESSION_KEY";
- case 0xC0000203: return "STATUS_USER_SESSION_DELETED";
- case 0xC0000204: return "STATUS_RESOURCE_LANG_NOT_FOUND";
- case 0xC0000205: return "STATUS_INSUFF_SERVER_RESOURCES";
- case 0xC0000206: return "STATUS_INVALID_BUFFER_SIZE";
- case 0xC0000207: return "STATUS_INVALID_ADDRESS_COMPONENT";
- case 0xC0000208: return "STATUS_INVALID_ADDRESS_WILDCARD";
- case 0xC0000209: return "STATUS_TOO_MANY_ADDRESSES";
- case 0xC000020A: return "STATUS_ADDRESS_ALREADY_EXISTS";
- case 0xC000020B: return "STATUS_ADDRESS_CLOSED";
- case 0xC000020C: return "STATUS_CONNECTION_DISCONNECTED";
- case 0xC000020D: return "STATUS_CONNECTION_RESET";
- case 0xC000020E: return "STATUS_TOO_MANY_NODES";
- case 0xC000020F: return "STATUS_TRANSACTION_ABORTED";
- case 0xC0000210: return "STATUS_TRANSACTION_TIMED_OUT";
- case 0xC0000211: return "STATUS_TRANSACTION_NO_RELEASE";
- case 0xC0000212: return "STATUS_TRANSACTION_NO_MATCH";
- case 0xC0000213: return "STATUS_TRANSACTION_RESPONDED";
- case 0xC0000214: return "STATUS_TRANSACTION_INVALID_ID";
- case 0xC0000215: return "STATUS_TRANSACTION_INVALID_TYPE";
- case 0xC0000216: return "STATUS_NOT_SERVER_SESSION";
- case 0xC0000217: return "STATUS_NOT_CLIENT_SESSION";
- case 0xC0000218: return "STATUS_CANNOT_LOAD_REGISTRY_FILE";
- case 0xC0000219: return "STATUS_DEBUG_ATTACH_FAILED";
- case 0xC000021A: return "STATUS_SYSTEM_PROCESS_TERMINATED";
- case 0xC000021B: return "STATUS_DATA_NOT_ACCEPTED";
- case 0xC000021C: return "STATUS_NO_BROWSER_SERVERS_FOUND";
- case 0xC000021D: return "STATUS_VDM_HARD_ERROR";
- case 0xC000021E: return "STATUS_DRIVER_CANCEL_TIMEOUT";
- case 0xC000021F: return "STATUS_REPLY_MESSAGE_MISMATCH";
- case 0xC0000220: return "STATUS_MAPPED_ALIGNMENT";
- case 0xC0000221: return "STATUS_IMAGE_CHECKSUM_MISMATCH";
- case 0xC0000222: return "STATUS_LOST_WRITEBEHIND_DATA";
- case 0xC0000223: return "STATUS_CLIENT_SERVER_PARAMETERS_INVALID";
- case 0xC0000224: return "STATUS_PASSWORD_MUST_CHANGE";
- case 0xC0000225: return "STATUS_NOT_FOUND";
- case 0xC0000226: return "STATUS_NOT_TINY_STREAM";
- case 0xC0000227: return "STATUS_RECOVERY_FAILURE";
- case 0xC0000228: return "STATUS_STACK_OVERFLOW_READ";
- case 0xC0000229: return "STATUS_FAIL_CHECK";
- case 0xC000022A: return "STATUS_DUPLICATE_OBJECTID";
- case 0xC000022B: return "STATUS_OBJECTID_EXISTS";
- case 0xC000022C: return "STATUS_CONVERT_TO_LARGE";
- case 0xC000022D: return "STATUS_RETRY";
- case 0xC000022E: return "STATUS_FOUND_OUT_OF_SCOPE";
- case 0xC000022F: return "STATUS_ALLOCATE_BUCKET";
- case 0xC0000230: return "STATUS_PROPSET_NOT_FOUND";
- case 0xC0000231: return "STATUS_MARSHALL_OVERFLOW";
- case 0xC0000232: return "STATUS_INVALID_VARIANT";
- case 0xC0000233: return "STATUS_DOMAIN_CONTROLLER_NOT_FOUND";
- case 0xC0000234: return "STATUS_ACCOUNT_LOCKED_OUT";
- case 0xC0000235: return "STATUS_HANDLE_NOT_CLOSABLE";
- case 0xC0000236: return "STATUS_CONNECTION_REFUSED";
- case 0xC0000237: return "STATUS_GRACEFUL_DISCONNECT";
- case 0xC0000238: return "STATUS_ADDRESS_ALREADY_ASSOCIATED";
- case 0xC0000239: return "STATUS_ADDRESS_NOT_ASSOCIATED";
- case 0xC000023A: return "STATUS_CONNECTION_INVALID";
- case 0xC000023B: return "STATUS_CONNECTION_ACTIVE";
- case 0xC000023C: return "STATUS_NETWORK_UNREACHABLE";
- case 0xC000023D: return "STATUS_HOST_UNREACHABLE";
- case 0xC000023E: return "STATUS_PROTOCOL_UNREACHABLE";
- case 0xC000023F: return "STATUS_PORT_UNREACHABLE";
- case 0xC0000240: return "STATUS_REQUEST_ABORTED";
- case 0xC0000241: return "STATUS_CONNECTION_ABORTED";
- case 0xC0000242: return "STATUS_BAD_COMPRESSION_BUFFER";
- case 0xC0000243: return "STATUS_USER_MAPPED_FILE";
- case 0xC0000244: return "STATUS_AUDIT_FAILED";
- case 0xC0000245: return "STATUS_TIMER_RESOLUTION_NOT_SET";
- case 0xC0000246: return "STATUS_CONNECTION_COUNT_LIMIT";
- case 0xC0000247: return "STATUS_LOGIN_TIME_RESTRICTION";
- case 0xC0000248: return "STATUS_LOGIN_WKSTA_RESTRICTION";
- case 0xC0000249: return "STATUS_IMAGE_MP_UP_MISMATCH";
- case 0xC0000250: return "STATUS_INSUFFICIENT_LOGON_INFO";
- case 0xC0000251: return "STATUS_BAD_DLL_ENTRYPOINT";
- case 0xC0000252: return "STATUS_BAD_SERVICE_ENTRYPOINT";
- case 0xC0000253: return "STATUS_LPC_REPLY_LOST";
- case 0xC0000254: return "STATUS_IP_ADDRESS_CONFLICT1";
- case 0xC0000255: return "STATUS_IP_ADDRESS_CONFLICT2";
- case 0xC0000256: return "STATUS_REGISTRY_QUOTA_LIMIT";
- case 0xC0000257: return "STATUS_PATH_NOT_COVERED";
- case 0xC0000258: return "STATUS_NO_CALLBACK_ACTIVE";
- case 0xC0000259: return "STATUS_LICENSE_QUOTA_EXCEEDED";
- case 0xC000025A: return "STATUS_PWD_TOO_SHORT";
- case 0xC000025B: return "STATUS_PWD_TOO_RECENT";
- case 0xC000025C: return "STATUS_PWD_HISTORY_CONFLICT";
- case 0xC000025E: return "STATUS_PLUGPLAY_NO_DEVICE";
- case 0xC000025F: return "STATUS_UNSUPPORTED_COMPRESSION";
- case 0xC0000260: return "STATUS_INVALID_HW_PROFILE";
- case 0xC0000261: return "STATUS_INVALID_PLUGPLAY_DEVICE_PATH";
- case 0xC0000262: return "STATUS_DRIVER_ORDINAL_NOT_FOUND";
- case 0xC0000263: return "STATUS_DRIVER_ENTRYPOINT_NOT_FOUND";
- case 0xC0000264: return "STATUS_RESOURCE_NOT_OWNED";
- case 0xC0000265: return "STATUS_TOO_MANY_LINKS";
- case 0xC0000266: return "STATUS_QUOTA_LIST_INCONSISTENT";
- case 0xC0000267: return "STATUS_FILE_IS_OFFLINE";
- case 0xC0000268: return "STATUS_EVALUATION_EXPIRATION";
- case 0xC0000269: return "STATUS_ILLEGAL_DLL_RELOCATION";
- case 0xC000026A: return "STATUS_LICENSE_VIOLATION";
- case 0xC000026B: return "STATUS_DLL_INIT_FAILED_LOGOFF";
- case 0xC000026C: return "STATUS_DRIVER_UNABLE_TO_LOAD";
- case 0xC000026D: return "STATUS_DFS_UNAVAILABLE";
- case 0xC000026E: return "STATUS_VOLUME_DISMOUNTED";
- case 0xC000026F: return "STATUS_WX86_INTERNAL_ERROR";
- case 0xC0000270: return "STATUS_WX86_FLOAT_STACK_CHECK";
- case 0xC0000271: return "STATUS_VALIDATE_CONTINUE";
- case 0xC0000272: return "STATUS_NO_MATCH";
- case 0xC0000273: return "STATUS_NO_MORE_MATCHES";
- case 0xC0000275: return "STATUS_NOT_A_REPARSE_POINT";
- case 0xC0000276: return "STATUS_IO_REPARSE_TAG_INVALID";
- case 0xC0000277: return "STATUS_IO_REPARSE_TAG_MISMATCH";
- case 0xC0000278: return "STATUS_IO_REPARSE_DATA_INVALID";
- case 0xC0000279: return "STATUS_IO_REPARSE_TAG_NOT_HANDLED";
- case 0xC0000280: return "STATUS_REPARSE_POINT_NOT_RESOLVED";
- case 0xC0000281: return "STATUS_DIRECTORY_IS_A_REPARSE_POINT";
- case 0xC0000282: return "STATUS_RANGE_LIST_CONFLICT";
- case 0xC0000283: return "STATUS_SOURCE_ELEMENT_EMPTY";
- case 0xC0000284: return "STATUS_DESTINATION_ELEMENT_FULL";
- case 0xC0000285: return "STATUS_ILLEGAL_ELEMENT_ADDRESS";
- case 0xC0000286: return "STATUS_MAGAZINE_NOT_PRESENT";
- case 0xC0000287: return "STATUS_REINITIALIZATION_NEEDED";
- case 0x80000288: return "STATUS_DEVICE_REQUIRES_CLEANING";
- case 0x80000289: return "STATUS_DEVICE_DOOR_OPEN";
- case 0xC000028A: return "STATUS_ENCRYPTION_FAILED";
- case 0xC000028B: return "STATUS_DECRYPTION_FAILED";
- case 0xC000028C: return "STATUS_RANGE_NOT_FOUND";
- case 0xC000028D: return "STATUS_NO_RECOVERY_POLICY";
- case 0xC000028E: return "STATUS_NO_EFS";
- case 0xC000028F: return "STATUS_WRONG_EFS";
- case 0xC0000290: return "STATUS_NO_USER_KEYS";
- case 0xC0000291: return "STATUS_FILE_NOT_ENCRYPTED";
- case 0xC0000292: return "STATUS_NOT_EXPORT_FORMAT";
- case 0xC0000293: return "STATUS_FILE_ENCRYPTED";
- case 0x40000294: return "STATUS_WAKE_SYSTEM";
- case 0xC0000295: return "STATUS_WMI_GUID_NOT_FOUND";
- case 0xC0000296: return "STATUS_WMI_INSTANCE_NOT_FOUND";
- case 0xC0000297: return "STATUS_WMI_ITEMID_NOT_FOUND";
- case 0xC0000298: return "STATUS_WMI_TRY_AGAIN";
- case 0xC0000299: return "STATUS_SHARED_POLICY";
- case 0xC000029A: return "STATUS_POLICY_OBJECT_NOT_FOUND";
- case 0xC000029B: return "STATUS_POLICY_ONLY_IN_DS";
- case 0xC000029C: return "STATUS_VOLUME_NOT_UPGRADED";
- case 0xC000029D: return "STATUS_REMOTE_STORAGE_NOT_ACTIVE";
- case 0xC000029E: return "STATUS_REMOTE_STORAGE_MEDIA_ERROR";
- case 0xC000029F: return "STATUS_NO_TRACKING_SERVICE";
- case 0xC00002A0: return "STATUS_SERVER_SID_MISMATCH";
- case 0xC00002A1: return "STATUS_DS_NO_ATTRIBUTE_OR_VALUE";
- case 0xC00002A2: return "STATUS_DS_INVALID_ATTRIBUTE_SYNTAX";
- case 0xC00002A3: return "STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED";
- case 0xC00002A4: return "STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS";
- case 0xC00002A5: return "STATUS_DS_BUSY";
- case 0xC00002A6: return "STATUS_DS_UNAVAILABLE";
- case 0xC00002A7: return "STATUS_DS_NO_RIDS_ALLOCATED";
- case 0xC00002A8: return "STATUS_DS_NO_MORE_RIDS";
- case 0xC00002A9: return "STATUS_DS_INCORRECT_ROLE_OWNER";
- case 0xC00002AA: return "STATUS_DS_RIDMGR_INIT_ERROR";
- case 0xC00002AB: return "STATUS_DS_OBJ_CLASS_VIOLATION";
- case 0xC00002AC: return "STATUS_DS_CANT_ON_NON_LEAF";
- case 0xC00002AD: return "STATUS_DS_CANT_ON_RDN";
- case 0xC00002AE: return "STATUS_DS_CANT_MOD_OBJ_CLASS";
- case 0xC00002AF: return "STATUS_DS_CROSS_DOM_MOVE_FAILED";
- case 0xC00002B0: return "STATUS_DS_GC_NOT_AVAILABLE";
- case 0xC00002B1: return "STATUS_DIRECTORY_SERVICE_REQUIRED";
- case 0xC00002B2: return "STATUS_REPARSE_ATTRIBUTE_CONFLICT";
- case 0xC00002B3: return "STATUS_CANT_ENABLE_DENY_ONLY";
- case 0xC00002B4: return "STATUS_FLOAT_MULTIPLE_FAULTS";
- case 0xC00002B5: return "STATUS_FLOAT_MULTIPLE_TRAPS";
- case 0xC00002B6: return "STATUS_DEVICE_REMOVED";
- case 0xC00002B7: return "STATUS_JOURNAL_DELETE_IN_PROGRESS";
- case 0xC00002B8: return "STATUS_JOURNAL_NOT_ACTIVE";
- case 0xC00002B9: return "STATUS_NOINTERFACE";
- case 0xC00002C1: return "STATUS_DS_ADMIN_LIMIT_EXCEEDED";
- case 0xC00002C2: return "STATUS_DRIVER_FAILED_SLEEP";
- case 0xC00002C3: return "STATUS_MUTUAL_AUTHENTICATION_FAILED";
- case 0xC00002C4: return "STATUS_CORRUPT_SYSTEM_FILE";
- case 0xC00002C5: return "STATUS_DATATYPE_MISALIGNMENT_ERROR";
- case 0xC00002C6: return "STATUS_WMI_READ_ONLY";
- case 0xC00002C7: return "STATUS_WMI_SET_FAILURE";
- case 0xC00002C8: return "STATUS_COMMITMENT_MINIMUM";
- case 0xC00002C9: return "STATUS_REG_NAT_CONSUMPTION";
- case 0xC00002CA: return "STATUS_TRANSPORT_FULL";
- case 0xC00002CB: return "STATUS_DS_SAM_INIT_FAILURE";
- case 0xC00002CC: return "STATUS_ONLY_IF_CONNECTED";
- case 0xC00002CD: return "STATUS_DS_SENSITIVE_GROUP_VIOLATION";
- case 0xC00002CE: return "STATUS_PNP_RESTART_ENUMERATION";
- case 0xC00002CF: return "STATUS_JOURNAL_ENTRY_DELETED";
- case 0xC00002D0: return "STATUS_DS_CANT_MOD_PRIMARYGROUPID";
- case 0xC00002D1: return "STATUS_SYSTEM_IMAGE_BAD_SIGNATURE";
- case 0xC00002D2: return "STATUS_PNP_REBOOT_REQUIRED";
- case 0xC00002D3: return "STATUS_POWER_STATE_INVALID";
- case 0xC00002D4: return "STATUS_DS_INVALID_GROUP_TYPE";
- case 0xC00002D5: return "STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN";
- case 0xC00002D6: return "STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN";
- case 0xC00002D7: return "STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER";
- case 0xC00002D8: return "STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER";
- case 0xC00002D9: return "STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER";
- case 0xC00002DA: return "STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER";
- case 0xC00002DB: return "STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER";
- case 0xC00002DC: return "STATUS_DS_HAVE_PRIMARY_MEMBERS";
- case 0xC00002DD: return "STATUS_WMI_NOT_SUPPORTED";
- case 0xC00002DE: return "STATUS_INSUFFICIENT_POWER";
- case 0xC00002DF: return "STATUS_SAM_NEED_BOOTKEY_PASSWORD";
- case 0xC00002E0: return "STATUS_SAM_NEED_BOOTKEY_FLOPPY";
- case 0xC00002E1: return "STATUS_DS_CANT_START";
- case 0xC00002E2: return "STATUS_DS_INIT_FAILURE";
- case 0xC00002E3: return "STATUS_SAM_INIT_FAILURE";
- case 0xC00002E4: return "STATUS_DS_GC_REQUIRED";
- case 0xC00002E5: return "STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY";
- case 0xC00002E6: return "STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS";
- case 0xC00002E7: return "STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED";
- case 0xC00002E8: return "STATUS_MULTIPLE_FAULT_VIOLATION";
- case 0xC0000300: return "STATUS_NOT_SUPPORTED_ON_SBS";
- case 0xC0009898: return "STATUS_WOW_ASSERTION";
- case 0xC0010001: return "DBG_NO_STATE_CHANGE";
- case 0xC0010002: return "DBG_APP_NOT_IDLE";
- case 0xC0020001: return "RPC_NT_INVALID_STRING_BINDING";
- case 0xC0020002: return "RPC_NT_WRONG_KIND_OF_BINDING";
- case 0xC0020003: return "RPC_NT_INVALID_BINDING";
- case 0xC0020004: return "RPC_NT_PROTSEQ_NOT_SUPPORTED";
- case 0xC0020005: return "RPC_NT_INVALID_RPC_PROTSEQ";
- case 0xC0020006: return "RPC_NT_INVALID_STRING_UUID";
- case 0xC0020007: return "RPC_NT_INVALID_ENDPOINT_FORMAT";
- case 0xC0020008: return "RPC_NT_INVALID_NET_ADDR";
- case 0xC0020009: return "RPC_NT_NO_ENDPOINT_FOUND";
- case 0xC002000A: return "RPC_NT_INVALID_TIMEOUT";
- case 0xC002000B: return "RPC_NT_OBJECT_NOT_FOUND";
- case 0xC002000C: return "RPC_NT_ALREADY_REGISTERED";
- case 0xC002000D: return "RPC_NT_TYPE_ALREADY_REGISTERED";
- case 0xC002000E: return "RPC_NT_ALREADY_LISTENING";
- case 0xC002000F: return "RPC_NT_NO_PROTSEQS_REGISTERED";
- case 0xC0020010: return "RPC_NT_NOT_LISTENING";
- case 0xC0020011: return "RPC_NT_UNKNOWN_MGR_TYPE";
- case 0xC0020012: return "RPC_NT_UNKNOWN_IF";
- case 0xC0020013: return "RPC_NT_NO_BINDINGS";
- case 0xC0020014: return "RPC_NT_NO_PROTSEQS";
- case 0xC0020015: return "RPC_NT_CANT_CREATE_ENDPOINT";
- case 0xC0020016: return "RPC_NT_OUT_OF_RESOURCES";
- case 0xC0020017: return "RPC_NT_SERVER_UNAVAILABLE";
- case 0xC0020018: return "RPC_NT_SERVER_TOO_BUSY";
- case 0xC0020019: return "RPC_NT_INVALID_NETWORK_OPTIONS";
- case 0xC002001A: return "RPC_NT_NO_CALL_ACTIVE";
- case 0xC002001B: return "RPC_NT_CALL_FAILED";
- case 0xC002001C: return "RPC_NT_CALL_FAILED_DNE";
- case 0xC002001D: return "RPC_NT_PROTOCOL_ERROR";
- case 0xC002001F: return "RPC_NT_UNSUPPORTED_TRANS_SYN";
- case 0xC0020021: return "RPC_NT_UNSUPPORTED_TYPE";
- case 0xC0020022: return "RPC_NT_INVALID_TAG";
- case 0xC0020023: return "RPC_NT_INVALID_BOUND";
- case 0xC0020024: return "RPC_NT_NO_ENTRY_NAME";
- case 0xC0020025: return "RPC_NT_INVALID_NAME_SYNTAX";
- case 0xC0020026: return "RPC_NT_UNSUPPORTED_NAME_SYNTAX";
- case 0xC0020028: return "RPC_NT_UUID_NO_ADDRESS";
- case 0xC0020029: return "RPC_NT_DUPLICATE_ENDPOINT";
- case 0xC002002A: return "RPC_NT_UNKNOWN_AUTHN_TYPE";
- case 0xC002002B: return "RPC_NT_MAX_CALLS_TOO_SMALL";
- case 0xC002002C: return "RPC_NT_STRING_TOO_LONG";
- case 0xC002002D: return "RPC_NT_PROTSEQ_NOT_FOUND";
- case 0xC002002E: return "RPC_NT_PROCNUM_OUT_OF_RANGE";
- case 0xC002002F: return "RPC_NT_BINDING_HAS_NO_AUTH";
- case 0xC0020030: return "RPC_NT_UNKNOWN_AUTHN_SERVICE";
- case 0xC0020031: return "RPC_NT_UNKNOWN_AUTHN_LEVEL";
- case 0xC0020032: return "RPC_NT_INVALID_AUTH_IDENTITY";
- case 0xC0020033: return "RPC_NT_UNKNOWN_AUTHZ_SERVICE";
- case 0xC0020034: return "EPT_NT_INVALID_ENTRY";
- case 0xC0020035: return "EPT_NT_CANT_PERFORM_OP";
- case 0xC0020036: return "EPT_NT_NOT_REGISTERED";
- case 0xC0020037: return "RPC_NT_NOTHING_TO_EXPORT";
- case 0xC0020038: return "RPC_NT_INCOMPLETE_NAME";
- case 0xC0020039: return "RPC_NT_INVALID_VERS_OPTION";
- case 0xC002003A: return "RPC_NT_NO_MORE_MEMBERS";
- case 0xC002003B: return "RPC_NT_NOT_ALL_OBJS_UNEXPORTED";
- case 0xC002003C: return "RPC_NT_INTERFACE_NOT_FOUND";
- case 0xC002003D: return "RPC_NT_ENTRY_ALREADY_EXISTS";
- case 0xC002003E: return "RPC_NT_ENTRY_NOT_FOUND";
- case 0xC002003F: return "RPC_NT_NAME_SERVICE_UNAVAILABLE";
- case 0xC0020040: return "RPC_NT_INVALID_NAF_ID";
- case 0xC0020041: return "RPC_NT_CANNOT_SUPPORT";
- case 0xC0020042: return "RPC_NT_NO_CONTEXT_AVAILABLE";
- case 0xC0020043: return "RPC_NT_INTERNAL_ERROR";
- case 0xC0020044: return "RPC_NT_ZERO_DIVIDE";
- case 0xC0020045: return "RPC_NT_ADDRESS_ERROR";
- case 0xC0020046: return "RPC_NT_FP_DIV_ZERO";
- case 0xC0020047: return "RPC_NT_FP_UNDERFLOW";
- case 0xC0020048: return "RPC_NT_FP_OVERFLOW";
- case 0xC0030001: return "RPC_NT_NO_MORE_ENTRIES";
- case 0xC0030002: return "RPC_NT_SS_CHAR_TRANS_OPEN_FAIL";
- case 0xC0030003: return "RPC_NT_SS_CHAR_TRANS_SHORT_FILE";
- case 0xC0030004: return "RPC_NT_SS_IN_NULL_CONTEXT";
- case 0xC0030005: return "RPC_NT_SS_CONTEXT_MISMATCH";
- case 0xC0030006: return "RPC_NT_SS_CONTEXT_DAMAGED";
- case 0xC0030007: return "RPC_NT_SS_HANDLES_MISMATCH";
- case 0xC0030008: return "RPC_NT_SS_CANNOT_GET_CALL_HANDLE";
- case 0xC0030009: return "RPC_NT_NULL_REF_POINTER";
- case 0xC003000A: return "RPC_NT_ENUM_VALUE_OUT_OF_RANGE";
- case 0xC003000B: return "RPC_NT_BYTE_COUNT_TOO_SMALL";
- case 0xC003000C: return "RPC_NT_BAD_STUB_DATA";
- case 0xC0020049: return "RPC_NT_CALL_IN_PROGRESS";
- case 0xC002004A: return "RPC_NT_NO_MORE_BINDINGS";
- case 0xC002004B: return "RPC_NT_GROUP_MEMBER_NOT_FOUND";
- case 0xC002004C: return "EPT_NT_CANT_CREATE";
- case 0xC002004D: return "RPC_NT_INVALID_OBJECT";
- case 0xC002004F: return "RPC_NT_NO_INTERFACES";
- case 0xC0020050: return "RPC_NT_CALL_CANCELLED";
- case 0xC0020051: return "RPC_NT_BINDING_INCOMPLETE";
- case 0xC0020052: return "RPC_NT_COMM_FAILURE";
- case 0xC0020053: return "RPC_NT_UNSUPPORTED_AUTHN_LEVEL";
- case 0xC0020054: return "RPC_NT_NO_PRINC_NAME";
- case 0xC0020055: return "RPC_NT_NOT_RPC_ERROR";
- case 0x40020056: return "RPC_NT_UUID_LOCAL_ONLY";
- case 0xC0020057: return "RPC_NT_SEC_PKG_ERROR";
- case 0xC0020058: return "RPC_NT_NOT_CANCELLED";
- case 0xC0030059: return "RPC_NT_INVALID_ES_ACTION";
- case 0xC003005A: return "RPC_NT_WRONG_ES_VERSION";
- case 0xC003005B: return "RPC_NT_WRONG_STUB_VERSION";
- case 0xC003005C: return "RPC_NT_INVALID_PIPE_OBJECT";
- case 0xC003005D: return "RPC_NT_INVALID_PIPE_OPERATION";
- case 0xC003005E: return "RPC_NT_WRONG_PIPE_VERSION";
- case 0xC003005F: return "RPC_NT_PIPE_CLOSED";
- case 0xC0030060: return "RPC_NT_PIPE_DISCIPLINE_ERROR";
- case 0xC0030061: return "RPC_NT_PIPE_EMPTY";
- case 0xC0020062: return "RPC_NT_INVALID_ASYNC_HANDLE";
- case 0xC0020063: return "RPC_NT_INVALID_ASYNC_CALL";
- case 0x400200AF: return "RPC_NT_SEND_INCOMPLETE";
- case 0xC0140001: return "STATUS_ACPI_INVALID_OPCODE";
- case 0xC0140002: return "STATUS_ACPI_STACK_OVERFLOW";
- case 0xC0140003: return "STATUS_ACPI_ASSERT_FAILED";
- case 0xC0140004: return "STATUS_ACPI_INVALID_INDEX";
- case 0xC0140005: return "STATUS_ACPI_INVALID_ARGUMENT";
- case 0xC0140006: return "STATUS_ACPI_FATAL";
- case 0xC0140007: return "STATUS_ACPI_INVALID_SUPERNAME";
- case 0xC0140008: return "STATUS_ACPI_INVALID_ARGTYPE";
- case 0xC0140009: return "STATUS_ACPI_INVALID_OBJTYPE";
- case 0xC014000A: return "STATUS_ACPI_INVALID_TARGETTYPE";
- case 0xC014000B: return "STATUS_ACPI_INCORRECT_ARGUMENT_COUNT";
- case 0xC014000C: return "STATUS_ACPI_ADDRESS_NOT_MAPPED";
- case 0xC014000D: return "STATUS_ACPI_INVALID_EVENTTYPE";
- case 0xC014000E: return "STATUS_ACPI_HANDLER_COLLISION";
- case 0xC014000F: return "STATUS_ACPI_INVALID_DATA";
- case 0xC0140010: return "STATUS_ACPI_INVALID_REGION";
- case 0xC0140011: return "STATUS_ACPI_INVALID_ACCESS_SIZE";
- case 0xC0140012: return "STATUS_ACPI_ACQUIRE_GLOBAL_LOCK";
- case 0xC0140013: return "STATUS_ACPI_ALREADY_INITIALIZED";
- case 0xC0140014: return "STATUS_ACPI_NOT_INITIALIZED";
- case 0xC0140015: return "STATUS_ACPI_INVALID_MUTEX_LEVEL";
- case 0xC0140016: return "STATUS_ACPI_MUTEX_NOT_OWNED";
- case 0xC0140017: return "STATUS_ACPI_MUTEX_NOT_OWNER";
- case 0xC0140018: return "STATUS_ACPI_RS_ACCESS";
- case 0xC0140019: return "STATUS_ACPI_INVALID_TABLE";
- case 0xC0140020: return "STATUS_ACPI_REG_HANDLER_FAILED";
- case 0xC0140021: return "STATUS_ACPI_POWER_REQUEST_FAILED";
- case 0xC00A0001: return "STATUS_CTX_WINSTATION_NAME_INVALID";
- case 0xC00A0002: return "STATUS_CTX_INVALID_PD";
- case 0xC00A0003: return "STATUS_CTX_PD_NOT_FOUND";
- case 0x400A0004: return "STATUS_CTX_CDM_CONNECT";
- case 0x400A0005: return "STATUS_CTX_CDM_DISCONNECT";
- case 0xC00A0006: return "STATUS_CTX_CLOSE_PENDING";
- case 0xC00A0007: return "STATUS_CTX_NO_OUTBUF";
- case 0xC00A0008: return "STATUS_CTX_MODEM_INF_NOT_FOUND";
- case 0xC00A0009: return "STATUS_CTX_INVALID_MODEMNAME";
- case 0xC00A000A: return "STATUS_CTX_RESPONSE_ERROR";
- case 0xC00A000B: return "STATUS_CTX_MODEM_RESPONSE_TIMEOUT";
- case 0xC00A000C: return "STATUS_CTX_MODEM_RESPONSE_NO_CARRIER";
- case 0xC00A000D: return "STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE";
- case 0xC00A000E: return "STATUS_CTX_MODEM_RESPONSE_BUSY";
- case 0xC00A000F: return "STATUS_CTX_MODEM_RESPONSE_VOICE";
- case 0xC00A0010: return "STATUS_CTX_TD_ERROR";
- case 0xC00A0012: return "STATUS_CTX_LICENSE_CLIENT_INVALID";
- case 0xC00A0013: return "STATUS_CTX_LICENSE_NOT_AVAILABLE";
- case 0xC00A0014: return "STATUS_CTX_LICENSE_EXPIRED";
- case 0xC00A0015: return "STATUS_CTX_WINSTATION_NOT_FOUND";
- case 0xC00A0016: return "STATUS_CTX_WINSTATION_NAME_COLLISION";
- case 0xC00A0017: return "STATUS_CTX_WINSTATION_BUSY";
- case 0xC00A0018: return "STATUS_CTX_BAD_VIDEO_MODE";
- case 0xC00A0022: return "STATUS_CTX_GRAPHICS_INVALID";
- case 0xC00A0024: return "STATUS_CTX_NOT_CONSOLE";
- case 0xC00A0026: return "STATUS_CTX_CLIENT_QUERY_TIMEOUT";
- case 0xC00A0027: return "STATUS_CTX_CONSOLE_DISCONNECT";
- case 0xC00A0028: return "STATUS_CTX_CONSOLE_CONNECT";
- case 0xC00A002A: return "STATUS_CTX_SHADOW_DENIED";
- case 0xC00A002B: return "STATUS_CTX_WINSTATION_ACCESS_DENIED";
- case 0xC00A002E: return "STATUS_CTX_INVALID_WD";
- case 0xC00A002F: return "STATUS_CTX_WD_NOT_FOUND";
- case 0xC00A0030: return "STATUS_CTX_SHADOW_INVALID";
- case 0xC00A0031: return "STATUS_CTX_SHADOW_DISABLED";
- case 0xC00A0032: return "STATUS_RDP_PROTOCOL_ERROR";
- case 0xC00A0033: return "STATUS_CTX_CLIENT_LICENSE_NOT_SET";
- case 0xC00A0034: return "STATUS_CTX_CLIENT_LICENSE_IN_USE";
- case 0xC0040035: return "STATUS_PNP_BAD_MPS_TABLE";
- case 0xC0040036: return "STATUS_PNP_TRANSLATION_FAILED";
- case 0xC0040037: return "STATUS_PNP_IRQ_TRANSLATION_FAILED";
- default: return "STATUS_UNKNOWN";
- }
-}
-
-
-/*
- * KsPrintf
- * This function is variable-argument, level-sensitive debug print routine.
- * If the specified debug level for the print statement is lower or equal
- * to the current debug level, the message will be printed.
- *
- * Arguments:
- * DebugPrintLevel - Specifies at which debugging level the string should
- * be printed
- * DebugMessage - Variable argument ascii c string
- *
- * Return Value:
- * N/A
- *
- * NOTES:
- * N/A
- */
-
-VOID
-KsPrintf(
- LONG DebugPrintLevel,
- PCHAR DebugMessage,
- ...
- )
-{
- va_list ap;
-
- va_start(ap, DebugMessage);
-
- if (DebugPrintLevel <= KsDebugLevel)
- {
- CHAR buffer[0x200];
-
- vsprintf(buffer, DebugMessage, ap);
-
- KdPrint(("TID:%8.8x: %s", PsGetCurrentThread(), buffer));
- }
-
- va_end(ap);
-
-} // KsPrint()
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-
-const CHAR *dos_file_prefix = "\\??\\";
-
-/*
- * cfs_filp_open
- * To open or create a file in kernel mode
- *
- * Arguments:
- * name: name of the file to be opened or created, no dos path prefix
- * flags: open/creation attribute options
- * mode: access mode/permission to open or create
- * err: error code
- *
- * Return Value:
- * the pointer to the cfs_file_t or NULL if it fails
- *
- * Notes:
- * N/A
- */
-
-cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
-{
- cfs_file_t * fp = NULL;
-
- NTSTATUS Status;
-
- OBJECT_ATTRIBUTES ObjectAttributes;
- HANDLE FileHandle;
- IO_STATUS_BLOCK IoStatus;
- ACCESS_MASK DesiredAccess;
- ULONG CreateDisposition;
- ULONG ShareAccess;
- ULONG CreateOptions;
-
- USHORT NameLength = 0;
- USHORT PrefixLength = 0;
-
- UNICODE_STRING UnicodeName;
- PWCHAR UnicodeString = NULL;
-
- ANSI_STRING AnsiName;
- PUCHAR AnsiString = NULL;
-
- /* Analyze the flags settings */
-
- if (cfs_is_flag_set(flags, O_WRONLY)) {
- DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE);
- ShareAccess = 0;
- } else if (cfs_is_flag_set(flags, O_RDWR)) {
- DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE);
- ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE;
- } else {
- DesiredAccess = (GENERIC_READ | SYNCHRONIZE);
- ShareAccess = FILE_SHARE_READ;
- }
-
- if (cfs_is_flag_set(flags, O_CREAT)) {
- if (cfs_is_flag_set(flags, O_EXCL)) {
- CreateDisposition = FILE_CREATE;
- } else {
- CreateDisposition = FILE_OPEN_IF;
- }
- } else {
- CreateDisposition = FILE_OPEN;
- }
-
- if (cfs_is_flag_set(flags, O_TRUNC)) {
- if (cfs_is_flag_set(flags, O_EXCL)) {
- CreateDisposition = FILE_OVERWRITE;
- } else {
- CreateDisposition = FILE_OVERWRITE_IF;
- }
- }
-
- CreateOptions = 0;
-
- if (cfs_is_flag_set(flags, O_DIRECTORY)) {
- cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE);
- }
-
- if (cfs_is_flag_set(flags, O_SYNC)) {
- cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH);
- }
-
- if (cfs_is_flag_set(flags, O_DIRECT)) {
- cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING);
- }
-
- /* Initialize the unicode path name for the specified file */
-
- NameLength = (USHORT)strlen(name);
-
- if (name[0] != '\\') {
- PrefixLength = (USHORT)strlen(dos_file_prefix);
- }
-
- AnsiString = cfs_alloc( sizeof(CHAR) * (NameLength + PrefixLength + 1),
- CFS_ALLOC_ZERO);
- if (NULL == AnsiString) {
- if (err) *err = -ENOMEM;
- return NULL;
- }
-
- UnicodeString = cfs_alloc( sizeof(WCHAR) * (NameLength + PrefixLength + 1),
- CFS_ALLOC_ZERO);
-
- if (NULL == UnicodeString) {
- if (err) *err = -ENOMEM;
- cfs_free(AnsiString);
- return NULL;
- }
-
- if (PrefixLength) {
- RtlCopyMemory(&AnsiString[0], dos_file_prefix , PrefixLength);
- }
-
- RtlCopyMemory(&AnsiString[PrefixLength], name, NameLength);
- NameLength += PrefixLength;
-
- AnsiName.MaximumLength = NameLength + 1;
- AnsiName.Length = NameLength;
- AnsiName.Buffer = AnsiString;
-
- UnicodeName.MaximumLength = (NameLength + 1) * sizeof(WCHAR);
- UnicodeName.Length = 0;
- UnicodeName.Buffer = (PWSTR)UnicodeString;
-
- RtlAnsiStringToUnicodeString(&UnicodeName, &AnsiName, FALSE);
-
- /* Setup the object attributes structure for the file. */
-
- InitializeObjectAttributes(
- &ObjectAttributes,
- &UnicodeName,
- OBJ_CASE_INSENSITIVE |
- OBJ_KERNEL_HANDLE,
- NULL,
- NULL );
-
- /* Now to open or create the file now */
-
- Status = ZwCreateFile(
- &FileHandle,
- DesiredAccess,
- &ObjectAttributes,
- &IoStatus,
- 0,
- FILE_ATTRIBUTE_NORMAL,
- ShareAccess,
- CreateDisposition,
- CreateOptions,
- NULL,
- 0 );
-
- /* Check the returned status of IoStatus... */
-
- if (!NT_SUCCESS(IoStatus.Status)) {
- *err = cfs_error_code(IoStatus.Status);
- cfs_free(UnicodeString);
- cfs_free(AnsiString);
- return NULL;
- }
-
- /* Allocate the cfs_file_t: libcfs file object */
-
- fp = cfs_alloc(sizeof(cfs_file_t) + NameLength, CFS_ALLOC_ZERO);
-
- if (NULL == fp) {
- Status = ZwClose(FileHandle);
- ASSERT(NT_SUCCESS(Status));
- *err = -ENOMEM;
- cfs_free(UnicodeString);
- cfs_free(AnsiString);
- return NULL;
- }
-
- fp->f_handle = FileHandle;
- strcpy(fp->f_name, name);
- fp->f_flags = flags;
- fp->f_mode = (mode_t)mode;
- fp->f_count = 1;
- *err = 0;
-
- /* free the memory of temporary name strings */
- cfs_free(UnicodeString);
- cfs_free(AnsiString);
-
- return fp;
-}
-
-
-/*
- * cfs_filp_close
- * To close the opened file and release the filp structure
- *
- * Arguments:
- * fp: the pointer of the cfs_file_t strcture
- *
- * Return Value:
- * ZERO: on success
- * Non-Zero: on failure
- *
- * Notes:
- * N/A
- */
-
-int cfs_filp_close(cfs_file_t *fp)
-{
- NTSTATUS Status;
-
- ASSERT(fp != NULL);
- ASSERT(fp->f_handle != NULL);
-
- /* release the file handle */
- Status = ZwClose(fp->f_handle);
- ASSERT(NT_SUCCESS(Status));
-
- /* free the file flip structure */
- cfs_free(fp);
- return 0;
-}
-
-
-/*
- * cfs_filp_read
- * To read data from the opened file
- *
- * Arguments:
- * fp: the pointer of the cfs_file_t strcture
- * buf: pointer to the buffer to contain the data
- * nbytes: size in bytes to be read from the file
- * pos: offset in file where reading starts, if pos
- * NULL, then read from current file offset
- *
- * Return Value:
- * Actual size read into the buffer in success case
- * Error code in failure case
- *
- * Notes:
- * N/A
- */
-
-int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos)
-{
- LARGE_INTEGER address;
- NTSTATUS Status;
- IO_STATUS_BLOCK IoStatus;
-
- int rc = 0;
-
- /* Read data from the file into the specified buffer */
-
- if (pos != NULL) {
- address.QuadPart = *pos;
- } else {
- address.QuadPart = fp->f_pos;
- }
-
- Status = ZwReadFile( fp->f_handle,
- 0,
- NULL,
- NULL,
- &IoStatus,
- buf,
- nbytes,
- &address,
- NULL );
-
- if (!NT_SUCCESS(IoStatus.Status)) {
- rc = cfs_error_code(IoStatus.Status);
- } else {
- rc = (int)IoStatus.Information;
- fp->f_pos = address.QuadPart + rc;
-
- if (pos != NULL) {
- *pos = fp->f_pos;
- }
- }
-
- return rc;
-}
-
-
-/*
- * cfs_filp_wrtie
- * To write specified data to the opened file
- *
- * Arguments:
- * fp: the pointer of the cfs_file_t strcture
- * buf: pointer to the buffer containing the data
- * nbytes: size in bytes to be written to the file
- * pos: offset in file where writing starts, if pos
- * NULL, then write to current file offset
- *
- * Return Value:
- * Actual size written into the buffer in success case
- * Error code in failure case
- *
- * Notes:
- * N/A
- */
-
-int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos)
-{
- LARGE_INTEGER address;
- NTSTATUS Status;
- IO_STATUS_BLOCK IoStatus;
- int rc = 0;
-
- /* Write user specified data into the file */
-
- if (pos != NULL) {
- address.QuadPart = *pos;
- } else {
- address.QuadPart = fp->f_pos;
- }
-
- Status = ZwWriteFile( fp->f_handle,
- 0,
- NULL,
- NULL,
- &IoStatus,
- buf,
- nbytes,
- &address,
- NULL );
-
- if (!NT_SUCCESS(Status)) {
- rc = cfs_error_code(Status);
- } else {
- rc = (int)IoStatus.Information;
- fp->f_pos = address.QuadPart + rc;
-
- if (pos != NULL) {
- *pos = fp->f_pos;
- }
- }
-
- return rc;
-}
-
-
-NTSTATUS
-CompletionRoutine(
- PDEVICE_OBJECT DeviceObject,
- PIRP Irp,
- PVOID Context)
-{
- /* copy the IoStatus result */
- *Irp->UserIosb = Irp->IoStatus;
-
- /* singal the event we set */
- KeSetEvent(Irp->UserEvent, 0, FALSE);
-
- /* free the Irp we allocated */
- IoFreeIrp(Irp);
-
- return STATUS_MORE_PROCESSING_REQUIRED;
-}
-
-
-/*
- * cfs_filp_fsync
- * To sync the dirty data of the file to disk
- *
- * Arguments:
- * fp: the pointer of the cfs_file_t strcture
- *
- * Return Value:
- * Zero: in success case
- * Error code: in failure case
- *
- * Notes:
- * Nt kernel doesn't export such a routine to flush a file,
- * we must allocate our own Irp and issue it to the file
- * system driver.
- */
-
-int cfs_filp_fsync(cfs_file_t *fp)
-{
-
- PFILE_OBJECT FileObject;
- PDEVICE_OBJECT DeviceObject;
-
- NTSTATUS Status;
- PIRP Irp;
- KEVENT Event;
- IO_STATUS_BLOCK IoSb;
- PIO_STACK_LOCATION IrpSp;
-
- /* get the FileObject and the DeviceObject */
-
- Status = ObReferenceObjectByHandle(
- fp->f_handle,
- FILE_WRITE_DATA,
- NULL,
- KernelMode,
- (PVOID*)&FileObject,
- NULL );
-
- if (!NT_SUCCESS(Status)) {
- return cfs_error_code(Status);
- }
-
- DeviceObject = IoGetRelatedDeviceObject(FileObject);
-
- /* allocate a new Irp */
-
- Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
-
- if (!Irp) {
-
- ObDereferenceObject(FileObject);
- return -ENOMEM;
- }
-
- /* intialize the event */
- KeInitializeEvent(&Event, SynchronizationEvent, FALSE);
-
- /* setup the Irp */
- Irp->UserEvent = &Event;
- Irp->UserIosb = &IoSb;
- Irp->RequestorMode = KernelMode;
-
- Irp->Tail.Overlay.Thread = PsGetCurrentThread();
- Irp->Tail.Overlay.OriginalFileObject = FileObject;
-
- /* setup the Irp stack location */
- IrpSp = IoGetNextIrpStackLocation(Irp);
-
- IrpSp->MajorFunction = IRP_MJ_FLUSH_BUFFERS;
- IrpSp->DeviceObject = DeviceObject;
- IrpSp->FileObject = FileObject;
-
- IoSetCompletionRoutine(Irp, CompletionRoutine, 0, TRUE, TRUE, TRUE);
-
-
- /* issue the Irp to the underlying file system driver */
- IoCallDriver(DeviceObject, Irp);
-
- /* wait until it is finished */
- KeWaitForSingleObject(&Event, Executive, KernelMode, TRUE, 0);
-
- /* cleanup our reference on it */
- ObDereferenceObject(FileObject);
-
- Status = IoSb.Status;
-
- return cfs_error_code(Status);
-}
-
-/*
- * cfs_get_file
- * To increase the reference of the file object
- *
- * Arguments:
- * fp: the pointer of the cfs_file_t strcture
- *
- * Return Value:
- * Zero: in success case
- * Non-Zero: in failure case
- *
- * Notes:
- * N/A
- */
-
-int cfs_get_file(cfs_file_t *fp)
-{
- InterlockedIncrement(&(fp->f_count));
- return 0;
-}
-
-
-/*
- * cfs_put_file
- * To decrease the reference of the file object
- *
- * Arguments:
- * fp: the pointer of the cfs_file_t strcture
- *
- * Return Value:
- * Zero: in success case
- * Non-Zero: in failure case
- *
- * Notes:
- * N/A
- */
-
-int cfs_put_file(cfs_file_t *fp)
-{
- if (InterlockedDecrement(&(fp->f_count)) == 0) {
- cfs_filp_close(fp);
- }
-
- return 0;
-}
-
-
-/*
- * cfs_file_count
- * To query the reference count of the file object
- *
- * Arguments:
- * fp: the pointer of the cfs_file_t strcture
- *
- * Return Value:
- * the reference count of the file object
- *
- * Notes:
- * N/A
- */
-
-int cfs_file_count(cfs_file_t *fp)
-{
- return (int)(fp->f_count);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-
-
-#if _X86_
-
-void __declspec (naked) FASTCALL
-atomic_add(
- int i,
- atomic_t *v
- )
-{
- // ECX = i
- // EDX = v ; [EDX][0] = v->counter
-
- __asm {
- lock add dword ptr [edx][0], ecx
- ret
- }
-}
-
-void __declspec (naked) FASTCALL
-atomic_sub(
- int i,
- atomic_t *v
- )
-{
- // ECX = i
- // EDX = v ; [EDX][0] = v->counter
-
- __asm {
- lock sub dword ptr [edx][0], ecx
- ret
- }
-}
-
-void __declspec (naked) FASTCALL
-atomic_inc(
- atomic_t *v
- )
-{
- //InterlockedIncrement((PULONG)(&((v)->counter)));
-
- //` ECX = v ; [ECX][0] = v->counter
-
- __asm {
- lock inc dword ptr [ecx][0]
- ret
- }
-}
-
-void __declspec (naked) FASTCALL
-atomic_dec(
- atomic_t *v
- )
-{
- // ECX = v ; [ECX][0] = v->counter
-
- __asm {
- lock dec dword ptr [ecx][0]
- ret
- }
-}
-
-int __declspec (naked) FASTCALL
-atomic_sub_and_test(
- int i,
- atomic_t *v
- )
-{
-
- // ECX = i
- // EDX = v ; [EDX][0] = v->counter
-
- __asm {
- xor eax, eax
- lock sub dword ptr [edx][0], ecx
- sete al
- ret
- }
-}
-
-int __declspec (naked) FASTCALL
-atomic_inc_and_test(
- atomic_t *v
- )
-{
- // ECX = v ; [ECX][0] = v->counter
-
- __asm {
- xor eax, eax
- lock inc dword ptr [ecx][0]
- sete al
- ret
- }
-}
-
-int __declspec (naked) FASTCALL
-atomic_dec_and_test(
- atomic_t *v
- )
-{
- // ECX = v ; [ECX][0] = v->counter
-
- __asm {
- xor eax, eax
- lock dec dword ptr [ecx][0]
- sete al
- ret
- }
-}
-
-#else
-
-void FASTCALL
-atomic_add(
- int i,
- atomic_t *v
- )
-{
- InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (i));
-}
-
-void FASTCALL
-atomic_sub(
- int i,
- atomic_t *v
- )
-{
- InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (-1*i));
-}
-
-void FASTCALL
-atomic_inc(
- atomic_t *v
- )
-{
- InterlockedIncrement((PULONG)(&((v)->counter)));
-}
-
-void FASTCALL
-atomic_dec(
- atomic_t *v
- )
-{
- InterlockedDecrement((PULONG)(&((v)->counter)));
-}
-
-int FASTCALL
-atomic_sub_and_test(
- int i,
- atomic_t *v
- )
-{
- int counter, result;
-
- do {
-
- counter = v->counter;
- result = counter - i;
-
- } while ( InterlockedCompareExchange(
- &(v->counter),
- result,
- counter) != counter);
-
- return (result == 0);
-}
-
-int FASTCALL
-atomic_inc_and_test(
- atomic_t *v
- )
-{
- int counter, result;
-
- do {
-
- counter = v->counter;
- result = counter + 1;
-
- } while ( InterlockedCompareExchange(
- &(v->counter),
- result,
- counter) != counter);
-
- return (result == 0);
-}
-
-int FASTCALL
-atomic_dec_and_test(
- atomic_t *v
- )
-{
- int counter, result;
-
- do {
-
- counter = v->counter;
- result = counter + 1;
-
- } while ( InterlockedCompareExchange(
- &(v->counter),
- result,
- counter) != counter);
-
- return (result == 0);
-}
-
-#endif
-
-
-/*
- * rw spinlock
- */
-
-
-void
-rwlock_init(rwlock_t * rwlock)
-{
- spin_lock_init(&rwlock->guard);
- rwlock->count = 0;
-}
-
-void
-rwlock_fini(rwlock_t * rwlock)
-{
-}
-
-void
-read_lock(rwlock_t * rwlock)
-{
- cfs_task_t * task = cfs_current();
- PTASK_SLOT slot = NULL;
-
- if (!task) {
- /* should bugchk here */
- cfs_enter_debugger();
- return;
- }
-
- slot = CONTAINING_RECORD(task, TASK_SLOT, task);
- ASSERT(slot->Magic == TASKSLT_MAGIC);
-
- slot->irql = KeRaiseIrqlToDpcLevel();
-
- while (TRUE) {
- spin_lock(&rwlock->guard);
- if (rwlock->count >= 0)
- break;
- spin_unlock(&rwlock->guard);
- }
-
- rwlock->count++;
- spin_unlock(&rwlock->guard);
-}
-
-void
-read_unlock(rwlock_t * rwlock)
-{
- cfs_task_t * task = cfs_current();
- PTASK_SLOT slot = NULL;
-
- if (!task) {
- /* should bugchk here */
- cfs_enter_debugger();
- return;
- }
-
- slot = CONTAINING_RECORD(task, TASK_SLOT, task);
- ASSERT(slot->Magic == TASKSLT_MAGIC);
-
- spin_lock(&rwlock->guard);
- ASSERT(rwlock->count > 0);
- rwlock->count--;
- if (rwlock < 0) {
- cfs_enter_debugger();
- }
- spin_unlock(&rwlock->guard);
-
- KeLowerIrql(slot->irql);
-}
-
-void
-write_lock(rwlock_t * rwlock)
-{
- cfs_task_t * task = cfs_current();
- PTASK_SLOT slot = NULL;
-
- if (!task) {
- /* should bugchk here */
- cfs_enter_debugger();
- return;
- }
-
- slot = CONTAINING_RECORD(task, TASK_SLOT, task);
- ASSERT(slot->Magic == TASKSLT_MAGIC);
-
- slot->irql = KeRaiseIrqlToDpcLevel();
-
- while (TRUE) {
- spin_lock(&rwlock->guard);
- if (rwlock->count == 0)
- break;
- spin_unlock(&rwlock->guard);
- }
-
- rwlock->count = -1;
- spin_unlock(&rwlock->guard);
-}
-
-void
-write_unlock(rwlock_t * rwlock)
-{
- cfs_task_t * task = cfs_current();
- PTASK_SLOT slot = NULL;
-
- if (!task) {
- /* should bugchk here */
- cfs_enter_debugger();
- return;
- }
-
- slot = CONTAINING_RECORD(task, TASK_SLOT, task);
- ASSERT(slot->Magic == TASKSLT_MAGIC);
-
- spin_lock(&rwlock->guard);
- ASSERT(rwlock->count == -1);
- rwlock->count = 0;
- spin_unlock(&rwlock->guard);
-
- KeLowerIrql(slot->irql);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-# define DEBUG_SUBSYSTEM S_LNET
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-
-
-cfs_mem_cache_t *cfs_page_t_slab = NULL;
-cfs_mem_cache_t *cfs_page_p_slab = NULL;
-
-/*
- * cfs_alloc_page
- * To allocate the cfs_page_t and also 1 page of memory
- *
- * Arguments:
- * flags: the allocation options
- *
- * Return Value:
- * pointer to the cfs_page_t strcture in success or
- * NULL in failure case
- *
- * Notes:
- * N/A
- */
-
-cfs_page_t * cfs_alloc_page(int flags)
-{
- cfs_page_t *pg;
- pg = cfs_mem_cache_alloc(cfs_page_t_slab, 0);
-
- if (NULL == pg) {
- cfs_enter_debugger();
- return NULL;
- }
-
- memset(pg, 0, sizeof(cfs_page_t));
- pg->addr = cfs_mem_cache_alloc(cfs_page_p_slab, 0);
- atomic_set(&pg->count, 1);
-
- if (pg->addr) {
- if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) {
- memset(pg->addr, 0, CFS_PAGE_SIZE);
- }
- } else {
- cfs_enter_debugger();
- cfs_mem_cache_free(cfs_page_t_slab, pg);
- pg = NULL;
- }
-
- return pg;
-}
-
-/*
- * cfs_free_page
- * To free the cfs_page_t including the page
- *
- * Arguments:
- * pg: pointer to the cfs_page_t strcture
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-void cfs_free_page(cfs_page_t *pg)
-{
- ASSERT(pg != NULL);
- ASSERT(pg->addr != NULL);
- ASSERT(atomic_read(&pg->count) <= 1);
-
- cfs_mem_cache_free(cfs_page_p_slab, pg->addr);
- cfs_mem_cache_free(cfs_page_t_slab, pg);
-}
-
-
-/*
- * cfs_alloc
- * To allocate memory from system pool
- *
- * Arguments:
- * nr_bytes: length in bytes of the requested buffer
- * flags: flags indiction
- *
- * Return Value:
- * NULL: if there's no enough memory space in system
- * the address of the allocated memory in success.
- *
- * Notes:
- * This operation can be treated as atomic.
- */
-
-void *
-cfs_alloc(size_t nr_bytes, u_int32_t flags)
-{
- void *ptr;
-
- /* Ignore the flags: always allcoate from NonPagedPool */
-
- ptr = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs');
-
- if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) {
- memset(ptr, 0, nr_bytes);
- }
-
- if (!ptr) {
- cfs_enter_debugger();
- }
-
- return ptr;
-}
-
-/*
- * cfs_free
- * To free the sepcified memory to system pool
- *
- * Arguments:
- * addr: pointer to the buffer to be freed
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * This operation can be treated as atomic.
- */
-
-void
-cfs_free(void *addr)
-{
- ExFreePool(addr);
-}
-
-/*
- * cfs_alloc_large
- * To allocate large block of memory from system pool
- *
- * Arguments:
- * nr_bytes: length in bytes of the requested buffer
- *
- * Return Value:
- * NULL: if there's no enough memory space in system
- * the address of the allocated memory in success.
- *
- * Notes:
- * N/A
- */
-
-void *
-cfs_alloc_large(size_t nr_bytes)
-{
- return cfs_alloc(nr_bytes, 0);
-}
-
-/*
- * cfs_free_large
- * To free the sepcified memory to system pool
- *
- * Arguments:
- * addr: pointer to the buffer to be freed
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-cfs_free_large(void *addr)
-{
- cfs_free(addr);
-}
-
-
-/*
- * cfs_mem_cache_create
- * To create a SLAB cache
- *
- * Arguments:
- * name: name string of the SLAB cache to be created
- * size: size in bytes of SLAB entry buffer
- * offset: offset in the page
- * flags: SLAB creation flags
-*
- * Return Value:
- * The poitner of cfs_memory_cache structure in success.
- * NULL pointer in failure case.
- *
- * Notes:
- * 1, offset won't be used here.
- * 2, it could be better to induce a lock to protect the access of the
- * SLAB structure on SMP if there's not outside lock protection.
- * 3, parameters C/D are removed.
- */
-
-cfs_mem_cache_t *
-cfs_mem_cache_create(
- const char * name,
- size_t size,
- size_t offset,
- unsigned long flags
- )
-{
- cfs_mem_cache_t * kmc = NULL;
-
- /* The name of the SLAB could not exceed 20 chars */
-
- if (name && strlen(name) >= 20) {
- goto errorout;
- }
-
- /* Allocate and initialize the SLAB strcture */
-
- kmc = cfs_alloc (sizeof(cfs_mem_cache_t), 0);
-
- if (NULL == kmc) {
- goto errorout;
- }
-
- memset(kmc, 0, sizeof(cfs_mem_cache_t));
-
- kmc->flags = flags;
-
- if (name) {
- strcpy(&kmc->name[0], name);
- }
-
- /* Initialize the corresponding LookAside list */
-
- ExInitializeNPagedLookasideList(
- &(kmc->npll),
- NULL,
- NULL,
- 0,
- size,
- 'pnmk',
- 0);
-
-errorout:
-
- return kmc;
-}
-
-/*
- * cfs_mem_cache_destroy
- * To destroy the unused SLAB cache
- *
- * Arguments:
- * kmc: the SLAB cache to be destroied.
- *
- * Return Value:
- * 0: in success case.
- * 1: in failure case.
- *
- * Notes:
- * N/A
- */
-
-int cfs_mem_cache_destroy (cfs_mem_cache_t * kmc)
-{
- ASSERT(kmc != NULL);
-
- ExDeleteNPagedLookasideList(&(kmc->npll));
-
- cfs_free(kmc);
-
- return 0;
-}
-
-/*
- * cfs_mem_cache_alloc
- * To allocate an object (LookAside entry) from the SLAB
- *
- * Arguments:
- * kmc: the SLAB cache to be allocated from.
- * flags: flags for allocation options
- *
- * Return Value:
- * object buffer address: in success case.
- * NULL: in failure case.
- *
- * Notes:
- * N/A
- */
-
-void *cfs_mem_cache_alloc(cfs_mem_cache_t * kmc, int flags)
-{
- void *buf = NULL;
-
- buf = ExAllocateFromNPagedLookasideList(&(kmc->npll));
-
- return buf;
-}
-
-/*
- * cfs_mem_cache_free
- * To free an object (LookAside entry) to the SLAB cache
- *
- * Arguments:
- * kmc: the SLAB cache to be freed to.
- * buf: the pointer to the object to be freed.
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_mem_cache_free(cfs_mem_cache_t * kmc, void * buf)
-{
- ExFreeToNPagedLookasideList(&(kmc->npll), buf);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-
-#define DEBUG_SUBSYSTEM S_LIBCFS
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-#define LIBCFS_MINOR 240
-
-int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
-{
- struct libcfs_ioctl_hdr *hdr;
- struct libcfs_ioctl_data *data;
- int err;
- ENTRY;
-
- hdr = (struct libcfs_ioctl_hdr *)buf;
- data = (struct libcfs_ioctl_data *)buf;
-
- err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
- if (err)
- RETURN(err);
-
- if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
- CERROR(("LIBCFS: version mismatch kernel vs application\n"));
- RETURN(-EINVAL);
- }
-
- if (hdr->ioc_len + buf >= end) {
- CERROR(("LIBCFS: user buffer exceeds kernel buffer\n"));
- RETURN(-EINVAL);
- }
-
- if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) {
- CERROR(("LIBCFS: user buffer too small for ioctl\n"));
- RETURN(-EINVAL);
- }
-
- err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
- if (err)
- RETURN(err);
-
- if (libcfs_ioctl_is_invalid(data)) {
- CERROR(("LIBCFS: ioctl not correctly formatted\n"));
- RETURN(-EINVAL);
- }
-
- if (data->ioc_inllen1)
- data->ioc_inlbuf1 = &data->ioc_bulk[0];
-
- if (data->ioc_inllen2)
- data->ioc_inlbuf2 = &data->ioc_bulk[0] +
- size_round(data->ioc_inllen1);
-
- RETURN(0);
-}
-
-extern struct cfs_psdev_ops libcfs_psdev_ops;
-
-static int
-libcfs_psdev_open(cfs_file_t * file)
-{
- struct libcfs_device_userstate **pdu = NULL;
- int rc = 0;
-
- pdu = (struct libcfs_device_userstate **)&file->private_data;
- if (libcfs_psdev_ops.p_open != NULL)
- rc = libcfs_psdev_ops.p_open(0, (void *)pdu);
- else
- return (-EPERM);
- return rc;
-}
-
-/* called when closing /dev/device */
-static int
-libcfs_psdev_release(cfs_file_t * file)
-{
- struct libcfss_device_userstate *pdu;
- int rc = 0;
-
- pdu = file->private_data;
- if (libcfs_psdev_ops.p_close != NULL)
- rc = libcfs_psdev_ops.p_close(0, (void *)pdu);
- else
- rc = -EPERM;
- return rc;
-}
-
-static int
-libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg)
-{
- struct cfs_psdev_file pfile;
- int rc = 0;
-
- if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
- _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR ||
- _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) {
- CDEBUG(D_IOCTL, ("invalid ioctl ( type %d, nr %d, size %d )\n",
- _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)));
- return (-EINVAL);
- }
-
- /* Handle platform-dependent IOC requests */
- switch (cmd) {
- case IOC_LIBCFS_PANIC:
- if (!capable (CAP_SYS_BOOT))
- return (-EPERM);
- CERROR(("debugctl-invoked panic"));
- KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL);
-
- return (0);
- case IOC_LIBCFS_MEMHOG:
-
- if (!capable (CAP_SYS_ADMIN))
- return -EPERM;
- break;
- }
-
- pfile.off = 0;
- pfile.private_data = file->private_data;
- if (libcfs_psdev_ops.p_ioctl != NULL)
- rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
- else
- rc = -EPERM;
- return (rc);
-}
-
-static struct file_operations libcfs_fops = {
- /* lseek: */ NULL,
- /* read: */ NULL,
- /* write: */ NULL,
- /* ioctl: */ libcfs_ioctl,
- /* open: */ libcfs_psdev_open,
- /* release:*/ libcfs_psdev_release
-};
-
-cfs_psdev_t libcfs_dev = {
- LIBCFS_MINOR,
- "lnet",
- &libcfs_fops
-};
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-
-/*
- * Thread routines
- */
-
-/*
- * cfs_thread_proc
- * Lustre thread procedure wrapper routine (It's an internal routine)
- *
- * Arguments:
- * context: a structure of cfs_thread_context_t, containing
- * all the necessary parameters
- *
- * Return Value:
- * void: N/A
- *
- * Notes:
- * N/A
- */
-
-void
-cfs_thread_proc(
- void * context
- )
-{
- cfs_thread_context_t * thread_context =
- (cfs_thread_context_t *) context;
-
- /* Execute the specified function ... */
-
- if (thread_context->func) {
- (thread_context->func)(thread_context->arg);
- }
-
- /* Free the context memory */
-
- cfs_free(context);
-
- /* Terminate this system thread */
-
- PsTerminateSystemThread(STATUS_SUCCESS);
-}
-
-/*
- * cfs_kernel_thread
- * Create a system thread to execute the routine specified
- *
- * Arguments:
- * func: function to be executed in the thread
- * arg: argument transferred to func function
- * flag: thread creation flags.
- *
- * Return Value:
- * int: 0 on success or error codes
- *
- * Notes:
- * N/A
- */
-
-int cfs_kernel_thread(int (*func)(void *), void *arg, int flag)
-{
- cfs_handle_t thread = NULL;
- NTSTATUS status;
- cfs_thread_context_t * context = NULL;
-
- /* Allocate the context to be transferred to system thread */
-
- context = cfs_alloc(sizeof(cfs_thread_context_t), CFS_ALLOC_ZERO);
-
- if (!context) {
- return -ENOMEM;
- }
-
- context->func = func;
- context->arg = arg;
-
- /* Create system thread with the cfs_thread_proc wrapper */
-
- status = PsCreateSystemThread(
- &thread,
- (ACCESS_MASK)0L,
- 0, 0, 0,
- cfs_thread_proc,
- context);
-
- if (!NT_SUCCESS(status)) {
-
-
- cfs_free(context);
-
- /* We need translate the nt status to linux error code */
-
- return cfs_error_code(status);
- }
-
- //
- // Query the thread id of the newly created thread
- //
-
- ZwClose(thread);
-
- return 0;
-}
-
-
-/*
- * Symbols routines
- */
-
-
-static CFS_DECL_RWSEM(cfs_symbol_lock);
-CFS_LIST_HEAD(cfs_symbol_list);
-
-int MPSystem = FALSE;
-
-/*
- * cfs_symbol_get
- * To query the specified symbol form the symbol table
- *
- * Arguments:
- * name: the symbol name to be queried
- *
- * Return Value:
- * If the symbol is in the table, return the address of it.
- * If not, return NULL.
- *
- * Notes:
- * N/A
- */
-
-void *
-cfs_symbol_get(const char *name)
-{
- struct list_head *walker;
- struct cfs_symbol *sym = NULL;
-
- down_read(&cfs_symbol_lock);
- list_for_each(walker, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- if (!strcmp(sym->name, name)) {
- sym->ref ++;
- break;
- }
- }
- up_read(&cfs_symbol_lock);
-
- if (sym != NULL)
- return sym->value;
-
- return NULL;
-}
-
-/*
- * cfs_symbol_put
- * To decrease the reference of the specified symbol
- *
- * Arguments:
- * name: the symbol name to be dereferred
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-cfs_symbol_put(const char *name)
-{
- struct list_head *walker;
- struct cfs_symbol *sym = NULL;
-
- down_read(&cfs_symbol_lock);
- list_for_each(walker, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- if (!strcmp(sym->name, name)) {
- LASSERT(sym->ref > 0);
- sym->ref--;
- break;
- }
- }
- up_read(&cfs_symbol_lock);
-
- LASSERT(sym != NULL);
-}
-
-
-/*
- * cfs_symbol_register
- * To register the specified symbol infromation
- *
- * Arguments:
- * name: the symbol name to be dereferred
- * value: the value that the symbol stands for
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * Zero: Succeed to register
- * Non-Zero: Fail to register the symbol
- */
-
-int
-cfs_symbol_register(const char *name, const void *value)
-{
- struct list_head *walker;
- struct cfs_symbol *sym = NULL;
- struct cfs_symbol *new = NULL;
-
- new = cfs_alloc(sizeof(struct cfs_symbol), CFS_ALLOC_ZERO);
- if (!new) {
- return (-ENOMEM);
- }
- strncpy(new->name, name, CFS_SYMBOL_LEN);
- new->value = (void *)value;
- new->ref = 0;
- CFS_INIT_LIST_HEAD(&new->sym_list);
-
- down_write(&cfs_symbol_lock);
- list_for_each(walker, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- if (!strcmp(sym->name, name)) {
- up_write(&cfs_symbol_lock);
- cfs_free(new);
- return 0; // alreay registerred
- }
- }
- list_add_tail(&new->sym_list, &cfs_symbol_list);
- up_write(&cfs_symbol_lock);
-
- return 0;
-}
-
-/*
- * cfs_symbol_unregister
- * To unregister/remove the specified symbol
- *
- * Arguments:
- * name: the symbol name to be dereferred
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-cfs_symbol_unregister(const char *name)
-{
- struct list_head *walker;
- struct list_head *nxt;
- struct cfs_symbol *sym = NULL;
-
- down_write(&cfs_symbol_lock);
- list_for_each_safe(walker, nxt, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- if (!strcmp(sym->name, name)) {
- LASSERT(sym->ref == 0);
- list_del (&sym->sym_list);
- cfs_free(sym);
- break;
- }
- }
- up_write(&cfs_symbol_lock);
-}
-
-/*
- * cfs_symbol_clean
- * To clean all the symbols
- *
- * Arguments:
- * N/A
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-cfs_symbol_clean()
-{
- struct list_head *walker;
- struct cfs_symbol *sym = NULL;
-
- down_write(&cfs_symbol_lock);
- list_for_each(walker, &cfs_symbol_list) {
- sym = list_entry (walker, struct cfs_symbol, sym_list);
- LASSERT(sym->ref == 0);
- list_del (&sym->sym_list);
- cfs_free(sym);
- }
- up_write(&cfs_symbol_lock);
- return;
-}
-
-
-
-/*
- * Timer routines
- */
-
-
-/* Timer dpc procedure */
-
-static void
-cfs_timer_dpc_proc (
- IN PKDPC Dpc,
- IN PVOID DeferredContext,
- IN PVOID SystemArgument1,
- IN PVOID SystemArgument2)
-{
- cfs_timer_t * timer;
- KIRQL Irql;
-
- timer = (cfs_timer_t *) DeferredContext;
-
- /* clear the flag */
- KeAcquireSpinLock(&(timer->Lock), &Irql);
- cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED);
- KeReleaseSpinLock(&(timer->Lock), Irql);
-
- /* call the user specified timer procedure */
- timer->proc((unsigned long)(timer->arg));
-}
-
-/*
- * cfs_timer_init
- * To initialize the cfs_timer_t
- *
- * Arguments:
- * timer: the cfs_timer to be initialized
- * func: the timer callback procedure
- * arg: argument for the callback proc
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_timer_init(cfs_timer_t *timer, void (*func)(unsigned long), void *arg)
-{
- memset(timer, 0, sizeof(cfs_timer_t));
-
- timer->proc = func;
- timer->arg = arg;
-
- KeInitializeSpinLock(&(timer->Lock));
- KeInitializeTimer(&timer->Timer);
- KeInitializeDpc (&timer->Dpc, cfs_timer_dpc_proc, timer);
-
- cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_INITED);
-}
-
-/*
- * cfs_timer_done
- * To finialize the cfs_timer_t (unused)
- *
- * Arguments:
- * timer: the cfs_timer to be cleaned up
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_timer_done(cfs_timer_t *timer)
-{
- return;
-}
-
-/*
- * cfs_timer_arm
- * To schedule the timer while touching @deadline
- *
- * Arguments:
- * timer: the cfs_timer to be freed
- * dealine: timeout value to wake up the timer
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_timer_arm(cfs_timer_t *timer, cfs_time_t deadline)
-{
- LARGE_INTEGER timeout;
- KIRQL Irql;
-
- KeAcquireSpinLock(&(timer->Lock), &Irql);
- if (!cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)){
-
- timeout.QuadPart = (LONGLONG)-1*1000*1000*10/HZ*deadline;
-
- if (KeSetTimer(&timer->Timer, timeout, &timer->Dpc )) {
- cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED);
- }
-
- timer->deadline = deadline;
- }
-
- KeReleaseSpinLock(&(timer->Lock), Irql);
-}
-
-/*
- * cfs_timer_disarm
- * To discard the timer to be scheduled
- *
- * Arguments:
- * timer: the cfs_timer to be discarded
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_timer_disarm(cfs_timer_t *timer)
-{
- KIRQL Irql;
-
- KeAcquireSpinLock(&(timer->Lock), &Irql);
- KeCancelTimer(&(timer->Timer));
- cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED);
- KeReleaseSpinLock(&(timer->Lock), Irql);
-}
-
-
-/*
- * cfs_timer_is_armed
- * To check the timer is scheduled or not
- *
- * Arguments:
- * timer: the cfs_timer to be checked
- *
- * Return Value:
- * 1: if it's armed.
- * 0: if it's not.
- *
- * Notes:
- * N/A
- */
-
-int cfs_timer_is_armed(cfs_timer_t *timer)
-{
- int rc = 0;
- KIRQL Irql;
-
- KeAcquireSpinLock(&(timer->Lock), &Irql);
- if (cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)) {
- rc = 1;
- }
- KeReleaseSpinLock(&(timer->Lock), Irql);
-
- return rc;
-}
-
-/*
- * cfs_timer_deadline
- * To query the deadline of the timer
- *
- * Arguments:
- * timer: the cfs_timer to be queried
- *
- * Return Value:
- * the deadline value
- *
- * Notes:
- * N/A
- */
-
-cfs_time_t cfs_timer_deadline(cfs_timer_t * timer)
-{
- return timer->deadline;
-}
-
-/*
- * daemonize routine stub
- */
-
-void cfs_daemonize(char *str)
-{
- return;
-}
-
-/*
- * routine related with sigals
- */
-
-cfs_sigset_t cfs_get_blockedsigs()
-{
- return 0;
-}
-
-cfs_sigset_t cfs_block_allsigs()
-{
- return 0;
-}
-
-cfs_sigset_t cfs_block_sigs(sigset_t bit)
-{
- return 0;
-}
-
-void cfs_restore_sigs(cfs_sigset_t old)
-{
-}
-
-int cfs_signal_pending(void)
-{
- return 0;
-}
-
-void cfs_clear_sigpending(void)
-{
- return;
-}
-
-/**
- ** Initialize routines
- **/
-
-int
-libcfs_arch_init(void)
-{
- int rc;
-
- spinlock_t lock;
- /* Workground to check the system is MP build or UP build */
- spin_lock_init(&lock);
- spin_lock(&lock);
- MPSystem = (int)lock.lock;
- /* MP build system: it's a real spin, for UP build system, it
- only raises the IRQL to DISPATCH_LEVEL */
- spin_unlock(&lock);
-
- /* create slab memory caches for page alloctors */
- cfs_page_t_slab = cfs_mem_cache_create(
- "CPGT", sizeof(cfs_page_t), 0, 0 );
-
- cfs_page_p_slab = cfs_mem_cache_create(
- "CPGP", CFS_PAGE_SIZE, 0, 0 );
-
- if ( cfs_page_t_slab == NULL ||
- cfs_page_p_slab == NULL ){
- rc = -ENOMEM;
- goto errorout;
- }
-
- rc = init_task_manager();
-
- if (rc != 0) {
- cfs_enter_debugger();
- KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing task manager ...\n"));
- goto errorout;
- }
-
- /* initialize the proc file system */
- rc = proc_init_fs();
-
- if (rc != 0) {
- cfs_enter_debugger();
- KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing proc fs ...\n"));
- cleanup_task_manager();
- goto errorout;
- }
-
- /* initialize the tdi data */
- rc = ks_init_tdi_data();
-
- if (rc != 0) {
- cfs_enter_debugger();
- KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing tdi ...\n"));
- proc_destroy_fs();
- cleanup_task_manager();
- goto errorout;
- }
-
-errorout:
-
- if (rc != 0) {
- /* destroy the taskslot cache slab */
- if (cfs_page_t_slab) {
- cfs_mem_cache_destroy(cfs_page_t_slab);
- }
- if (cfs_page_p_slab) {
- cfs_mem_cache_destroy(cfs_page_p_slab);
- }
- }
-
- return rc;
-}
-
-void
-libcfs_arch_cleanup(void)
-{
- /* finialize the tdi data */
- ks_fini_tdi_data();
-
- /* detroy the whole proc fs tree and nodes */
- proc_destroy_fs();
-
- /* destroy the taskslot cache slab */
- if (cfs_page_t_slab) {
- cfs_mem_cache_destroy(cfs_page_t_slab);
- }
-
- if (cfs_page_p_slab) {
- cfs_mem_cache_destroy(cfs_page_p_slab);
- }
-
- return;
-}
-
-EXPORT_SYMBOL(libcfs_arch_init);
-EXPORT_SYMBOL(libcfs_arch_cleanup);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-#include "tracefile.h"
-
-#ifdef __KERNEL__
-
-
-/*
- * /proc emulator routines ...
- */
-
-/* The root node of the proc fs emulation: /proc */
-cfs_proc_entry_t * proc_fs_root = NULL;
-
-
-/* The sys root: /proc/sys */
-cfs_proc_entry_t * proc_sys_root = NULL;
-
-
-/* The sys root: /proc/dev | to implement misc device */
-
-cfs_proc_entry_t * proc_dev_root = NULL;
-
-
-/* SLAB object for cfs_proc_entry_t allocation */
-
-cfs_mem_cache_t * proc_entry_cache = NULL;
-
-/* root node for sysctl table */
-
-cfs_sysctl_table_header_t root_table_header;
-
-/* The global lock to protect all the access */
-
-#if LIBCFS_PROCFS_SPINLOCK
-spinlock_t proc_fs_lock;
-
-#define INIT_PROCFS_LOCK() spin_lock_init(&proc_fs_lock)
-#define LOCK_PROCFS() spin_lock(&proc_fs_lock)
-#define UNLOCK_PROCFS() spin_unlock(&proc_fs_lock)
-
-#else
-
-mutex_t proc_fs_lock;
-
-#define INIT_PROCFS_LOCK() init_mutex(&proc_fs_lock)
-#define LOCK_PROCFS() mutex_down(&proc_fs_lock)
-#define UNLOCK_PROCFS() mutex_up(&proc_fs_lock)
-
-#endif
-
-static ssize_t
-proc_file_read(struct file * file, const char * buf, size_t nbytes, loff_t *ppos)
-{
- char *page;
- ssize_t retval=0;
- int eof=0;
- ssize_t n, count;
- char *start;
- cfs_proc_entry_t * dp;
-
- dp = (cfs_proc_entry_t *) file->private_data;
- if (!(page = (char*) cfs_alloc(CFS_PAGE_SIZE, 0)))
- return -ENOMEM;
-
- while ((nbytes > 0) && !eof) {
-
- count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
-
- start = NULL;
- if (dp->read_proc) {
- n = dp->read_proc( page, &start, (long)*ppos,
- count, &eof, dp->data);
- } else
- break;
-
- if (!start) {
- /*
- * For proc files that are less than 4k
- */
- start = page + *ppos;
- n -= (ssize_t)(*ppos);
- if (n <= 0)
- break;
- if (n > count)
- n = count;
- }
- if (n == 0)
- break; /* End of file */
- if (n < 0) {
- if (retval == 0)
- retval = n;
- break;
- }
-
- n -= copy_to_user((void *)buf, start, n);
- if (n == 0) {
- if (retval == 0)
- retval = -EFAULT;
- break;
- }
-
- *ppos += n;
- nbytes -= n;
- buf += n;
- retval += n;
- }
- cfs_free(page);
-
- return retval;
-}
-
-static ssize_t
-proc_file_write(struct file * file, const char * buffer,
- size_t count, loff_t *ppos)
-{
- cfs_proc_entry_t * dp;
-
- dp = (cfs_proc_entry_t *) file->private_data;
-
- if (!dp->write_proc)
- return -EIO;
-
- /* FIXME: does this routine need ppos? probably... */
- return dp->write_proc(file, buffer, count, dp->data);
-}
-
-struct file_operations proc_file_operations = {
- /*lseek:*/ NULL, //proc_file_lseek,
- /*read:*/ proc_file_read,
- /*write:*/ proc_file_write,
- /*ioctl:*/ NULL,
- /*open:*/ NULL,
- /*release:*/ NULL
-};
-
-/* allocate proc entry block */
-
-cfs_proc_entry_t *
-proc_alloc_entry()
-{
- cfs_proc_entry_t * entry = NULL;
-
- entry = cfs_mem_cache_alloc(proc_entry_cache, 0);
- if (!entry) {
- return NULL;
- }
-
- memset(entry, 0, sizeof(cfs_proc_entry_t));
-
- entry->magic = CFS_PROC_ENTRY_MAGIC;
- RtlInitializeSplayLinks(&(entry->s_link));
- entry->proc_fops = &proc_file_operations;
-
- return entry;
-}
-
-/* free the proc entry block */
-
-void
-proc_free_entry(cfs_proc_entry_t * entry)
-
-{
- ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC);
-
- cfs_mem_cache_free(proc_entry_cache, entry);
-}
-
-/* dissect the path string for a given full proc path */
-
-void
-proc_dissect_name(
- char *path,
- char **first,
- int *first_len,
- char **remain
- )
-{
- int i = 0, j = 0, len = 0;
-
- *first = *remain = NULL;
- *first_len = 0;
-
- len = strlen(path);
-
- while (i < len && (path[i] == '/')) i++;
-
- if (i < len) {
-
- *first = path + i;
- while (i < len && (path[i] != '/')) i++;
- *first_len = (path + i - *first);
-
- if (i + 1 < len) {
- *remain = path + i + 1;
- }
- }
-}
-
-/* search the children entries of the parent entry */
-
-cfs_proc_entry_t *
-proc_search_splay (
- cfs_proc_entry_t * parent,
- char * name
- )
-{
- cfs_proc_entry_t * node;
- PRTL_SPLAY_LINKS link;
-
- ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC);
- ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
-
- link = parent->root;
-
- while (link) {
-
- ANSI_STRING ename,nname;
- long result;
-
- node = CONTAINING_RECORD(link, cfs_proc_entry_t, s_link);
-
- ASSERT(node->magic == CFS_PROC_ENTRY_MAGIC);
-
- /* Compare the prefix in the tree with the full name */
-
- RtlInitAnsiString(&ename, name);
- RtlInitAnsiString(&nname, node->name);
-
- result = RtlCompareString(&nname, &ename,TRUE);
-
- if (result > 0) {
-
- /* The prefix is greater than the full name
- so we go down the left child */
-
- link = RtlLeftChild(link);
-
- } else if (result < 0) {
-
- /* The prefix is less than the full name
- so we go down the right child */
- //
-
- link = RtlRightChild(link);
-
- } else {
-
- /* We got the entry in the splay tree and
- make it root node instead */
-
- parent->root = RtlSplay(link);
-
- return node;
- }
-
- /* we need continue searching down the tree ... */
- }
-
- /* There's no the exptected entry in the splay tree */
-
- return NULL;
-}
-
-int
-proc_insert_splay (
- cfs_proc_entry_t * parent,
- cfs_proc_entry_t * child
- )
-{
- cfs_proc_entry_t * entry;
-
- ASSERT(parent != NULL && child != NULL);
- ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC);
- ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC);
- ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
-
- if (!parent->root) {
- parent->root = &(child->s_link);
- } else {
- entry = CONTAINING_RECORD(parent->root, cfs_proc_entry_t, s_link);
- while (TRUE) {
- long result;
- ANSI_STRING ename, cname;
-
- ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC);
-
- RtlInitAnsiString(&ename, entry->name);
- RtlInitAnsiString(&cname, child->name);
-
- result = RtlCompareString(&ename, &cname,TRUE);
-
- if (result == 0) {
- cfs_enter_debugger();
- if (entry == child) {
- break;
- }
- return FALSE;
- }
-
- if (result > 0) {
- if (RtlLeftChild(&entry->s_link) == NULL) {
- RtlInsertAsLeftChild(&entry->s_link, &child->s_link);
- break;
- } else {
- entry = CONTAINING_RECORD( RtlLeftChild(&entry->s_link),
- cfs_proc_entry_t, s_link);
- }
- } else {
- if (RtlRightChild(&entry->s_link) == NULL) {
- RtlInsertAsRightChild(&entry->s_link, &child->s_link);
- break;
- } else {
- entry = CONTAINING_RECORD( RtlRightChild(&entry->s_link),
- cfs_proc_entry_t, s_link );
- }
- }
- }
- }
-
- cfs_set_flag(child->flags, CFS_PROC_FLAG_ATTACHED);
- parent->nlink++;
-
- return TRUE;
-}
-
-
-/* remove a child entry from the splay tree */
-int
-proc_remove_splay (
- cfs_proc_entry_t * parent,
- cfs_proc_entry_t * child
- )
-{
- cfs_proc_entry_t * entry = NULL;
-
- ASSERT(parent != NULL && child != NULL);
- ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC);
- ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC);
- ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
- ASSERT(cfs_is_flag_set(child->flags, CFS_PROC_FLAG_ATTACHED));
-
- entry = proc_search_splay(parent, child->name);
-
- if (entry) {
- ASSERT(entry == child);
- parent->root = RtlDelete(&(entry->s_link));
- parent->nlink--;
- } else {
- cfs_enter_debugger();
- return FALSE;
- }
-
- return TRUE;
-}
-
-
-/* search a node inside the proc fs tree */
-
-cfs_proc_entry_t *
-proc_search_entry(
- char * name,
- cfs_proc_entry_t * root
- )
-{
- cfs_proc_entry_t * entry;
- cfs_proc_entry_t * parent;
- char *first, *remain;
- int flen;
- char *ename = NULL;
-
- parent = root;
- entry = NULL;
-
- ename = cfs_alloc(0x21, CFS_ALLOC_ZERO);
-
- if (ename == NULL) {
- goto errorout;
- }
-
-again:
-
- /* dissect the file name string */
- proc_dissect_name(name, &first, &flen, &remain);
-
- if (first) {
-
- if (flen >= 0x20) {
- cfs_enter_debugger();
- entry = NULL;
- goto errorout;
- }
-
- memset(ename, 0, 0x20);
- memcpy(ename, first, flen);
-
- entry = proc_search_splay(parent, ename);
-
- if (!entry) {
- goto errorout;
- }
-
- if (remain) {
- name = remain;
- parent = entry;
-
- goto again;
- }
- }
-
-errorout:
-
- if (ename) {
- cfs_free(ename);
- }
-
- return entry;
-}
-
-/* insert the path nodes to the proc fs tree */
-
-cfs_proc_entry_t *
-proc_insert_entry(
- char * name,
- cfs_proc_entry_t * root
- )
-{
- cfs_proc_entry_t *entry;
- cfs_proc_entry_t *parent;
- char *first, *remain;
- int flen;
- char ename[0x20];
-
- parent = root;
- entry = NULL;
-
-again:
-
- proc_dissect_name(name, &first, &flen, &remain);
-
- if (first) {
-
- if (flen >= 0x20) {
- return NULL;
- }
-
- memset(ename, 0, 0x20);
- memcpy(ename, first, flen);
-
- entry = proc_search_splay(parent, ename);
-
- if (!entry) {
- entry = proc_alloc_entry();
- memcpy(entry->name, ename, flen);
-
- if (entry) {
- if(!proc_insert_splay(parent, entry)) {
- proc_free_entry(entry);
- entry = NULL;
- }
- }
- }
-
- if (!entry) {
- return NULL;
- }
-
- if (remain) {
- entry->mode |= S_IFDIR | S_IRUGO | S_IXUGO;
- cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY);
- name = remain;
- parent = entry;
- goto again;
- }
- }
-
- return entry;
-}
-
-/* remove the path nodes from the proc fs tree */
-
-void
-proc_remove_entry(
- char * name,
- cfs_proc_entry_t * root
- )
-{
- cfs_proc_entry_t *entry;
- char *first, *remain;
- int flen;
- char ename[0x20];
-
- entry = NULL;
-
- proc_dissect_name(name, &first, &flen, &remain);
-
- if (first) {
-
- memset(ename, 0, 0x20);
- memcpy(ename, first, flen);
-
- entry = proc_search_splay(root, ename);
-
- if (entry) {
-
- if (remain) {
- ASSERT(S_ISDIR(entry->mode));
- proc_remove_entry(remain, entry);
- }
-
- if (!entry->nlink) {
- proc_remove_splay(root, entry);
- proc_free_entry(entry);
- }
- }
- } else {
- cfs_enter_debugger();
- }
-}
-
-/* create proc entry and insert it into the proc fs */
-
-cfs_proc_entry_t *
-create_proc_entry (
- char * name,
- mode_t mode,
- cfs_proc_entry_t * root
- )
-{
- cfs_proc_entry_t *parent = root;
- cfs_proc_entry_t *entry = NULL;
-
- if (S_ISDIR(mode)) {
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO | S_IXUGO;
- } else {
- if ((mode & S_IFMT) == 0)
- mode |= S_IFREG;
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO;
- }
-
- LOCK_PROCFS();
-
- ASSERT(NULL != proc_fs_root);
-
- if (!parent) {
- parent = proc_fs_root;
- }
-
- entry = proc_search_entry(name, parent);
-
- if (!entry) {
- entry = proc_insert_entry(name, parent);
- if (!entry) {
- /* Failed to create/insert the splay node ... */
- cfs_enter_debugger();
- goto errorout;
- }
- /* Initializing entry ... */
- entry->mode = mode;
-
- if (S_ISDIR(mode)) {
- cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY);
- }
- }
-
-errorout:
-
- UNLOCK_PROCFS();
-
- return entry;
-}
-
-
-/* search the specified entry form the proc fs */
-
-cfs_proc_entry_t *
-search_proc_entry(
- char * name,
- cfs_proc_entry_t * root
- )
-{
- cfs_proc_entry_t * entry;
-
- LOCK_PROCFS();
- if (root == NULL) {
- root = proc_fs_root;
- }
- entry = proc_search_entry(name, root);
- UNLOCK_PROCFS();
-
- return entry;
-}
-
-/* remove the entry from the proc fs */
-
-void
-remove_proc_entry(
- char * name,
- cfs_proc_entry_t * parent
- )
-{
- LOCK_PROCFS();
- if (parent == NULL) {
- parent = proc_fs_root;
- }
- proc_remove_entry(name, parent);
- UNLOCK_PROCFS();
-}
-
-
-void proc_destroy_splay(cfs_proc_entry_t * entry)
-{
- cfs_proc_entry_t * node;
-
- if (S_ISDIR(entry->mode)) {
-
- while (entry->root) {
- node = CONTAINING_RECORD(entry->root, cfs_proc_entry_t, s_link);
- entry->root = RtlDelete(&(node->s_link));
- proc_destroy_splay(node);
- }
- }
-
- proc_free_entry(entry);
-}
-
-
-/* destory the whole proc fs tree */
-
-void proc_destroy_fs()
-{
- LOCK_PROCFS();
-
- if (proc_fs_root) {
- proc_destroy_splay(proc_fs_root);
- }
-
- if (proc_entry_cache) {
- cfs_mem_cache_destroy(proc_entry_cache);
- }
-
- UNLOCK_PROCFS();
-}
-
-/* initilaize / build the proc fs tree */
-
-int proc_init_fs()
-{
- cfs_proc_entry_t * root = NULL;
-
- memset(&(root_table_header), 0, sizeof(struct ctl_table_header));
- INIT_LIST_HEAD(&(root_table_header.ctl_entry));
-
- INIT_PROCFS_LOCK();
- proc_entry_cache = cfs_mem_cache_create(
- NULL,
- sizeof(cfs_proc_entry_t),
- 0,
- 0
- );
-
- if (!proc_entry_cache) {
- return (-ENOMEM);
- }
-
- root = proc_alloc_entry();
-
- if (!root) {
- proc_destroy_fs();
- return (-ENOMEM);
- }
-
- root->magic = CFS_PROC_ENTRY_MAGIC;
- root->flags = CFS_PROC_FLAG_DIRECTORY;
- root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
- root->nlink = 3; // root should never be deleted.
-
- root->name[0]='p';
- root->name[1]='r';
- root->name[2]='o';
- root->name[3]='c';
-
- proc_fs_root = root;
-
- proc_sys_root = create_proc_entry("sys", S_IFDIR, root);
-
- if (!proc_sys_root) {
- proc_free_entry(root);
- proc_fs_root = NULL;
- proc_destroy_fs();
- return (-ENOMEM);
- }
-
- proc_sys_root->nlink = 1;
-
- proc_dev_root = create_proc_entry("dev", S_IFDIR, root);
-
- if (!proc_dev_root) {
- proc_free_entry(proc_sys_root);
- proc_sys_root = NULL;
- proc_free_entry(proc_fs_root);
- proc_fs_root = NULL;
- proc_destroy_fs();
- return (-ENOMEM);
- }
-
- proc_dev_root->nlink = 1;
-
- return 0;
-}
-
-
-static ssize_t do_rw_proc(int write, struct file * file, char * buf,
- size_t count, loff_t *ppos)
-{
- int op;
- cfs_proc_entry_t *de;
- struct ctl_table *table;
- size_t res;
- ssize_t error;
-
- de = (cfs_proc_entry_t *) file->proc_dentry;
-
- if (!de || !de->data)
- return -ENOTDIR;
- table = (struct ctl_table *) de->data;
- if (!table || !table->proc_handler)
- return -ENOTDIR;
- op = (write ? 002 : 004);
-
-// if (ctl_perm(table, op))
-// return -EPERM;
-
- res = count;
-
- /*
- * FIXME: we need to pass on ppos to the handler.
- */
-
- error = (*table->proc_handler) (table, write, file, buf, &res);
- if (error)
- return error;
- return res;
-}
-
-static ssize_t proc_readsys(struct file * file, char * buf,
- size_t count, loff_t *ppos)
-{
- return do_rw_proc(0, file, buf, count, ppos);
-}
-
-static ssize_t proc_writesys(struct file * file, const char * buf,
- size_t count, loff_t *ppos)
-{
- return do_rw_proc(1, file, (char *) buf, count, ppos);
-}
-
-
-struct file_operations proc_sys_file_operations = {
- /*lseek:*/ NULL,
- /*read:*/ proc_readsys,
- /*write:*/ proc_writesys,
- /*ioctl:*/ NULL,
- /*open:*/ NULL,
- /*release:*/ NULL
-};
-
-
-/* Scan the sysctl entries in table and add them all into /proc */
-void register_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t * root)
-{
- cfs_proc_entry_t * de;
- int len;
- mode_t mode;
-
- for (; table->ctl_name; table++) {
- /* Can't do anything without a proc name. */
- if (!table->procname)
- continue;
- /* Maybe we can't do anything with it... */
- if (!table->proc_handler && !table->child) {
- printk(KERN_WARNING "SYSCTL: Can't register %s\n",
- table->procname);
- continue;
- }
-
- len = strlen(table->procname);
- mode = table->mode;
-
- de = NULL;
- if (table->proc_handler)
- mode |= S_IFREG;
- else {
- de = search_proc_entry(table->procname, root);
- if (de) {
- break;
- }
- /* If the subdir exists already, de is non-NULL */
- }
-
- if (!de) {
-
- de = create_proc_entry((char *)table->procname, mode, root);
- if (!de)
- continue;
- de->data = (void *) table;
- if (table->proc_handler) {
- de->proc_fops = &proc_sys_file_operations;
- }
- }
- table->de = de;
- if (de->mode & S_IFDIR)
- register_proc_table(table->child, de);
- }
-}
-
-
-/*
- * Unregister a /proc sysctl table and any subdirectories.
- */
-void unregister_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t *root)
-{
- cfs_proc_entry_t *de;
- for (; table->ctl_name; table++) {
- if (!(de = table->de))
- continue;
- if (de->mode & S_IFDIR) {
- if (!table->child) {
- printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
- continue;
- }
- unregister_proc_table(table->child, de);
-
- /* Don't unregister directories which still have entries.. */
- if (de->nlink)
- continue;
- }
-
- /* Don't unregister proc entries that are still being used.. */
- if (de->nlink)
- continue;
-
- table->de = NULL;
- remove_proc_entry((char *)table->procname, root);
- }
-}
-
-/* The generic string strategy routine: */
-int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen,
- void *oldval, size_t *oldlenp,
- void *newval, size_t newlen, void **context)
-{
- int l, len;
-
- if (!table->data || !table->maxlen)
- return -ENOTDIR;
-
- if (oldval && oldlenp) {
- if(get_user(len, oldlenp))
- return -EFAULT;
- if (len) {
- l = strlen(table->data);
- if (len > l) len = l;
- if (len >= table->maxlen)
- len = table->maxlen;
- if(copy_to_user(oldval, table->data, len))
- return -EFAULT;
- if(put_user(0, ((char *) oldval) + len))
- return -EFAULT;
- if(put_user(len, oldlenp))
- return -EFAULT;
- }
- }
- if (newval && newlen) {
- len = newlen;
- if (len > table->maxlen)
- len = table->maxlen;
- if(copy_from_user(table->data, newval, len))
- return -EFAULT;
- if (len == table->maxlen)
- len--;
- ((char *) table->data)[len] = 0;
- }
- return 0;
-}
-
-/**
- * simple_strtoul - convert a string to an unsigned long
- * @cp: The start of the string
- * @endp: A pointer to the end of the parsed string will be placed here
- * @base: The number base to use
- */
-unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
-{
- unsigned long result = 0, value;
-
- if (!base) {
- base = 10;
- if (*cp == '0') {
- base = 8;
- cp++;
- if ((*cp == 'x') && isxdigit(cp[1])) {
- cp++;
- base = 16;
- }
- }
- }
- while (isxdigit(*cp) &&
- (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
- result = result*base + value;
- cp++;
- }
- if (endp)
- *endp = (char *)cp;
- return result;
-}
-
-#define OP_SET 0
-#define OP_AND 1
-#define OP_OR 2
-#define OP_MAX 3
-#define OP_MIN 4
-
-
-static int do_proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
- void *buffer, size_t *lenp, int conv, int op)
-{
- int *i, vleft, first=1, neg, val;
- size_t left, len;
-
- #define TMPBUFLEN 20
- char buf[TMPBUFLEN], *p;
-
- if (!table->data || !table->maxlen || !*lenp)
- {
- *lenp = 0;
- return 0;
- }
-
- i = (int *) table->data;
- vleft = table->maxlen / sizeof(int);
- left = *lenp;
-
- for (; left && vleft--; i++, first=0) {
- if (write) {
- while (left) {
- char c;
- if(get_user(c,(char *) buffer))
- return -EFAULT;
- if (!isspace(c))
- break;
- left--;
- ((char *) buffer)++;
- }
- if (!left)
- break;
- neg = 0;
- len = left;
- if (len > TMPBUFLEN-1)
- len = TMPBUFLEN-1;
- if(copy_from_user(buf, buffer, len))
- return -EFAULT;
- buf[len] = 0;
- p = buf;
- if (*p == '-' && left > 1) {
- neg = 1;
- left--, p++;
- }
- if (*p < '0' || *p > '9')
- break;
- val = simple_strtoul(p, &p, 0) * conv;
- len = p-buf;
- if ((len < left) && *p && !isspace(*p))
- break;
- if (neg)
- val = -val;
- (char *)buffer += len;
- left -= len;
- switch(op) {
- case OP_SET: *i = val; break;
- case OP_AND: *i &= val; break;
- case OP_OR: *i |= val; break;
- case OP_MAX: if(*i < val)
- *i = val;
- break;
- case OP_MIN: if(*i > val)
- *i = val;
- break;
- }
- } else {
- p = buf;
- if (!first)
- *p++ = '\t';
- sprintf(p, "%d", (*i) / conv);
- len = strlen(buf);
- if (len > left)
- len = left;
- if(copy_to_user(buffer, buf, len))
- return -EFAULT;
- left -= len;
- (char *)buffer += len;
- }
- }
-
- if (!write && !first && left) {
- if(put_user('\n', (char *) buffer))
- return -EFAULT;
- left--, ((char *)buffer)++;
- }
- if (write) {
- p = (char *) buffer;
- while (left) {
- char c;
- if(get_user(c, p++))
- return -EFAULT;
- if (!isspace(c))
- break;
- left--;
- }
- }
- if (write && first)
- return -EINVAL;
- *lenp -= left;
- memset(&(filp->f_pos) , 0, sizeof(loff_t));
- filp->f_pos += (loff_t)(*lenp);
- return 0;
-}
-
-/**
- * proc_dointvec - read a vector of integers
- * @table: the sysctl table
- * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
- * @buffer: the user buffer
- * @lenp: the size of the user buffer
- *
- * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
- * values from/to the user buffer, treated as an ASCII string.
- *
- * Returns 0 on success.
- */
-int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
- void *buffer, size_t *lenp)
-{
- return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET);
-}
-
-
-/**
- * proc_dostring - read a string sysctl
- * @table: the sysctl table
- * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
- * @buffer: the user buffer
- * @lenp: the size of the user buffer
- *
- * Reads/writes a string from/to the user buffer. If the kernel
- * buffer provided is not large enough to hold the string, the
- * string is truncated. The copied string is %NULL-terminated.
- * If the string is being read by the user process, it is copied
- * and a newline '\n' is added. It is truncated if the buffer is
- * not large enough.
- *
- * Returns 0 on success.
- */
-int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp,
- void *buffer, size_t *lenp)
-{
- size_t len;
- char *p, c;
-
- if (!table->data || !table->maxlen || !*lenp ||
- (filp->f_pos && !write)) {
- *lenp = 0;
- return 0;
- }
-
- if (write) {
- len = 0;
- p = buffer;
- while (len < *lenp) {
- if(get_user(c, p++))
- return -EFAULT;
- if (c == 0 || c == '\n')
- break;
- len++;
- }
- if (len >= (size_t)table->maxlen)
- len = (size_t)table->maxlen-1;
- if(copy_from_user(table->data, buffer, len))
- return -EFAULT;
- ((char *) table->data)[len] = 0;
- filp->f_pos += *lenp;
- } else {
- len = (size_t)strlen(table->data);
- if (len > (size_t)table->maxlen)
- len = (size_t)table->maxlen;
- if (len > *lenp)
- len = *lenp;
- if (len)
- if(copy_to_user(buffer, table->data, len))
- return -EFAULT;
- if (len < *lenp) {
- if(put_user('\n', ((char *) buffer) + len))
- return -EFAULT;
- len++;
- }
- *lenp = len;
- filp->f_pos += len;
- }
- return 0;
-}
-
-/* Perform the actual read/write of a sysctl table entry. */
-int do_sysctl_strategy (cfs_sysctl_table_t *table,
- int *name, int nlen,
- void *oldval, size_t *oldlenp,
- void *newval, size_t newlen, void **context)
-{
- int op = 0, rc;
- size_t len;
-
- if (oldval)
- op |= 004;
- if (newval)
- op |= 002;
-
- if (table->strategy) {
- rc = table->strategy(table, name, nlen, oldval, oldlenp,
- newval, newlen, context);
- if (rc < 0)
- return rc;
- if (rc > 0)
- return 0;
- }
-
- /* If there is no strategy routine, or if the strategy returns
- * zero, proceed with automatic r/w */
- if (table->data && table->maxlen) {
- if (oldval && oldlenp) {
- get_user(len, oldlenp);
- if (len) {
- if (len > (size_t)table->maxlen)
- len = (size_t)table->maxlen;
- if(copy_to_user(oldval, table->data, len))
- return -EFAULT;
- if(put_user(len, oldlenp))
- return -EFAULT;
- }
- }
- if (newval && newlen) {
- len = newlen;
- if (len > (size_t)table->maxlen)
- len = (size_t)table->maxlen;
- if(copy_from_user(table->data, newval, len))
- return -EFAULT;
- }
- }
- return 0;
-}
-
-static int parse_table(int *name, int nlen,
- void *oldval, size_t *oldlenp,
- void *newval, size_t newlen,
- cfs_sysctl_table_t *table, void **context)
-{
- int n;
-
-repeat:
-
- if (!nlen)
- return -ENOTDIR;
- if (get_user(n, name))
- return -EFAULT;
- for ( ; table->ctl_name; table++) {
- if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
- int error;
- if (table->child) {
-/*
- if (ctl_perm(table, 001))
- return -EPERM;
-*/
- if (table->strategy) {
- error = table->strategy(
- table, name, nlen,
- oldval, oldlenp,
- newval, newlen, context);
- if (error)
- return error;
- }
- name++;
- nlen--;
- table = table->child;
- goto repeat;
- }
- error = do_sysctl_strategy(table, name, nlen,
- oldval, oldlenp,
- newval, newlen, context);
- return error;
- }
- }
- return -ENOTDIR;
-}
-
-int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp,
- void *newval, size_t newlen)
-{
- struct list_head *tmp;
-
- if (nlen <= 0 || nlen >= CTL_MAXNAME)
- return -ENOTDIR;
- if (oldval) {
- int old_len;
- if (!oldlenp || get_user(old_len, oldlenp))
- return -EFAULT;
- }
- tmp = &root_table_header.ctl_entry;
- do {
- struct ctl_table_header *head =
- list_entry(tmp, struct ctl_table_header, ctl_entry);
- void *context = NULL;
- int error = parse_table(name, nlen, oldval, oldlenp,
- newval, newlen, head->ctl_table,
- &context);
- if (context)
- cfs_free(context);
- if (error != -ENOTDIR)
- return error;
- tmp = tmp->next;
- } while (tmp != &root_table_header.ctl_entry);
- return -ENOTDIR;
-}
-
-/**
- * register_sysctl_table - register a sysctl heirarchy
- * @table: the top-level table structure
- * @insert_at_head: whether the entry should be inserted in front or at the end
- *
- * Register a sysctl table heirarchy. @table should be a filled in ctl_table
- * array. An entry with a ctl_name of 0 terminates the table.
- *
- * The members of the &ctl_table structure are used as follows:
- *
- * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
- * must be unique within that level of sysctl
- *
- * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
- * enter a sysctl file
- *
- * data - a pointer to data for use by proc_handler
- *
- * maxlen - the maximum size in bytes of the data
- *
- * mode - the file permissions for the /proc/sys file, and for sysctl(2)
- *
- * child - a pointer to the child sysctl table if this entry is a directory, or
- * %NULL.
- *
- * proc_handler - the text handler routine (described below)
- *
- * strategy - the strategy routine (described below)
- *
- * de - for internal use by the sysctl routines
- *
- * extra1, extra2 - extra pointers usable by the proc handler routines
- *
- * Leaf nodes in the sysctl tree will be represented by a single file
- * under /proc; non-leaf nodes will be represented by directories.
- *
- * sysctl(2) can automatically manage read and write requests through
- * the sysctl table. The data and maxlen fields of the ctl_table
- * struct enable minimal validation of the values being written to be
- * performed, and the mode field allows minimal authentication.
- *
- * More sophisticated management can be enabled by the provision of a
- * strategy routine with the table entry. This will be called before
- * any automatic read or write of the data is performed.
- *
- * The strategy routine may return
- *
- * < 0 - Error occurred (error is passed to user process)
- *
- * 0 - OK - proceed with automatic read or write.
- *
- * > 0 - OK - read or write has been done by the strategy routine, so
- * return immediately.
- *
- * There must be a proc_handler routine for any terminal nodes
- * mirrored under /proc/sys (non-terminals are handled by a built-in
- * directory handler). Several default handlers are available to
- * cover common cases -
- *
- * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
- * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(),
- * proc_doulongvec_minmax()
- *
- * It is the handler's job to read the input buffer from user memory
- * and process it. The handler should return 0 on success.
- *
- * This routine returns %NULL on a failure to register, and a pointer
- * to the table header on success.
- */
-struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table,
- int insert_at_head)
-{
- struct ctl_table_header *tmp;
- tmp = cfs_alloc(sizeof(struct ctl_table_header), 0);
- if (!tmp)
- return NULL;
- tmp->ctl_table = table;
-
- INIT_LIST_HEAD(&tmp->ctl_entry);
- if (insert_at_head)
- list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
- else
- list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
-#ifdef CONFIG_PROC_FS
- register_proc_table(table, proc_sys_root);
-#endif
- return tmp;
-}
-
-/**
- * unregister_sysctl_table - unregister a sysctl table heirarchy
- * @header: the header returned from register_sysctl_table
- *
- * Unregisters the sysctl table and all children. proc entries may not
- * actually be removed until they are no longer used by anyone.
- */
-void unregister_sysctl_table(struct ctl_table_header * header)
-{
- list_del(&header->ctl_entry);
-#ifdef CONFIG_PROC_FS
- unregister_proc_table(header->ctl_table, proc_sys_root);
-#endif
- cfs_free(header);
-}
-
-
-int cfs_psdev_register(cfs_psdev_t * psdev)
-{
- cfs_proc_entry_t * entry;
-
- entry = create_proc_entry (
- (char *)psdev->name,
- S_IFREG,
- proc_dev_root
- );
-
- if (!entry) {
- return -ENOMEM;
- }
-
- entry->flags |= CFS_PROC_FLAG_MISCDEV;
-
- entry->proc_fops = psdev->fops;
- entry->data = (void *)psdev;
-
- return 0;
-}
-
-int cfs_psdev_deregister(cfs_psdev_t * psdev)
-{
- cfs_proc_entry_t * entry;
-
- entry = search_proc_entry (
- (char *)psdev->name,
- proc_dev_root
- );
-
- if (entry) {
-
- ASSERT(entry->data == (void *)psdev);
- ASSERT(entry->flags & CFS_PROC_FLAG_MISCDEV);
-
- remove_proc_entry(
- (char *)psdev->name,
- proc_dev_root
- );
- }
-
- return 0;
-}
-
-extern char debug_file_path[1024];
-
-#define PSDEV_LNET (0x100)
-enum {
- PSDEV_DEBUG = 1, /* control debugging */
- PSDEV_SUBSYSTEM_DEBUG, /* control debugging */
- PSDEV_PRINTK, /* force all messages to console */
- PSDEV_CONSOLE_RATELIMIT, /* rate limit console messages */
- PSDEV_DEBUG_PATH, /* crashdump log location */
- PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */
- PSDEV_LIBCFS_MEMUSED, /* bytes currently PORTAL_ALLOCated */
-};
-
-static struct ctl_table lnet_table[] = {
- {PSDEV_DEBUG, "debug", &libcfs_debug, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &libcfs_subsystem_debug,
- sizeof(int), 0644, NULL, &proc_dointvec},
- {PSDEV_PRINTK, "printk", &libcfs_printk, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit", &libcfs_console_ratelimit,
- sizeof(int), 0644, NULL, &proc_dointvec},
- {PSDEV_DEBUG_PATH, "debug_path", debug_file_path,
- sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string},
-/*
- {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall,
- sizeof(portals_upcall), 0644, NULL, &proc_dostring,
- &sysctl_string},
-*/
- {PSDEV_LIBCFS_MEMUSED, "memused", (int *)&libcfs_kmemory.counter,
- sizeof(int), 0644, NULL, &proc_dointvec},
- {0}
-};
-
-static struct ctl_table top_table[2] = {
- {PSDEV_LNET, "lnet", NULL, 0, 0555, lnet_table},
- {0}
-};
-
-
-int trace_write_dump_kernel(struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- int rc = trace_dump_debug_buffer_usrstr(buffer, count);
-
- return (rc < 0) ? rc : count;
-}
-
-int trace_write_daemon_file(struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- int rc = trace_daemon_command_usrstr(buffer, count);
-
- return (rc < 0) ? rc : count;
-}
-
-int trace_read_daemon_file(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- int rc;
-
- tracefile_read_lock();
-
- rc = trace_copyout_string(page, count, tracefile, "\n");
-
- tracefile_read_unlock();
-
- return rc;
-}
-
-int trace_write_debug_mb(struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- int rc = trace_set_debug_mb_userstr(buffer, count);
-
- return (rc < 0) ? rc : count;
-}
-
-int trace_read_debug_mb(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- char str[32];
-
- snprintf(str, sizeof(str), "%d\n", trace_get_debug_mb());
-
- return trace_copyout_string(page, count, str, NULL);
-}
-
-int insert_proc(void)
-{
- cfs_proc_entry_t *ent;
-
- ent = create_proc_entry("sys/lnet/dump_kernel", 0, NULL);
- if (ent == NULL) {
- CERROR(("couldn't register dump_kernel\n"));
- return -1;
- }
- ent->write_proc = trace_write_dump_kernel;
-
- ent = create_proc_entry("sys/lnet/daemon_file", 0, NULL);
- if (ent == NULL) {
- CERROR(("couldn't register daemon_file\n"));
- return -1;
- }
- ent->write_proc = trace_write_daemon_file;
- ent->read_proc = trace_read_daemon_file;
-
- ent = create_proc_entry("sys/lnet/debug_mb", 0, NULL);
- if (ent == NULL) {
- CERROR(("couldn't register debug_mb\n"));
- return -1;
- }
- ent->write_proc = trace_write_debug_mb;
- ent->read_proc = trace_read_debug_mb;
-
- return 0;
-}
-
-void remove_proc(void)
-{
- remove_proc_entry("sys/portals/dump_kernel", NULL);
- remove_proc_entry("sys/portals/daemon_file", NULL);
- remove_proc_entry("sys/portals/debug_mb", NULL);
-
-#ifdef CONFIG_SYSCTL
- if (portals_table_header)
- unregister_sysctl_table(portals_table_header);
- portals_table_header = NULL;
-#endif
-}
-
-
-/*
- * proc process routines of kernel space
- */
-
-cfs_file_t *
-lustre_open_file(char * filename)
-{
- int rc = 0;
- cfs_file_t * fh = NULL;
- cfs_proc_entry_t * fp = NULL;
-
- fp = search_proc_entry(filename, proc_fs_root);
-
- if (!fp) {
- rc = -ENOENT;
- return NULL;
- }
-
- fh = cfs_alloc(sizeof(cfs_file_t), CFS_ALLOC_ZERO);
-
- if (!fh) {
- rc = -ENOMEM;
- return NULL;
- }
-
- fh->private_data = (void *)fp;
- fh->f_op = fp->proc_fops;
-
- if (fh->f_op->open) {
- rc = (fh->f_op->open)(fh);
- } else {
- fp->nlink++;
- }
-
- if (0 != rc) {
- cfs_free(fh);
- return NULL;
- }
-
- return fh;
-}
-
-int
-lustre_close_file(cfs_file_t * fh)
-{
- int rc = 0;
- cfs_proc_entry_t * fp = NULL;
-
- fp = (cfs_proc_entry_t *) fh->private_data;
-
- if (fh->f_op->release) {
- rc = (fh->f_op->release)(fh);
- } else {
- fp->nlink--;
- }
-
- cfs_free(fh);
-
- return rc;
-}
-
-int
-lustre_do_ioctl( cfs_file_t * fh,
- unsigned long cmd,
- ulong_ptr arg )
-{
- int rc = 0;
-
- if (fh->f_op->ioctl) {
- rc = (fh->f_op->ioctl)(fh, cmd, arg);
- }
-
- if (rc != 0) {
- printk("lustre_do_ioctl: fialed: cmd = %xh arg = %xh rc = %d\n",
- cmd, arg, rc);
- }
-
- return rc;
-}
-
-int
-lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl)
-{
- int rc = 0;
- ulong_ptr data;
-
- data = (ulong_ptr)devctl + sizeof(CFS_PROC_IOCTL);
-
- /* obd ioctl code */
- if (_IOC_TYPE(devctl->cmd) == 'f') {
-#if 0
- struct obd_ioctl_data * obd = (struct obd_ioctl_data *) data;
-
- if ( devctl->cmd != (ULONG)OBD_IOC_BRW_WRITE &&
- devctl->cmd != (ULONG)OBD_IOC_BRW_READ ) {
-
- unsigned long off = obd->ioc_len;
-
- if (obd->ioc_pbuf1) {
- obd->ioc_pbuf1 = (char *)(data + off);
- off += size_round(obd->ioc_plen1);
- }
-
- if (obd->ioc_pbuf2) {
- obd->ioc_pbuf2 = (char *)(data + off);
- }
- }
- #endif
- }
-
- rc = lustre_do_ioctl(fh, devctl->cmd, data);
-
- return rc;
-}
-
-
-size_t
-lustre_read_file(
- cfs_file_t * fh,
- loff_t off,
- size_t size,
- char * buf
- )
-{
- size_t rc = 0;
-
- if (fh->f_op->read) {
- rc = (fh->f_op->read) (fh, buf, size, &off);
- }
-
- return rc;
-}
-
-
-size_t
-lustre_write_file(
- cfs_file_t * fh,
- loff_t off,
- size_t size,
- char * buf
- )
-{
- size_t rc = 0;
-
- if (fh->f_op->write) {
- rc = (fh->f_op->write)(fh, buf, size, &off);
- }
-
- return rc;
-}
-
-#else /* !__KERNEL__ */
-
-#include <lnet/api-support.h>
-#include <liblustre.h>
-#include <lustre_lib.h>
-
-/*
- * proc process routines of user space
- */
-
-HANDLE cfs_proc_open (char * filename, int oflag)
-{
- NTSTATUS status;
- IO_STATUS_BLOCK iosb;
- int rc;
-
- HANDLE FileHandle = INVALID_HANDLE_VALUE;
- OBJECT_ATTRIBUTES ObjectAttributes;
- ACCESS_MASK DesiredAccess;
- ULONG CreateDisposition;
- ULONG ShareAccess;
- ULONG CreateOptions;
- UNICODE_STRING UnicodeName;
- USHORT NameLength;
-
- PFILE_FULL_EA_INFORMATION Ea = NULL;
- ULONG EaLength;
- UCHAR EaBuffer[EA_MAX_LENGTH];
-
- /* Check the filename: should start with "/proc" or "/dev" */
- NameLength = (USHORT)strlen(filename);
- if (NameLength > 0x05) {
- if (_strnicmp(filename, "/proc/", 6) == 0) {
- filename += 6;
- NameLength -=6;
- if (NameLength <= 0) {
- rc = -EINVAL;
- goto errorout;
- }
- } else if (_strnicmp(filename, "/dev/", 5) == 0) {
- } else {
- rc = -EINVAL;
- goto errorout;
- }
- } else {
- rc = -EINVAL;
- goto errorout;
- }
-
- /* Analyze the flags settings */
-
- if (cfs_is_flag_set(oflag, O_WRONLY)) {
- DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE);
- ShareAccess = 0;
- } else if (cfs_is_flag_set(oflag, O_RDWR)) {
- DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE);
- ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE;
- } else {
- DesiredAccess = (GENERIC_READ | SYNCHRONIZE);
- ShareAccess = FILE_SHARE_READ;
- }
-
- if (cfs_is_flag_set(oflag, O_CREAT)) {
- if (cfs_is_flag_set(oflag, O_EXCL)) {
- CreateDisposition = FILE_CREATE;
- rc = -EINVAL;
- goto errorout;
- } else {
- CreateDisposition = FILE_OPEN_IF;
- }
- } else {
- CreateDisposition = FILE_OPEN;
- }
-
- if (cfs_is_flag_set(oflag, O_TRUNC)) {
- if (cfs_is_flag_set(oflag, O_EXCL)) {
- CreateDisposition = FILE_OVERWRITE;
- } else {
- CreateDisposition = FILE_OVERWRITE_IF;
- }
- }
-
- CreateOptions = 0;
-
- if (cfs_is_flag_set(oflag, O_DIRECTORY)) {
- cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE);
- }
-
- if (cfs_is_flag_set(oflag, O_SYNC)) {
- cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH);
- }
-
- if (cfs_is_flag_set(oflag, O_DIRECT)) {
- cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING);
- }
-
- /* Initialize the unicode path name for the specified file */
- RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK);
-
- /* Setup the object attributes structure for the file. */
- InitializeObjectAttributes(
- &ObjectAttributes,
- &UnicodeName,
- OBJ_CASE_INSENSITIVE,
- NULL,
- NULL );
-
- /* building EA for the proc entry ... */
- Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
- Ea->NextEntryOffset = 0;
- Ea->Flags = 0;
- Ea->EaNameLength = (UCHAR)NameLength;
- Ea->EaValueLength = 0;
- RtlCopyMemory(
- &(Ea->EaName),
- filename,
- NameLength + 1
- );
- EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
- Ea->EaNameLength + 1;
-
- /* Now to open or create the file now */
- status = ZwCreateFile(
- &FileHandle,
- DesiredAccess,
- &ObjectAttributes,
- &iosb,
- 0,
- FILE_ATTRIBUTE_NORMAL,
- ShareAccess,
- CreateDisposition,
- CreateOptions,
- Ea,
- EaLength );
-
- /* Check the returned status of Iosb ... */
-
- if (!NT_SUCCESS(status)) {
- rc = cfs_error_code(status);
- goto errorout;
- }
-
-errorout:
-
- return FileHandle;
-}
-
-int cfs_proc_close(HANDLE handle)
-{
- if (handle) {
- NtClose((HANDLE)handle);
- }
-
- return 0;
-}
-
-int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count)
-{
- NTSTATUS status;
- IO_STATUS_BLOCK iosb;
- LARGE_INTEGER offset;
-
-
- offset.QuadPart = 0;
-
- /* read file data */
- status = NtReadFile(
- (HANDLE)handle,
- 0,
- NULL,
- NULL,
- &iosb,
- buffer,
- count,
- &offset,
- NULL);
-
- /* check the return status */
- if (!NT_SUCCESS(status)) {
- printf("NtReadFile request failed 0x%0x\n", status);
- goto errorout;
- }
-
-errorout:
-
- if (NT_SUCCESS(status)) {
- return iosb.Information;
- }
-
- return cfs_error_code(status);
-}
-
-
-int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count)
-{
- NTSTATUS status;
- IO_STATUS_BLOCK iosb;
- LARGE_INTEGER offset;
-
- offset.QuadPart = -1;
-
- /* write buffer to the opened file */
- status = NtWriteFile(
- (HANDLE)handle,
- 0,
- NULL,
- NULL,
- &iosb,
- buffer,
- count,
- &offset,
- NULL);
-
- /* check the return status */
- if (!NT_SUCCESS(status)) {
- printf("NtWriteFile request failed 0x%0x\n", status);
- goto errorout;
- }
-
-errorout:
-
- if (NT_SUCCESS(status)) {
- return iosb.Information;
- }
-
- return cfs_error_code(status);
-}
-
-int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer)
-{
- PUCHAR procdat = NULL;
- CFS_PROC_IOCTL procctl;
- ULONG length = 0;
- ULONG extra = 0;
-
- NTSTATUS status;
- IO_STATUS_BLOCK iosb;
-
- procctl.cmd = cmd;
-
- if(_IOC_TYPE(cmd) == IOC_LIBCFS_TYPE) {
- struct libcfs_ioctl_data * portal;
- portal = (struct libcfs_ioctl_data *) buffer;
- length = portal->ioc_len;
- } else if (_IOC_TYPE(cmd) == 'f') {
- struct obd_ioctl_data * obd;
- obd = (struct obd_ioctl_data *) buffer;
- length = obd->ioc_len;
- extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2);
- } else if(_IOC_TYPE(cmd) == 'u') {
- length = 4;
- extra = 0;
- } else {
- printf("user:winnt-proc:cfs_proc_ioctl: un-supported ioctl type ...\n");
- cfs_enter_debugger();
- status = STATUS_INVALID_PARAMETER;
- goto errorout;
- }
-
- procctl.len = length + extra;
- procdat = malloc(length + extra + sizeof(CFS_PROC_IOCTL));
-
- if (NULL == procdat) {
- printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n");
- status = STATUS_INSUFFICIENT_RESOURCES;
- cfs_enter_debugger();
- goto errorout;
- }
- memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL));
- memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL));
- memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length);
- length += sizeof(CFS_PROC_IOCTL);
-
- if (_IOC_TYPE(cmd) == 'f') {
-
- char *ptr;
- struct obd_ioctl_data * data;
- struct obd_ioctl_data * obd;
-
- data = (struct obd_ioctl_data *) buffer;
- obd = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL));
- ptr = obd->ioc_bulk;
-
- if (data->ioc_inlbuf1) {
- obd->ioc_inlbuf1 = ptr;
- LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
- }
-
- if (data->ioc_inlbuf2) {
- obd->ioc_inlbuf2 = ptr;
- LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
- }
- if (data->ioc_inlbuf3) {
- obd->ioc_inlbuf3 = ptr;
- LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
- }
- if (data->ioc_inlbuf4) {
- obd->ioc_inlbuf4 = ptr;
- LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
- }
-
- if ( cmd != (ULONG)OBD_IOC_BRW_WRITE &&
- cmd != (ULONG)OBD_IOC_BRW_READ ) {
-
- if (data->ioc_pbuf1 && data->ioc_plen1) {
- obd->ioc_pbuf1 = &procdat[length];
- memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, data->ioc_plen1);
- length += size_round(data->ioc_plen1);
- }
-
- if (data->ioc_pbuf2 && data->ioc_plen2) {
- obd->ioc_pbuf2 = &procdat[length];
- memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, data->ioc_plen2);
- length += size_round(data->ioc_plen2);
- }
- }
-
- if (obd_ioctl_is_invalid(obd)) {
- cfs_enter_debugger();
- }
- }
-
- status = NtDeviceIoControlFile(
- (HANDLE)handle,
- NULL, NULL, NULL, &iosb,
- IOCTL_LIBCFS_ENTRY,
- procdat, length,
- procdat, length );
-
-
- if (NT_SUCCESS(status)) {
- memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len);
- }
-
-errorout:
-
- if (procdat) {
- free(procdat);
- }
-
- return cfs_error_code(status);
-}
-
-#endif /* __KERNEL__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LIBCFS
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-
-
-/*
- * Wait queue routines
- */
-
-/*
- * cfs_waitq_init
- * To initialize the wait queue
- *
- * Arguments:
- * waitq: pointer to the cfs_waitq_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitq_init(cfs_waitq_t *waitq)
-{
- waitq->magic = CFS_WAITQ_MAGIC;
- waitq->flags = 0;
- INIT_LIST_HEAD(&(waitq->waiters));
- spin_lock_init(&(waitq->guard));
-}
-
-/*
- * cfs_waitlink_init
- * To initialize the wake link node
- *
- * Arguments:
- * link: pointer to the cfs_waitlink_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitlink_init(cfs_waitlink_t *link)
-{
- cfs_task_t * task = cfs_current();
- PTASK_SLOT slot = NULL;
-
- if (!task) {
- /* should bugchk here */
- cfs_enter_debugger();
- return;
- }
-
- slot = CONTAINING_RECORD(task, TASK_SLOT, task);
- cfs_assert(slot->Magic == TASKSLT_MAGIC);
-
- memset(link, 0, sizeof(cfs_waitlink_t));
-
- link->magic = CFS_WAITLINK_MAGIC;
- link->flags = 0;
-
- link->event = &(slot->Event);
- link->hits = &(slot->hits);
-
- atomic_inc(&slot->count);
-
- INIT_LIST_HEAD(&(link->waitq[0].link));
- INIT_LIST_HEAD(&(link->waitq[1].link));
-
- link->waitq[0].waitl = link->waitq[1].waitl = link;
-}
-
-
-/*
- * cfs_waitlink_fini
- * To finilize the wake link node
- *
- * Arguments:
- * link: pointer to the cfs_waitlink_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitlink_fini(cfs_waitlink_t *link)
-{
- cfs_task_t * task = cfs_current();
- PTASK_SLOT slot = NULL;
-
- if (!task) {
- /* should bugchk here */
- cfs_enter_debugger();
- return;
- }
-
- slot = CONTAINING_RECORD(task, TASK_SLOT, task);
- cfs_assert(slot->Magic == TASKSLT_MAGIC);
- cfs_assert(link->magic == CFS_WAITLINK_MAGIC);
- cfs_assert(link->waitq[0].waitq == NULL);
- cfs_assert(link->waitq[1].waitq == NULL);
-
- atomic_dec(&slot->count);
-}
-
-
-/*
- * cfs_waitq_add_internal
- * To queue the wait link node to the wait queue
- *
- * Arguments:
- * waitq: pointer to the cfs_waitq_t structure
- * link: pointer to the cfs_waitlink_t structure
- * int: queue no (Normal or Forward waitq)
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitq_add_internal(cfs_waitq_t *waitq,
- cfs_waitlink_t *link,
- __u32 waitqid )
-{
- LASSERT(waitq != NULL);
- LASSERT(link != NULL);
- LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
- LASSERT(link->magic == CFS_WAITLINK_MAGIC);
- LASSERT(waitqid < CFS_WAITQ_CHANNELS);
-
- spin_lock(&(waitq->guard));
- LASSERT(link->waitq[waitqid].waitq == NULL);
- link->waitq[waitqid].waitq = waitq;
- if (link->flags & CFS_WAITQ_EXCLUSIVE) {
- list_add_tail(&link->waitq[waitqid].link, &waitq->waiters);
- } else {
- list_add(&link->waitq[waitqid].link, &waitq->waiters);
- }
- spin_unlock(&(waitq->guard));
-}
-/*
- * cfs_waitq_add
- * To queue the wait link node to the wait queue
- *
- * Arguments:
- * waitq: pointer to the cfs_waitq_t structure
- * link: pointer to the cfs_waitlink_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitq_add(cfs_waitq_t *waitq,
- cfs_waitlink_t *link)
-{
- cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_NORMAL);
-}
-
-/*
- * cfs_waitq_add_exclusive
- * To set the wait link node to exclusive mode
- * and queue it to the wait queue
- *
- * Arguments:
- * waitq: pointer to the cfs_waitq_t structure
- * link: pointer to the cfs_wait_link structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitq_add_exclusive( cfs_waitq_t *waitq,
- cfs_waitlink_t *link)
-{
- LASSERT(waitq != NULL);
- LASSERT(link != NULL);
- LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
- LASSERT(link->magic == CFS_WAITLINK_MAGIC);
-
- link->flags |= CFS_WAITQ_EXCLUSIVE;
- cfs_waitq_add(waitq, link);
-}
-
-/*
- * cfs_waitq_forward
- * To be determinated.
- *
- * Arguments:
- * waitq: pointer to the cfs_waitq_t structure
- * link: pointer to the cfs_waitlink_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitq_forward( cfs_waitlink_t *link,
- cfs_waitq_t *waitq)
-{
- cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_FORWARD);
-}
-
-/*
- * cfs_waitq_del
- * To remove the wait link node from the waitq
- *
- * Arguments:
- * waitq: pointer to the cfs_ waitq_t structure
- * link: pointer to the cfs_waitlink_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitq_del( cfs_waitq_t *waitq,
- cfs_waitlink_t *link)
-{
- int i = 0;
-
- LASSERT(waitq != NULL);
- LASSERT(link != NULL);
-
- LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
- LASSERT(link->magic == CFS_WAITLINK_MAGIC);
-
- spin_lock(&(waitq->guard));
-
- for (i=0; i < CFS_WAITQ_CHANNELS; i++) {
- if (link->waitq[i].waitq == waitq)
- break;
- }
-
- if (i < CFS_WAITQ_CHANNELS) {
- link->waitq[i].waitq = NULL;
- list_del_init(&link->waitq[i].link);
- } else {
- cfs_enter_debugger();
- }
-
- spin_unlock(&(waitq->guard));
-}
-
-/*
- * cfs_waitq_active
- * Is the waitq active (not empty) ?
- *
- * Arguments:
- * waitq: pointer to the cfs_ waitq_t structure
- *
- * Return Value:
- * Zero: the waitq is empty
- * Non-Zero: the waitq is active
- *
- * Notes:
- * We always returns TRUE here, the same to Darwin.
- */
-
-int cfs_waitq_active(cfs_waitq_t *waitq)
-{
- LASSERT(waitq != NULL);
- LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
-
- return (1);
-}
-
-/*
- * cfs_waitq_signal_nr
- * To wake up all the non-exclusive tasks plus nr exclusive
- * ones in the waitq
- *
- * Arguments:
- * waitq: pointer to the cfs_waitq_t structure
- * nr: number of exclusive tasks to be woken up
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-
-void cfs_waitq_signal_nr(cfs_waitq_t *waitq, int nr)
-{
- int result;
- cfs_waitlink_channel_t * scan;
-
- LASSERT(waitq != NULL);
- LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
-
- spin_lock(&waitq->guard);
-
- list_for_each_entry(scan, &waitq->waiters, cfs_waitlink_channel_t, link) {
-
- cfs_waitlink_t *waitl = scan->waitl;
-
- result = cfs_wake_event(waitl->event);
- LASSERT( result == FALSE || result == TRUE );
-
- if (result) {
- atomic_inc(waitl->hits);
- }
-
- if ((waitl->flags & CFS_WAITQ_EXCLUSIVE) && --nr == 0)
- break;
- }
-
- spin_unlock(&waitq->guard);
- return;
-}
-
-/*
- * cfs_waitq_signal
- * To wake up all the non-exclusive tasks and 1 exclusive
- *
- * Arguments:
- * waitq: pointer to the cfs_waitq_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitq_signal(cfs_waitq_t *waitq)
-{
- cfs_waitq_signal_nr(waitq, 1);
-}
-
-
-/*
- * cfs_waitq_broadcast
- * To wake up all the tasks in the waitq
- *
- * Arguments:
- * waitq: pointer to the cfs_waitq_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitq_broadcast(cfs_waitq_t *waitq)
-{
- LASSERT(waitq != NULL);
- LASSERT(waitq->magic ==CFS_WAITQ_MAGIC);
-
- cfs_waitq_signal_nr(waitq, 0);
-}
-
-/*
- * cfs_waitq_wait
- * To wait on the link node until it is signaled.
- *
- * Arguments:
- * link: pointer to the cfs_waitlink_t structure
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state)
-{
- LASSERT(link != NULL);
- LASSERT(link->magic == CFS_WAITLINK_MAGIC);
-
- if (atomic_read(link->hits) > 0) {
- atomic_dec(link->hits);
- LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00);
- } else {
- cfs_wait_event(link->event, 0);
- }
-}
-
-/*
- * cfs_waitq_timedwait
- * To wait the link node to be signaled with a timeout limit
- *
- * Arguments:
- * link: pointer to the cfs_waitlink_t structure
- * timeout: the timeout limitation
- *
- * Return Value:
- * Woken up: return the difference of the current time and
- * the timeout
- * Timeout: return 0
- *
- * Notes:
- * What if it happens to be woken up at the just timeout time !?
- */
-
-cfs_duration_t cfs_waitq_timedwait( cfs_waitlink_t *link,
- cfs_task_state_t state,
- cfs_duration_t timeout)
-{
-
- if (atomic_read(link->hits) > 0) {
- atomic_dec(link->hits);
- LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00);
- return TRUE;
- }
-
- return (cfs_duration_t)cfs_wait_event(link->event, timeout);
-}
-
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LIBCFS
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-
-#define TDILND_MODULE_NAME L"Tdilnd"
-
-ks_data_t ks_data;
-
-ULONG
-ks_tdi_send_flags(ULONG SockFlags)
-{
- ULONG TdiFlags = 0;
-
- if (cfs_is_flag_set(SockFlags, MSG_OOB)) {
- cfs_set_flag(TdiFlags, TDI_SEND_EXPEDITED);
- }
-
- if (cfs_is_flag_set(SockFlags, MSG_MORE)) {
- cfs_set_flag(TdiFlags, TDI_SEND_PARTIAL);
- }
-
- if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) {
- cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING);
- }
-
- return TdiFlags;
-}
-
-NTSTATUS
-KsIrpCompletionRoutine(
- IN PDEVICE_OBJECT DeviceObject,
- IN PIRP Irp,
- IN PVOID Context
- )
-{
- if (NULL != Context) {
- KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE);
- }
-
- return STATUS_MORE_PROCESSING_REQUIRED;
-
- UNREFERENCED_PARAMETER(DeviceObject);
- UNREFERENCED_PARAMETER(Irp);
-}
-
-
-/*
- * KsBuildTdiIrp
- * Allocate a new IRP and initialize it to be issued to tdi
- *
- * Arguments:
- * DeviceObject: device object created by the underlying
- * TDI transport driver
- *
- * Return Value:
- * PRIP: the allocated Irp in success or NULL in failure.
- *
- * NOTES:
- * N/A
- */
-
-PIRP
-KsBuildTdiIrp(
- IN PDEVICE_OBJECT DeviceObject
- )
-{
- PIRP Irp;
- PIO_STACK_LOCATION IrpSp;
-
- //
- // Allocating the IRP ...
- //
-
- Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
-
- if (NULL != Irp) {
-
- //
- // Getting the Next Stack Location ...
- //
-
- IrpSp = IoGetNextIrpStackLocation(Irp);
-
- //
- // Initializing Irp ...
- //
-
- IrpSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL;
- IrpSp->Parameters.DeviceIoControl.IoControlCode = 0;
- }
-
- return Irp;
-}
-
-/*
- * KsSubmitTdiIrp
- * Issue the Irp to the underlying tdi driver
- *
- * Arguments:
- * DeviceObject: the device object created by TDI driver
- * Irp: the I/O request packet to be processed
- * bSynchronous: synchronous or not. If true, we need wait
- * until the process is finished.
- * Information: returned info
- *
- * Return Value:
- * NTSTATUS: kernel status code
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-KsSubmitTdiIrp(
- IN PDEVICE_OBJECT DeviceObject,
- IN PIRP Irp,
- IN BOOLEAN bSynchronous,
- OUT PULONG Information
- )
-{
- NTSTATUS Status;
- KEVENT Event;
-
- if (bSynchronous) {
-
- KeInitializeEvent(
- &Event,
- SynchronizationEvent,
- FALSE
- );
-
-
- IoSetCompletionRoutine(
- Irp,
- KsIrpCompletionRoutine,
- &Event,
- TRUE,
- TRUE,
- TRUE
- );
- }
-
- Status = IoCallDriver(DeviceObject, Irp);
-
- if (bSynchronous) {
-
- if (STATUS_PENDING == Status) {
-
- Status = KeWaitForSingleObject(
- &Event,
- Executive,
- KernelMode,
- FALSE,
- NULL
- );
- }
-
- Status = Irp->IoStatus.Status;
-
- if (Information) {
- *Information = (ULONG)(Irp->IoStatus.Information);
- }
-
- Irp->MdlAddress = NULL;
- IoFreeIrp(Irp);
- }
-
- if (!NT_SUCCESS(Status)) {
-
- KsPrint((2, "KsSubmitTdiIrp: Error when submitting the Irp: Status = %xh (%s) ...\n",
- Status, KsNtStatusToString(Status)));
- }
-
- return (Status);
-}
-
-
-
-/*
- * KsOpenControl
- * Open the Control Channel Object ...
- *
- * Arguments:
- * DeviceName: the device name to be opened
- * Handle: opened handle in success case
- * FileObject: the fileobject of the device
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsOpenControl(
- IN PUNICODE_STRING DeviceName,
- OUT HANDLE * Handle,
- OUT PFILE_OBJECT * FileObject
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
- OBJECT_ATTRIBUTES ObjectAttributes;
- IO_STATUS_BLOCK IoStatus;
-
-
- LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
- //
- // Initializing ...
- //
-
- InitializeObjectAttributes(
- &ObjectAttributes,
- DeviceName,
- OBJ_CASE_INSENSITIVE |
- OBJ_KERNEL_HANDLE,
- NULL,
- NULL
- );
-
- LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
- //
- // Creating the Transport Address Object ...
- //
-
- Status = ZwCreateFile(
- Handle,
- FILE_READ_DATA | FILE_WRITE_DATA,
- &ObjectAttributes,
- &IoStatus,
- 0,
- FILE_ATTRIBUTE_NORMAL,
- FILE_SHARE_READ | FILE_SHARE_WRITE,
- FILE_OPEN,
- 0,
- NULL,
- 0
- );
-
-
- if (NT_SUCCESS(Status)) {
-
- //
- // Now Obtaining the FileObject of the Transport Address ...
- //
-
- Status = ObReferenceObjectByHandle(
- *Handle,
- FILE_ANY_ACCESS,
- NULL,
- KernelMode,
- FileObject,
- NULL
- );
-
- if (!NT_SUCCESS(Status)) {
-
- cfs_enter_debugger();
- ZwClose(*Handle);
- }
-
- } else {
-
- cfs_enter_debugger();
- }
-
- return (Status);
-}
-
-
-/*
- * KsCloseControl
- * Release the Control Channel Handle and FileObject
- *
- * Arguments:
- * Handle: the channel handle to be released
- * FileObject: the fileobject to be released
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsCloseControl(
- IN HANDLE Handle,
- IN PFILE_OBJECT FileObject
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
- LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
- if (FileObject) {
-
- ObDereferenceObject(FileObject);
- }
-
- if (Handle) {
-
- Status = ZwClose(Handle);
- }
-
- ASSERT(NT_SUCCESS(Status));
-
- return (Status);
-}
-
-
-/*
- * KsOpenAddress
- * Open the tdi address object
- *
- * Arguments:
- * DeviceName: device name of the address object
- * pAddress: tdi address of the address object
- * AddressLength: length in bytes of the tdi address
- * Handle: the newly opened handle
- * FileObject: the newly opened fileobject
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsOpenAddress(
- IN PUNICODE_STRING DeviceName,
- IN PTRANSPORT_ADDRESS pAddress,
- IN ULONG AddressLength,
- OUT HANDLE * Handle,
- OUT PFILE_OBJECT * FileObject
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
- PFILE_FULL_EA_INFORMATION Ea = NULL;
- ULONG EaLength;
- UCHAR EaBuffer[EA_MAX_LENGTH];
-
- OBJECT_ATTRIBUTES ObjectAttributes;
- IO_STATUS_BLOCK IoStatus;
-
- //
- // Building EA for the Address Object to be Opened ...
- //
-
- Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
- Ea->NextEntryOffset = 0;
- Ea->Flags = 0;
- Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH;
- Ea->EaValueLength = (USHORT)AddressLength;
- RtlCopyMemory(
- &(Ea->EaName),
- TdiTransportAddress,
- Ea->EaNameLength + 1
- );
- RtlMoveMemory(
- &(Ea->EaName[Ea->EaNameLength + 1]),
- pAddress,
- AddressLength
- );
- EaLength = sizeof(FILE_FULL_EA_INFORMATION) +
- Ea->EaNameLength + AddressLength;
-
- LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
-
- //
- // Initializing ...
- //
-
- InitializeObjectAttributes(
- &ObjectAttributes,
- DeviceName,
- OBJ_CASE_INSENSITIVE |
- OBJ_KERNEL_HANDLE,
- NULL,
- NULL
- );
-
- LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
- //
- // Creating the Transport Address Object ...
- //
-
- Status = ZwCreateFile(
- Handle,
- FILE_READ_DATA | FILE_WRITE_DATA,
- &ObjectAttributes,
- &IoStatus,
- 0,
- FILE_ATTRIBUTE_NORMAL,
- FILE_SHARE_READ | FILE_SHARE_WRITE, /* 0: DON'T REUSE */
- FILE_OPEN,
- 0,
- Ea,
- EaLength
- );
-
-
- if (NT_SUCCESS(Status)) {
-
- //
- // Now Obtaining the FileObject of the Transport Address ...
- //
-
- Status = ObReferenceObjectByHandle(
- *Handle,
- FILE_ANY_ACCESS,
- NULL,
- KernelMode,
- FileObject,
- NULL
- );
-
- if (!NT_SUCCESS(Status)) {
-
- cfs_enter_debugger();
- ZwClose(*Handle);
- }
-
- } else {
-
- cfs_enter_debugger();
- }
-
- return (Status);
-}
-
-/*
- * KsCloseAddress
- * Release the Hanlde and FileObject of an opened tdi
- * address object
- *
- * Arguments:
- * Handle: the handle to be released
- * FileObject: the fileobject to be released
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsCloseAddress(
- IN HANDLE Handle,
- IN PFILE_OBJECT FileObject
-)
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
- if (FileObject) {
-
- ObDereferenceObject(FileObject);
- }
-
- if (Handle) {
-
- Status = ZwClose(Handle);
- }
-
- ASSERT(NT_SUCCESS(Status));
-
- return (Status);
-}
-
-
-/*
- * KsOpenConnection
- * Open a tdi connection object
- *
- * Arguments:
- * DeviceName: device name of the connection object
- * ConnectionContext: the connection context
- * Handle: the newly opened handle
- * FileObject: the newly opened fileobject
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsOpenConnection(
- IN PUNICODE_STRING DeviceName,
- IN CONNECTION_CONTEXT ConnectionContext,
- OUT HANDLE * Handle,
- OUT PFILE_OBJECT * FileObject
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
- PFILE_FULL_EA_INFORMATION Ea = NULL;
- ULONG EaLength;
- UCHAR EaBuffer[EA_MAX_LENGTH];
-
- OBJECT_ATTRIBUTES ObjectAttributes;
- IO_STATUS_BLOCK IoStatus;
-
- //
- // Building EA for the Address Object to be Opened ...
- //
-
- Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
- Ea->NextEntryOffset = 0;
- Ea->Flags = 0;
- Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH;
- Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT);
- RtlCopyMemory(
- &(Ea->EaName),
- TdiConnectionContext,
- Ea->EaNameLength + 1
- );
- RtlMoveMemory(
- &(Ea->EaName[Ea->EaNameLength + 1]),
- &ConnectionContext,
- sizeof(CONNECTION_CONTEXT)
- );
- EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
- Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT);
-
- LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
-
- //
- // Initializing ...
- //
-
- InitializeObjectAttributes(
- &ObjectAttributes,
- DeviceName,
- OBJ_CASE_INSENSITIVE |
- OBJ_KERNEL_HANDLE,
- NULL,
- NULL
- );
-
- LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
- //
- // Creating the Connection Object ...
- //
-
- Status = ZwCreateFile(
- Handle,
- FILE_READ_DATA | FILE_WRITE_DATA,
- &ObjectAttributes,
- &IoStatus,
- NULL,
- FILE_ATTRIBUTE_NORMAL,
- 0,
- FILE_OPEN,
- 0,
- Ea,
- EaLength
- );
-
-
- if (NT_SUCCESS(Status)) {
-
- //
- // Now Obtaining the FileObject of the Transport Address ...
- //
-
- Status = ObReferenceObjectByHandle(
- *Handle,
- FILE_ANY_ACCESS,
- NULL,
- KernelMode,
- FileObject,
- NULL
- );
-
- if (!NT_SUCCESS(Status)) {
-
- cfs_enter_debugger();
- ZwClose(*Handle);
- }
-
- } else {
-
- cfs_enter_debugger();
- }
-
- return (Status);
-}
-
-/*
- * KsCloseConnection
- * Release the Hanlde and FileObject of an opened tdi
- * connection object
- *
- * Arguments:
- * Handle: the handle to be released
- * FileObject: the fileobject to be released
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsCloseConnection(
- IN HANDLE Handle,
- IN PFILE_OBJECT FileObject
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
- if (FileObject) {
-
- ObDereferenceObject(FileObject);
- }
-
- if (Handle) {
-
- Status = ZwClose(Handle);
- }
-
- ASSERT(NT_SUCCESS(Status));
-
- return (Status);
-}
-
-
-/*
- * KsAssociateAddress
- * Associate an address object with a connection object
- *
- * Arguments:
- * AddressHandle: the handle of the address object
- * ConnectionObject: the FileObject of the connection
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsAssociateAddress(
- IN HANDLE AddressHandle,
- IN PFILE_OBJECT ConnectionObject
- )
-{
- NTSTATUS Status;
- PDEVICE_OBJECT DeviceObject;
- PIRP Irp;
-
- //
- // Getting the DeviceObject from Connection FileObject
- //
-
- DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
- //
- // Building Tdi Internal Irp ...
- //
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
-
- } else {
-
- //
- // Assocating the Address Object with the Connection Object
- //
-
- TdiBuildAssociateAddress(
- Irp,
- DeviceObject,
- ConnectionObject,
- NULL,
- NULL,
- AddressHandle
- );
-
- //
- // Calling the Transprot Driver with the Prepared Irp
- //
-
- Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
- }
-
- return (Status);
-}
-
-
-/*
- * KsDisassociateAddress
- * Disassociate the connection object (the relationship will
- * the corresponding address object will be dismissed. )
- *
- * Arguments:
- * ConnectionObject: the FileObject of the connection
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsDisassociateAddress(
- IN PFILE_OBJECT ConnectionObject
- )
-{
- NTSTATUS Status;
- PDEVICE_OBJECT DeviceObject;
- PIRP Irp;
-
- //
- // Getting the DeviceObject from Connection FileObject
- //
-
- DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
- //
- // Building Tdi Internal Irp ...
- //
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
-
- } else {
-
- //
- // Disassocating the Address Object with the Connection Object
- //
-
- TdiBuildDisassociateAddress(
- Irp,
- DeviceObject,
- ConnectionObject,
- NULL,
- NULL
- );
-
- //
- // Calling the Transprot Driver with the Prepared Irp
- //
-
- Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
- }
-
- return (Status);
-}
-
-
-/*
-
-//
-// Connection Control Event Callbacks
-//
-
-TDI_EVENT_CONNECT
-TDI_EVENT_DISCONNECT
-TDI_EVENT_ERROR
-
-//
-// Tcp Event Callbacks
-//
-
-TDI_EVENT_RECEIVE
-TDI_EVENT_RECEIVE_EXPEDITED
-TDI_EVENT_CHAINED_RECEIVE
-TDI_EVENT_CHAINED_RECEIVE_EXPEDITED
-
-//
-// Udp Event Callbacks
-//
-
-TDI_EVENT_RECEIVE_DATAGRAM
-TDI_EVENT_CHAINED_RECEIVE_DATAGRAM
-
-*/
-
-
-/*
- * KsSetEventHandlers
- * Set the tdi event callbacks with an address object
- *
- * Arguments:
- * AddressObject: the FileObject of the address object
- * EventContext: the parameter for the callbacks
- * Handlers: the handlers indictor array
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-KsSetEventHandlers(
- IN PFILE_OBJECT AddressObject, // Address File Object
- IN PVOID EventContext, // Context for Handlers
- IN PKS_EVENT_HANDLERS Handlers // Handlers Indictor
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
- PDEVICE_OBJECT DeviceObject;
- USHORT i = 0;
-
- DeviceObject = IoGetRelatedDeviceObject(AddressObject);
-
- for (i=0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) {
-
- //
- // Setup the tdi event callback handler if requested.
- //
-
- if (Handlers->IsActive[i]) {
-
- PIRP Irp;
-
- //
- // Building Tdi Internal Irp ...
- //
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
-
- } else {
-
- //
- // Building the Irp to set the Event Handler ...
- //
-
- TdiBuildSetEventHandler(
- Irp,
- DeviceObject,
- AddressObject,
- NULL,
- NULL,
- i, /* tdi event type */
- Handlers->Handler[i], /* tdi event handler */
- EventContext /* context for the handler */
- );
-
- //
- // Calling the Transprot Driver with the Prepared Irp
- //
-
- Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
-
- //
- // tcp/ip tdi does not support these two event callbacks
- //
-
- if ((!NT_SUCCESS(Status)) && ( i == TDI_EVENT_SEND_POSSIBLE ||
- i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED )) {
- cfs_enter_debugger();
- Status = STATUS_SUCCESS;
- }
- }
-
- if (!NT_SUCCESS(Status)) {
- cfs_enter_debugger();
- goto errorout;
- }
- }
- }
-
-
-errorout:
-
- if (!NT_SUCCESS(Status)) {
-
- KsPrint((2, "KsSetEventHandlers: Error Status = %xh (%s)\n",
- Status, KsNtStatusToString(Status) ));
- }
-
- return (Status);
-}
-
-
-
-/*
- * KsQueryAddressInfo
- * Query the address of the FileObject specified
- *
- * Arguments:
- * FileObject: the FileObject to be queried
- * AddressInfo: buffer to contain the address info
- * AddressSize: length of the AddressInfo buffer
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsQueryAddressInfo(
- PFILE_OBJECT FileObject,
- PTDI_ADDRESS_INFO AddressInfo,
- PULONG AddressSize
- )
-{
- NTSTATUS Status = STATUS_UNSUCCESSFUL;
- PIRP Irp = NULL;
- PMDL Mdl;
- PDEVICE_OBJECT DeviceObject;
-
- LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
- DeviceObject = IoGetRelatedDeviceObject(FileObject);
-
- RtlZeroMemory(AddressInfo, *(AddressSize));
-
- //
- // Allocating the Tdi Setting Irp ...
- //
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
-
- } else {
-
- //
- // Locking the User Buffer / Allocating a MDL for it
- //
-
- Status = KsLockUserBuffer(
- AddressInfo,
- FALSE,
- *(AddressSize),
- IoModifyAccess,
- &Mdl
- );
-
- if (!NT_SUCCESS(Status)) {
-
- IoFreeIrp(Irp);
- Irp = NULL;
- }
- }
-
- if (Irp) {
-
- LASSERT(NT_SUCCESS(Status));
-
- TdiBuildQueryInformation(
- Irp,
- DeviceObject,
- FileObject,
- NULL,
- NULL,
- TDI_QUERY_ADDRESS_INFO,
- Mdl
- );
-
- Status = KsSubmitTdiIrp(
- DeviceObject,
- Irp,
- TRUE,
- AddressSize
- );
-
- KsReleaseMdl(Mdl, FALSE);
- }
-
- if (!NT_SUCCESS(Status)) {
-
- cfs_enter_debugger();
- //TDI_BUFFER_OVERFLOW
- }
-
- return (Status);
-}
-
-/*
- * KsQueryProviderInfo
- * Query the underlying transport device's information
- *
- * Arguments:
- * TdiDeviceName: the transport device's name string
- * ProviderInfo: TDI_PROVIDER_INFO struncture
- *
- * Return Value:
- * NTSTATUS: Nt system status code
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-KsQueryProviderInfo(
- PWSTR TdiDeviceName,
- PTDI_PROVIDER_INFO ProviderInfo
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
- PIRP Irp = NULL;
- PMDL Mdl = NULL;
-
- UNICODE_STRING ControlName;
-
- HANDLE Handle;
- PFILE_OBJECT FileObject;
- PDEVICE_OBJECT DeviceObject;
-
- ULONG ProviderSize = 0;
-
- RtlInitUnicodeString(&ControlName, TdiDeviceName);
-
- //
- // Open the Tdi Control Channel
- //
-
- Status = KsOpenControl(
- &ControlName,
- &Handle,
- &FileObject
- );
-
- if (!NT_SUCCESS(Status)) {
-
- KsPrint((2, "KsQueryProviderInfo: Fail to open the tdi control channel.\n"));
- return (Status);
- }
-
- //
- // Obtain The Related Device Object
- //
-
- DeviceObject = IoGetRelatedDeviceObject(FileObject);
-
- ProviderSize = sizeof(TDI_PROVIDER_INFO);
- RtlZeroMemory(ProviderInfo, ProviderSize);
-
- //
- // Allocating the Tdi Setting Irp ...
- //
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
-
- } else {
-
- //
- // Locking the User Buffer / Allocating a MDL for it
- //
-
- Status = KsLockUserBuffer(
- ProviderInfo,
- FALSE,
- ProviderSize,
- IoModifyAccess,
- &Mdl
- );
-
- if (!NT_SUCCESS(Status)) {
-
- IoFreeIrp(Irp);
- Irp = NULL;
- }
- }
-
- if (Irp) {
-
- LASSERT(NT_SUCCESS(Status));
-
- TdiBuildQueryInformation(
- Irp,
- DeviceObject,
- FileObject,
- NULL,
- NULL,
- TDI_QUERY_PROVIDER_INFO,
- Mdl
- );
-
- Status = KsSubmitTdiIrp(
- DeviceObject,
- Irp,
- TRUE,
- &ProviderSize
- );
-
- KsReleaseMdl(Mdl, FALSE);
- }
-
- if (!NT_SUCCESS(Status)) {
-
- cfs_enter_debugger();
- //TDI_BUFFER_OVERFLOW
- }
-
- KsCloseControl(Handle, FileObject);
-
- return (Status);
-}
-
-/*
- * KsQueryConnectionInfo
- * Query the connection info of the FileObject specified
- * (some statics data of the traffic)
- *
- * Arguments:
- * FileObject: the FileObject to be queried
- * ConnectionInfo: buffer to contain the connection info
- * ConnectionSize: length of the ConnectionInfo buffer
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-KsQueryConnectionInfo(
- PFILE_OBJECT ConnectionObject,
- PTDI_CONNECTION_INFO ConnectionInfo,
- PULONG ConnectionSize
- )
-{
- NTSTATUS Status = STATUS_UNSUCCESSFUL;
- PIRP Irp = NULL;
- PMDL Mdl;
- PDEVICE_OBJECT DeviceObject;
-
- LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
- DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
- RtlZeroMemory(ConnectionInfo, *(ConnectionSize));
-
- //
- // Allocating the Tdi Query Irp ...
- //
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
-
- } else {
-
- //
- // Locking the User Buffer / Allocating a MDL for it
- //
-
- Status = KsLockUserBuffer(
- ConnectionInfo,
- FALSE,
- *(ConnectionSize),
- IoModifyAccess,
- &Mdl
- );
-
- if (NT_SUCCESS(Status)) {
-
- IoFreeIrp(Irp);
- Irp = NULL;
- }
- }
-
- if (Irp) {
-
- LASSERT(NT_SUCCESS(Status));
-
- TdiBuildQueryInformation(
- Irp,
- DeviceObject,
- ConnectionObject,
- NULL,
- NULL,
- TDI_QUERY_CONNECTION_INFO,
- Mdl
- );
-
- Status = KsSubmitTdiIrp(
- DeviceObject,
- Irp,
- TRUE,
- ConnectionSize
- );
-
- KsReleaseMdl(Mdl, FALSE);
- }
-
- return (Status);
-}
-
-
-/*
- * KsInitializeTdiAddress
- * Initialize the tdi addresss
- *
- * Arguments:
- * pTransportAddress: tdi address to be initialized
- * IpAddress: the ip address of object
- * IpPort: the ip port of the object
- *
- * Return Value:
- * ULONG: the total size of the tdi address
- *
- * NOTES:
- * N/A
- */
-
-ULONG
-KsInitializeTdiAddress(
- IN OUT PTA_IP_ADDRESS pTransportAddress,
- IN ULONG IpAddress,
- IN USHORT IpPort
- )
-{
- pTransportAddress->TAAddressCount = 1;
- pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP;
- pTransportAddress->Address[ 0 ].AddressType = TDI_ADDRESS_TYPE_IP;
- pTransportAddress->Address[ 0 ].Address[ 0 ].sin_port = IpPort;
- pTransportAddress->Address[ 0 ].Address[ 0 ].in_addr = IpAddress;
-
- return (FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP);
-}
-
-/*
- * KsQueryTdiAddressLength
- * Query the total size of the tdi address
- *
- * Arguments:
- * pTransportAddress: tdi address to be queried
- *
- * Return Value:
- * ULONG: the total size of the tdi address
- *
- * NOTES:
- * N/A
- */
-
-ULONG
-KsQueryTdiAddressLength(
- PTRANSPORT_ADDRESS pTransportAddress
- )
-{
- ULONG TotalLength = 0;
- LONG i;
-
- PTA_ADDRESS UNALIGNED pTaAddress = NULL;
-
- ASSERT (NULL != pTransportAddress);
-
- TotalLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address) +
- FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount;
-
- pTaAddress = (TA_ADDRESS UNALIGNED *)pTransportAddress->Address;
-
- for (i = 0; i < pTransportAddress->TAAddressCount; i++)
- {
- TotalLength += pTaAddress->AddressLength;
- pTaAddress = (TA_ADDRESS UNALIGNED *)((PCHAR)pTaAddress +
- FIELD_OFFSET(TA_ADDRESS,Address) +
- pTaAddress->AddressLength );
- }
-
- return (TotalLength);
-}
-
-
-/*
- * KsQueryIpAddress
- * Query the ip address of the tdi object
- *
- * Arguments:
- * FileObject: tdi object to be queried
- * TdiAddress: TdiAddress buffer, to store the queried
- * tdi ip address
- * AddressLength: buffer length of the TdiAddress
- *
- * Return Value:
- * ULONG: the total size of the tdi ip address
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-KsQueryIpAddress(
- PFILE_OBJECT FileObject,
- PVOID TdiAddress,
- ULONG* AddressLength
- )
-{
- NTSTATUS Status;
-
- PTDI_ADDRESS_INFO TdiAddressInfo;
- ULONG Length;
-
-
- //
- // Maximum length of TDI_ADDRESSS_INFO with one TRANSPORT_ADDRESS
- //
-
- Length = MAX_ADDRESS_LENGTH;
-
- TdiAddressInfo = (PTDI_ADDRESS_INFO)
- ExAllocatePoolWithTag(
- NonPagedPool,
- Length,
- 'KSAI' );
-
- if (NULL == TdiAddressInfo) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
-
- Status = KsQueryAddressInfo(
- FileObject,
- TdiAddressInfo,
- &Length
- );
-
-errorout:
-
- if (NT_SUCCESS(Status))
- {
- if (*AddressLength < Length) {
-
- Status = STATUS_BUFFER_TOO_SMALL;
-
- } else {
-
- *AddressLength = Length;
- RtlCopyMemory(
- TdiAddress,
- &(TdiAddressInfo->Address),
- Length
- );
-
- Status = STATUS_SUCCESS;
- }
-
- } else {
-
- }
-
-
- if (NULL != TdiAddressInfo) {
-
- ExFreePool(TdiAddressInfo);
- }
-
- return Status;
-}
-
-
-/*
- * KsErrorEventHandler
- * the common error event handler callback
- *
- * Arguments:
- * TdiEventContext: should be the socket
- * Status: the error code
- *
- * Return Value:
- * Status: STATS_SUCCESS
- *
- * NOTES:
- * We need not do anything in such a severe
- * error case. System will process it for us.
- */
-
-NTSTATUS
-KsErrorEventHandler(
- IN PVOID TdiEventContext,
- IN NTSTATUS Status
- )
-{
- KsPrint((2, "KsErrorEventHandler called at Irql = %xh ...\n",
- KeGetCurrentIrql()));
-
- cfs_enter_debugger();
-
- return (STATUS_SUCCESS);
-}
-
-
-/*
- * ks_set_handlers
- * setup all the event handler callbacks
- *
- * Arguments:
- * tconn: the tdi connecton object
- *
- * Return Value:
- * int: ks error code
- *
- * NOTES:
- * N/A
- */
-
-int
-ks_set_handlers(
- ksock_tconn_t * tconn
- )
-{
- NTSTATUS status = STATUS_SUCCESS;
- KS_EVENT_HANDLERS handlers;
-
- /* to make sure the address object is opened already */
- if (tconn->kstc_addr.FileObject == NULL) {
- goto errorout;
- }
-
- /* initialize the handlers indictor array. for sender and listenr,
- there are different set of callbacks. for child, we just return. */
-
- memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS));
-
- SetEventHandler(handlers, TDI_EVENT_ERROR, KsErrorEventHandler);
- SetEventHandler(handlers, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler);
- SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler);
- SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler);
- SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler);
-
- // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, KsTcpChainedReceiveExpeditedEventHandler);
-
- if (tconn->kstc_type == kstt_listener) {
- SetEventHandler(handlers, TDI_EVENT_CONNECT, KsConnectEventHandler);
- } else if (tconn->kstc_type == kstt_child) {
- goto errorout;
- }
-
- /* set all the event callbacks */
- status = KsSetEventHandlers(
- tconn->kstc_addr.FileObject, /* Address File Object */
- tconn, /* Event Context */
- &handlers /* Event callback handlers */
- );
-
-errorout:
-
- return cfs_error_code(status);
-}
-
-
-/*
- * ks_reset_handlers
- * disable all the event handler callbacks (set to NULL)
- *
- * Arguments:
- * tconn: the tdi connecton object
- *
- * Return Value:
- * int: ks error code
- *
- * NOTES:
- * N/A
- */
-
-int
-ks_reset_handlers(
- ksock_tconn_t * tconn
- )
-{
- NTSTATUS status = STATUS_SUCCESS;
- KS_EVENT_HANDLERS handlers;
-
- /* to make sure the address object is opened already */
- if (tconn->kstc_addr.FileObject == NULL) {
- goto errorout;
- }
-
- /* initialize the handlers indictor array. for sender and listenr,
- there are different set of callbacks. for child, we just return. */
-
- memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS));
-
- SetEventHandler(handlers, TDI_EVENT_ERROR, NULL);
- SetEventHandler(handlers, TDI_EVENT_DISCONNECT, NULL);
- SetEventHandler(handlers, TDI_EVENT_RECEIVE, NULL);
- SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, NULL);
- SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, NULL);
- // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, NULL);
-
- if (tconn->kstc_type == kstt_listener) {
- SetEventHandler(handlers, TDI_EVENT_CONNECT, NULL);
- } else if (tconn->kstc_type == kstt_child) {
- goto errorout;
- }
-
- /* set all the event callbacks */
- status = KsSetEventHandlers(
- tconn->kstc_addr.FileObject, /* Address File Object */
- tconn, /* Event Context */
- &handlers /* Event callback handlers */
- );
-
-errorout:
-
- return cfs_error_code(status);
-}
-
-
-/*
- * KsAcceptCompletionRoutine
- * Irp completion routine for TdiBuildAccept (KsConnectEventHandler)
- *
- * Here system gives us a chance to check the conneciton is built
- * ready or not.
- *
- * Arguments:
- * DeviceObject: the device object of the transport driver
- * Irp: the Irp is being completed.
- * Context: the context we specified when issuing the Irp
- *
- * Return Value:
- * Nt status code
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsAcceptCompletionRoutine(
- IN PDEVICE_OBJECT DeviceObject,
- IN PIRP Irp,
- IN PVOID Context
- )
-{
- ksock_tconn_t * child = (ksock_tconn_t *) Context;
- ksock_tconn_t * parent = child->child.kstc_parent;
-
- KsPrint((2, "KsAcceptCompletionRoutine: called at Irql: %xh\n",
- KeGetCurrentIrql() ));
-
- KsPrint((2, "KsAcceptCompletionRoutine: Context = %xh Status = %xh\n",
- Context, Irp->IoStatus.Status));
-
- LASSERT(child->kstc_type == kstt_child);
-
- spin_lock(&(child->kstc_lock));
-
- LASSERT(parent->kstc_state == ksts_listening);
- LASSERT(child->kstc_state == ksts_connecting);
-
- if (NT_SUCCESS(Irp->IoStatus.Status)) {
-
- child->child.kstc_accepted = TRUE;
-
- child->kstc_state = ksts_connected;
-
- /* wake up the daemon thread which waits on this event */
- KeSetEvent(
- &(parent->listener.kstc_accept_event),
- 0,
- FALSE
- );
-
- spin_unlock(&(child->kstc_lock));
-
- KsPrint((2, "KsAcceptCompletionRoutine: Get %xh now signal the event ...\n", parent));
-
- } else {
-
- /* re-use this child connecton */
- child->child.kstc_accepted = FALSE;
- child->child.kstc_busy = FALSE;
- child->kstc_state = ksts_associated;
-
- spin_unlock(&(child->kstc_lock));
- }
-
- /* now free the Irp */
- IoFreeIrp(Irp);
-
- /* drop the refer count of the child */
- ks_put_tconn(child);
-
- return (STATUS_MORE_PROCESSING_REQUIRED);
-}
-
-
-/*
- * ks_get_vacancy_backlog
- * Get a vacancy listeing child from the backlog list
- *
- * Arguments:
- * parent: the listener daemon connection
- *
- * Return Value:
- * the child listening connection or NULL in failure
- *
- * Notes
- * Parent's lock should be acquired before calling.
- */
-
-ksock_tconn_t *
-ks_get_vacancy_backlog(
- ksock_tconn_t * parent
- )
-{
- ksock_tconn_t * child;
-
- LASSERT(parent->kstc_type == kstt_listener);
- LASSERT(parent->kstc_state == ksts_listening);
-
- if (list_empty(&(parent->listener.kstc_listening.list))) {
-
- child = NULL;
-
- } else {
-
- struct list_head * tmp;
-
- /* check the listening queue and try to get a free connecton */
-
- list_for_each(tmp, &(parent->listener.kstc_listening.list)) {
- child = list_entry (tmp, ksock_tconn_t, child.kstc_link);
- spin_lock(&(child->kstc_lock));
-
- if (!child->child.kstc_busy) {
- LASSERT(child->kstc_state == ksts_associated);
- child->child.kstc_busy = TRUE;
- spin_unlock(&(child->kstc_lock));
- break;
- } else {
- spin_unlock(&(child->kstc_lock));
- child = NULL;
- }
- }
- }
-
- return child;
-}
-
-ks_addr_slot_t *
-KsSearchIpAddress(PUNICODE_STRING DeviceName)
-{
- ks_addr_slot_t * slot = NULL;
- PLIST_ENTRY list = NULL;
-
- spin_lock(&ks_data.ksnd_addrs_lock);
-
- list = ks_data.ksnd_addrs_list.Flink;
- while (list != &ks_data.ksnd_addrs_list) {
- slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
- if (RtlCompareUnicodeString(
- DeviceName,
- &slot->devname,
- TRUE) == 0) {
- break;
- }
- list = list->Flink;
- slot = NULL;
- }
-
- spin_unlock(&ks_data.ksnd_addrs_lock);
-
- return slot;
-}
-
-void
-KsCleanupIpAddresses()
-{
- spin_lock(&ks_data.ksnd_addrs_lock);
-
- while (!IsListEmpty(&ks_data.ksnd_addrs_list)) {
-
- ks_addr_slot_t * slot = NULL;
- PLIST_ENTRY list = NULL;
-
- list = RemoveHeadList(&ks_data.ksnd_addrs_list);
- slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
- cfs_free(slot);
- ks_data.ksnd_naddrs--;
- }
-
- cfs_assert(ks_data.ksnd_naddrs == 0);
- spin_unlock(&ks_data.ksnd_addrs_lock);
-}
-
-VOID
-KsAddAddressHandler(
- IN PTA_ADDRESS Address,
- IN PUNICODE_STRING DeviceName,
- IN PTDI_PNP_CONTEXT Context
- )
-{
- PTDI_ADDRESS_IP IpAddress = NULL;
-
- if ( Address->AddressType == TDI_ADDRESS_TYPE_IP &&
- Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) {
-
- ks_addr_slot_t * slot = NULL;
-
- IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0];
- KsPrint((1, "KsAddAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n",
- DeviceName, Context, IpAddress->in_addr,
- (IpAddress->in_addr & 0xFF000000) >> 24,
- (IpAddress->in_addr & 0x00FF0000) >> 16,
- (IpAddress->in_addr & 0x0000FF00) >> 8,
- (IpAddress->in_addr & 0x000000FF) >> 0 ));
-
- slot = KsSearchIpAddress(DeviceName);
-
- if (slot != NULL) {
- slot->up = TRUE;
- slot->ip_addr = ntohl(IpAddress->in_addr);
- } else {
- slot = cfs_alloc(sizeof(ks_addr_slot_t) + DeviceName->Length, CFS_ALLOC_ZERO);
- if (slot != NULL) {
- spin_lock(&ks_data.ksnd_addrs_lock);
- InsertTailList(&ks_data.ksnd_addrs_list, &slot->link);
- sprintf(slot->iface, "eth%d", ks_data.ksnd_naddrs++);
- slot->ip_addr = ntohl(IpAddress->in_addr);
- slot->up = TRUE;
- RtlMoveMemory(&slot->buffer[0], DeviceName->Buffer, DeviceName->Length);
- slot->devname.Length = DeviceName->Length;
- slot->devname.MaximumLength = DeviceName->Length + sizeof(WCHAR);
- slot->devname.Buffer = slot->buffer;
- spin_unlock(&ks_data.ksnd_addrs_lock);
- }
- }
- }
-}
-
-VOID
-KsDelAddressHandler(
- IN PTA_ADDRESS Address,
- IN PUNICODE_STRING DeviceName,
- IN PTDI_PNP_CONTEXT Context
- )
-{
- PTDI_ADDRESS_IP IpAddress = NULL;
-
- if ( Address->AddressType == TDI_ADDRESS_TYPE_IP &&
- Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) {
-
- ks_addr_slot_t * slot = NULL;
-
- slot = KsSearchIpAddress(DeviceName);
-
- if (slot != NULL) {
- slot->up = FALSE;
- }
-
- IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0];
- KsPrint((1, "KsDelAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n",
- DeviceName, Context, IpAddress->in_addr,
- (IpAddress->in_addr & 0xFF000000) >> 24,
- (IpAddress->in_addr & 0x00FF0000) >> 16,
- (IpAddress->in_addr & 0x0000FF00) >> 8,
- (IpAddress->in_addr & 0x000000FF) >> 0 ));
- }
-}
-
-NTSTATUS
-KsRegisterPnpHandlers()
-{
- TDI20_CLIENT_INTERFACE_INFO ClientInfo;
-
- /* initialize the global ks_data members */
- RtlInitUnicodeString(&ks_data.ksnd_client_name, TDILND_MODULE_NAME);
- spin_lock_init(&ks_data.ksnd_addrs_lock);
- InitializeListHead(&ks_data.ksnd_addrs_list);
-
- /* register the pnp handlers */
- RtlZeroMemory(&ClientInfo, sizeof(ClientInfo));
- ClientInfo.TdiVersion = TDI_CURRENT_VERSION;
-
- ClientInfo.ClientName = &ks_data.ksnd_client_name;
- ClientInfo.AddAddressHandlerV2 = KsAddAddressHandler;
- ClientInfo.DelAddressHandlerV2 = KsDelAddressHandler;
-
- return TdiRegisterPnPHandlers(&ClientInfo, sizeof(ClientInfo),
- &ks_data.ksnd_pnp_handle);
-}
-
-VOID
-KsDeregisterPnpHandlers()
-{
- if (ks_data.ksnd_pnp_handle) {
-
- /* De-register the pnp handlers */
-
- TdiDeregisterPnPHandlers(ks_data.ksnd_pnp_handle);
- ks_data.ksnd_pnp_handle = NULL;
-
- /* cleanup all the ip address slots */
- KsCleanupIpAddresses();
- }
-}
-
-/*
- * KsConnectEventHandler
- * Connect event handler event handler, called by the underlying TDI
- * transport in response to an incoming request to the listening daemon.
- *
- * it will grab a vacancy backlog from the children tconn list, and
- * build an acception Irp with it, then transfer the Irp to TDI driver.
- *
- * Arguments:
- * TdiEventContext: the tdi connnection object of the listening daemon
- * ......
- *
- * Return Value:
- * Nt kernel status code
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsConnectEventHandler(
- IN PVOID TdiEventContext,
- IN LONG RemoteAddressLength,
- IN PVOID RemoteAddress,
- IN LONG UserDataLength,
- IN PVOID UserData,
- IN LONG OptionsLength,
- IN PVOID Options,
- OUT CONNECTION_CONTEXT * ConnectionContext,
- OUT PIRP * AcceptIrp
- )
-{
- ksock_tconn_t * parent;
- ksock_tconn_t * child;
-
- PFILE_OBJECT FileObject;
- PDEVICE_OBJECT DeviceObject;
- NTSTATUS Status;
-
- PIRP Irp = NULL;
- PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL;
-
- KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql()));
- parent = (ksock_tconn_t *) TdiEventContext;
-
- LASSERT(parent->kstc_type == kstt_listener);
-
- spin_lock(&(parent->kstc_lock));
-
- if (parent->kstc_state == ksts_listening) {
-
- /* allocate a new ConnectionInfo to backup the peer's info */
-
- ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
- NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) +
- RemoteAddressLength, 'iCsK' );
-
- if (NULL == ConnectionInfo) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
- cfs_enter_debugger();
- goto errorout;
- }
-
- /* initializing ConnectionInfo structure ... */
-
- ConnectionInfo->UserDataLength = UserDataLength;
- ConnectionInfo->UserData = UserData;
- ConnectionInfo->OptionsLength = OptionsLength;
- ConnectionInfo->Options = Options;
- ConnectionInfo->RemoteAddressLength = RemoteAddressLength;
- ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
-
- RtlCopyMemory(
- ConnectionInfo->RemoteAddress,
- RemoteAddress,
- RemoteAddressLength
- );
-
- /* get the vacancy listening child tdi connections */
-
- child = ks_get_vacancy_backlog(parent);
-
- if (child) {
-
- spin_lock(&(child->kstc_lock));
- child->child.kstc_info.ConnectionInfo = ConnectionInfo;
- child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress;
- child->kstc_state = ksts_connecting;
- spin_unlock(&(child->kstc_lock));
-
- } else {
-
- KsPrint((2, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent));
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
-
- goto errorout;
- }
-
- FileObject = child->child.kstc_info.FileObject;
- DeviceObject = IoGetRelatedDeviceObject (FileObject);
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- TdiBuildAccept(
- Irp,
- DeviceObject,
- FileObject,
- KsAcceptCompletionRoutine,
- child,
- NULL,
- NULL
- );
-
- IoSetNextIrpStackLocation(Irp);
-
- /* grap the refer of the child tdi connection */
- ks_get_tconn(child);
-
- Status = STATUS_MORE_PROCESSING_REQUIRED;
-
- *AcceptIrp = Irp;
- *ConnectionContext = child;
-
- } else {
-
- Status = STATUS_CONNECTION_REFUSED;
- goto errorout;
- }
-
- spin_unlock(&(parent->kstc_lock));
-
- return Status;
-
-errorout:
-
- spin_unlock(&(parent->kstc_lock));
-
- {
- *AcceptIrp = NULL;
- *ConnectionContext = NULL;
-
- if (ConnectionInfo) {
-
- ExFreePool(ConnectionInfo);
- }
-
- if (Irp) {
-
- IoFreeIrp (Irp);
- }
- }
-
- return Status;
-}
-
-/*
- * KsDisconnectCompletionRoutine
- * the Irp completion routine for TdiBuildDisconect
- *
- * We just signal the event and return MORE_PRO... to
- * let the caller take the responsibility of the Irp.
- *
- * Arguments:
- * DeviceObject: the device object of the transport
- * Irp: the Irp is being completed.
- * Context: the event specified by the caller
- *
- * Return Value:
- * Nt status code
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsDisconectCompletionRoutine (
- IN PDEVICE_OBJECT DeviceObject,
- IN PIRP Irp,
- IN PVOID Context
- )
-{
-
- KeSetEvent((PKEVENT) Context, 0, FALSE);
-
- return STATUS_MORE_PROCESSING_REQUIRED;
-
- UNREFERENCED_PARAMETER(DeviceObject);
-}
-
-
-/*
- * KsDisconnectHelper
- * the routine to be executed in the WorkItem procedure
- * this routine is to disconnect a tdi connection
- *
- * Arguments:
- * Workitem: the context transferred to the workitem
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * tconn is already referred in abort_connecton ...
- */
-
-VOID
-KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem)
-{
- ksock_tconn_t * tconn = WorkItem->tconn;
-
- DbgPrint("KsDisconnectHelper: disconnecting tconn=%p\n", tconn);
- ks_disconnect_tconn(tconn, WorkItem->Flags);
-
- KeSetEvent(&(WorkItem->Event), 0, FALSE);
-
- spin_lock(&(tconn->kstc_lock));
- cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
- spin_unlock(&(tconn->kstc_lock));
- ks_put_tconn(tconn);
-}
-
-
-/*
- * KsDisconnectEventHandler
- * Disconnect event handler event handler, called by the underlying TDI transport
- * in response to an incoming disconnection notification from a remote node.
- *
- * Arguments:
- * ConnectionContext: tdi connnection object
- * DisconnectFlags: specifies the nature of the disconnection
- * ......
- *
- * Return Value:
- * Nt kernel status code
- *
- * Notes:
- * N/A
- */
-
-
-NTSTATUS
-KsDisconnectEventHandler(
- IN PVOID TdiEventContext,
- IN CONNECTION_CONTEXT ConnectionContext,
- IN LONG DisconnectDataLength,
- IN PVOID DisconnectData,
- IN LONG DisconnectInformationLength,
- IN PVOID DisconnectInformation,
- IN ULONG DisconnectFlags
- )
-{
- ksock_tconn_t * tconn;
- NTSTATUS Status;
- PKS_DISCONNECT_WORKITEM WorkItem;
-
- tconn = (ksock_tconn_t *)ConnectionContext;
-
- KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n",
- KeGetCurrentIrql() ));
-
- KsPrint((2, "tconn = %x DisconnectFlags= %xh\n",
- tconn, DisconnectFlags));
-
- ks_get_tconn(tconn);
- spin_lock(&(tconn->kstc_lock));
-
- WorkItem = &(tconn->kstc_disconnect);
-
- if (tconn->kstc_state != ksts_connected) {
-
- Status = STATUS_SUCCESS;
-
- } else {
-
- if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) {
-
- Status = STATUS_REMOTE_DISCONNECT;
-
- } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) {
-
- Status = STATUS_GRACEFUL_DISCONNECT;
- }
-
- if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
-
- ks_get_tconn(tconn);
-
- WorkItem->Flags = DisconnectFlags;
- WorkItem->tconn = tconn;
-
- cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
-
- /* queue the workitem to call */
- ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue);
- }
- }
-
- spin_unlock(&(tconn->kstc_lock));
- ks_put_tconn(tconn);
-
- return (Status);
-}
-
-NTSTATUS
-KsTcpReceiveCompletionRoutine(
- IN PIRP Irp,
- IN PKS_TCP_COMPLETION_CONTEXT Context
- )
-{
- NTSTATUS Status = Irp->IoStatus.Status;
-
- if (NT_SUCCESS(Status)) {
-
- ksock_tconn_t *tconn = Context->tconn;
-
- PKS_TSDU_DAT KsTsduDat = Context->CompletionContext;
- PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext;
-
- KsPrint((1, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n",
- Context->KsTsduMgr->TotalBytes ));
-
- spin_lock(&(tconn->kstc_lock));
-
- if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
- if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) {
- cfs_clear_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING);
- } else {
- cfs_enter_debugger();
- }
- } else {
- ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
- if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) {
- cfs_clear_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING);
- } else {
- cfs_enter_debugger();
- }
- }
-
- spin_unlock(&(tconn->kstc_lock));
-
- /* wake up the thread waiting for the completion of this Irp */
- KeSetEvent(Context->Event, 0, FALSE);
-
- /* re-active the ks connection and wake up the scheduler */
- if (tconn->kstc_conn && tconn->kstc_sched_cb) {
- tconn->kstc_sched_cb( tconn, FALSE, NULL,
- Context->KsTsduMgr->TotalBytes );
- }
-
- } else {
-
- /* un-expected errors occur, we must abort the connection */
- ks_abort_tconn(Context->tconn);
- }
-
- if (Context) {
-
- /* Freeing the Context structure... */
- ExFreePool(Context);
- Context = NULL;
- }
-
-
- /* free the Irp */
- if (Irp) {
- IoFreeIrp(Irp);
- }
-
- return (Status);
-}
-
-
-/*
- * KsTcpCompletionRoutine
- * the Irp completion routine for TdiBuildSend and TdiBuildReceive ...
- * We need call the use's own CompletionRoutine if specified. Or
- * it's a synchronous case, we need signal the event.
- *
- * Arguments:
- * DeviceObject: the device object of the transport
- * Irp: the Irp is being completed.
- * Context: the context we specified when issuing the Irp
- *
- * Return Value:
- * Nt status code
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsTcpCompletionRoutine(
- IN PDEVICE_OBJECT DeviceObject,
- IN PIRP Irp,
- IN PVOID Context
- )
-{
- if (Context) {
-
- PKS_TCP_COMPLETION_CONTEXT CompletionContext = NULL;
- ksock_tconn_t * tconn = NULL;
-
- CompletionContext = (PKS_TCP_COMPLETION_CONTEXT) Context;
- tconn = CompletionContext->tconn;
-
- /* release the chained mdl */
- KsReleaseMdl(Irp->MdlAddress, FALSE);
- Irp->MdlAddress = NULL;
-
- if (CompletionContext->CompletionRoutine) {
-
- if ( CompletionContext->bCounted &&
- InterlockedDecrement(&CompletionContext->ReferCount) != 0 ) {
- goto errorout;
- }
-
- //
- // Giving control to user specified CompletionRoutine ...
- //
-
- CompletionContext->CompletionRoutine(
- Irp,
- CompletionContext
- );
-
- } else {
-
- //
- // Signaling the Event ...
- //
-
- KeSetEvent(CompletionContext->Event, 0, FALSE);
- }
-
- /* drop the reference count of the tconn object */
- ks_put_tconn(tconn);
-
- } else {
-
- cfs_enter_debugger();
- }
-
-errorout:
-
- return STATUS_MORE_PROCESSING_REQUIRED;
-}
-
-/*
- * KsTcpSendCompletionRoutine
- * the user specified Irp completion routine for asynchronous
- * data transmission requests.
- *
- * It will do th cleanup job of the ksock_tx_t and wake up the
- * ks scheduler thread
- *
- * Arguments:
- * Irp: the Irp is being completed.
- * Context: the context we specified when issuing the Irp
- *
- * Return Value:
- * Nt status code
- *
- * Notes:
- * N/A
- */
-
-NTSTATUS
-KsTcpSendCompletionRoutine(
- IN PIRP Irp,
- IN PKS_TCP_COMPLETION_CONTEXT Context
- )
-{
- NTSTATUS Status = Irp->IoStatus.Status;
- ULONG rc = Irp->IoStatus.Information;
- ksock_tconn_t * tconn = Context->tconn;
- PKS_TSDUMGR KsTsduMgr = Context->KsTsduMgr;
-
- ENTRY;
-
- LASSERT(tconn) ;
-
- if (NT_SUCCESS(Status)) {
-
- if (Context->bCounted) {
- PVOID tx = Context->CompletionContext;
-
- ASSERT(tconn->kstc_update_tx != NULL);
-
- /* update the tx, rebasing the kiov or iov pointers */
- tx = tconn->kstc_update_tx(tconn, tx, rc);
-
- /* update the KsTsudMgr total bytes */
- spin_lock(&tconn->kstc_lock);
- KsTsduMgr->TotalBytes -= rc;
- spin_unlock(&tconn->kstc_lock);
-
- /*
- * now it's time to re-queue the conns into the
- * scheduler queue and wake the scheduler thread.
- */
-
- if (tconn->kstc_conn && tconn->kstc_sched_cb) {
- tconn->kstc_sched_cb( tconn, TRUE, tx, 0);
- }
-
- } else {
-
- PKS_TSDU KsTsdu = Context->CompletionContext;
- PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext2;
- PKS_TSDU_DAT KsTsduDat = Context->CompletionContext2;
-
- spin_lock(&tconn->kstc_lock);
- /* This is bufferred sending ... */
- ASSERT(KsTsduBuf->StartOffset == 0);
-
- if (KsTsduBuf->DataLength > Irp->IoStatus.Information) {
- /* not fully sent .... we have to abort the connection */
- spin_unlock(&tconn->kstc_lock);
- ks_abort_tconn(tconn);
- goto errorout;
- }
-
- if (KsTsduBuf->TsduType == TSDU_TYPE_BUF) {
- /* free the buffer */
- ExFreePool(KsTsduBuf->UserBuffer);
- KsTsduMgr->TotalBytes -= KsTsduBuf->DataLength;
- KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
- } else if (KsTsduDat->TsduType == TSDU_TYPE_DAT) {
- KsTsduMgr->TotalBytes -= KsTsduDat->DataLength;
- KsTsdu->StartOffset += KsTsduDat->TotalLength;
- } else {
- cfs_enter_debugger(); /* shoult not get here */
- }
-
- if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
-
- list_del(&KsTsdu->Link);
- KsTsduMgr->NumOfTsdu--;
- KsPutKsTsdu(KsTsdu);
- }
-
- spin_unlock(&tconn->kstc_lock);
- }
-
- } else {
-
- /* cfs_enter_debugger(); */
-
- /*
- * for the case that the transmission is ussuccessful,
- * we need abort the tdi connection, but not destroy it.
- * the socknal conn will drop the refer count, then the
- * tdi connection will be freed.
- */
-
- ks_abort_tconn(tconn);
- }
-
-errorout:
-
- /* freeing the Context structure... */
-
- if (Context) {
- ExFreePool(Context);
- Context = NULL;
- }
-
- /* it's our duty to free the Irp. */
-
- if (Irp) {
- IoFreeIrp(Irp);
- Irp = NULL;
- }
-
- EXIT;
-
- return Status;
-}
-
-/*
- * Normal receive event handler
- *
- * It will move data from system Tsdu to our TsduList
- */
-
-NTSTATUS
-KsTcpReceiveEventHandler(
- IN PVOID TdiEventContext,
- IN CONNECTION_CONTEXT ConnectionContext,
- IN ULONG ReceiveFlags,
- IN ULONG BytesIndicated,
- IN ULONG BytesAvailable,
- OUT ULONG * BytesTaken,
- IN PVOID Tsdu,
- OUT PIRP * IoRequestPacket
- )
-{
- NTSTATUS Status;
-
- ksock_tconn_t * tconn;
-
- PKS_CHAIN KsChain;
- PKS_TSDUMGR KsTsduMgr;
- PKS_TSDU KsTsdu;
- PKS_TSDU_DAT KsTsduDat;
- PKS_TSDU_BUF KsTsduBuf;
-
- BOOLEAN bIsExpedited;
- BOOLEAN bIsCompleteTsdu;
-
- BOOLEAN bNewTsdu = FALSE;
- BOOLEAN bNewBuff = FALSE;
-
- PCHAR Buffer = NULL;
-
- PIRP Irp = NULL;
- PMDL Mdl = NULL;
- PFILE_OBJECT FileObject;
- PDEVICE_OBJECT DeviceObject;
-
- ULONG BytesReceived = 0;
-
- PKS_TCP_COMPLETION_CONTEXT context = NULL;
-
-
- tconn = (ksock_tconn_t *) ConnectionContext;
-
- ks_get_tconn(tconn);
-
- /* check whether the whole body of payload is received or not */
- if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) &&
- (BytesIndicated == BytesAvailable) ) {
- bIsCompleteTsdu = TRUE;
- } else {
- bIsCompleteTsdu = FALSE;
- }
-
- bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
-
- KsPrint((2, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n", BytesIndicated, BytesAvailable));
- KsPrint((2, "bIsCompleteTsdu = %d bIsExpedited = %d\n", bIsCompleteTsdu, bIsExpedited ));
-
- spin_lock(&(tconn->kstc_lock));
-
- /* check whether we are conntected or not listener ¡Â*/
- if ( !((tconn->kstc_state == ksts_connected) &&
- (tconn->kstc_type == kstt_sender ||
- tconn->kstc_type == kstt_child))) {
-
- *BytesTaken = BytesIndicated;
-
- spin_unlock(&(tconn->kstc_lock));
- ks_put_tconn(tconn);
-
- return (STATUS_SUCCESS);
- }
-
- if (tconn->kstc_type == kstt_sender) {
- KsChain = &(tconn->sender.kstc_recv);
- } else {
- LASSERT(tconn->kstc_type == kstt_child);
- KsChain = &(tconn->child.kstc_recv);
- }
-
- if (bIsExpedited) {
- KsTsduMgr = &(KsChain->Expedited);
- } else {
- KsTsduMgr = &(KsChain->Normal);
- }
-
- /* if the Tsdu is even larger than the biggest Tsdu, we have
- to allocate new buffer and use TSDU_TYOE_BUF to store it */
-
- if ( KS_TSDU_STRU_SIZE(BytesAvailable) > ks_data.ksnd_tsdu_size -
- KS_DWORD_ALIGN(sizeof(KS_TSDU))) {
- bNewBuff = TRUE;
- }
-
- /* retrieve the latest Tsdu buffer form TsduMgr
- list if the list is not empty. */
-
- if (list_empty(&(KsTsduMgr->TsduList))) {
-
- LASSERT(KsTsduMgr->NumOfTsdu == 0);
- KsTsdu = NULL;
-
- } else {
-
- LASSERT(KsTsduMgr->NumOfTsdu > 0);
- KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
-
- /* if this Tsdu does not contain enough space, we need
- allocate a new Tsdu queue. */
-
- if (bNewBuff) {
- if ( KsTsdu->LastOffset + sizeof(KS_TSDU_BUF) >
- KsTsdu->TotalLength ) {
- KsTsdu = NULL;
- }
- } else {
- if ( KS_TSDU_STRU_SIZE(BytesAvailable) >
- KsTsdu->TotalLength - KsTsdu->LastOffset ) {
- KsTsdu = NULL;
- }
- }
- }
-
- /* allocating the buffer for TSDU_TYPE_BUF */
- if (bNewBuff) {
- Buffer = ExAllocatePool(NonPagedPool, BytesAvailable);
- if (NULL == Buffer) {
- /* there's no enough memory for us. We just try to
- receive maximum bytes with a new Tsdu */
- bNewBuff = FALSE;
- KsTsdu = NULL;
- }
- }
-
- /* allocate a new Tsdu in case we are not statisfied. */
-
- if (NULL == KsTsdu) {
-
- KsTsdu = KsAllocateKsTsdu();
-
- if (NULL == KsTsdu) {
- goto errorout;
- } else {
- bNewTsdu = TRUE;
- }
- }
-
- KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
- KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
-
- if (bNewBuff) {
-
- /* setup up the KS_TSDU_BUF record */
-
- KsTsduBuf->TsduType = TSDU_TYPE_BUF;
- KsTsduBuf->TsduFlags = 0;
- KsTsduBuf->StartOffset = 0;
- KsTsduBuf->UserBuffer = Buffer;
- KsTsduBuf->DataLength = BytesReceived = BytesAvailable;
-
- KsTsdu->LastOffset += sizeof(KS_TSDU_BUF);
-
- } else {
-
- /* setup the KS_TSDU_DATA to contain all the messages */
-
- KsTsduDat->TsduType = TSDU_TYPE_DAT;
- KsTsduDat->TsduFlags = 0;
-
- if ( KsTsdu->TotalLength - KsTsdu->LastOffset >=
- KS_TSDU_STRU_SIZE(BytesAvailable) ) {
- BytesReceived = BytesAvailable;
- } else {
- BytesReceived = KsTsdu->TotalLength - KsTsdu->LastOffset -
- FIELD_OFFSET(KS_TSDU_DAT, Data);
- BytesReceived &= (~((ULONG)3));
- }
- KsTsduDat->DataLength = BytesReceived;
- KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE(BytesReceived);
- KsTsduDat->StartOffset = 0;
-
- Buffer = &KsTsduDat->Data[0];
-
- KsTsdu->LastOffset += KsTsduDat->TotalLength;
- }
-
- KsTsduMgr->TotalBytes += BytesReceived;
-
- if (bIsCompleteTsdu) {
-
- /* It's a complete receive, we just move all
- the data from system to our Tsdu */
-
- RtlMoveMemory(
- Buffer,
- Tsdu,
- BytesReceived
- );
-
- *BytesTaken = BytesReceived;
- Status = STATUS_SUCCESS;
-
- if (bNewTsdu) {
- list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
- KsTsduMgr->NumOfTsdu++;
- }
-
- KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
-
- /* re-active the ks connection and wake up the scheduler */
- if (tconn->kstc_conn && tconn->kstc_sched_cb) {
- tconn->kstc_sched_cb( tconn, FALSE, NULL,
- KsTsduMgr->TotalBytes );
- }
-
- } else {
-
- /* there's still data in tdi internal queue, we need issue a new
- Irp to receive all of them. first allocate the tcp context */
-
- context = ExAllocatePoolWithTag(
- NonPagedPool,
- sizeof(KS_TCP_COMPLETION_CONTEXT),
- 'cTsK');
-
- if (!context) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- /* setup the context */
- RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT));
-
- context->tconn = tconn;
- context->CompletionRoutine = KsTcpReceiveCompletionRoutine;
- context->CompletionContext = KsTsdu;
- context->CompletionContext = bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat;
- context->KsTsduMgr = KsTsduMgr;
- context->Event = &(KsTsduMgr->Event);
-
- if (tconn->kstc_type == kstt_sender) {
- FileObject = tconn->sender.kstc_info.FileObject;
- } else {
- FileObject = tconn->child.kstc_info.FileObject;
- }
-
- DeviceObject = IoGetRelatedDeviceObject(FileObject);
-
- /* build new tdi Irp and setup it. */
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
- goto errorout;
- }
-
- Status = KsLockUserBuffer(
- Buffer,
- FALSE,
- BytesReceived,
- IoModifyAccess,
- &Mdl
- );
-
- if (!NT_SUCCESS(Status)) {
- goto errorout;
- }
-
- TdiBuildReceive(
- Irp,
- DeviceObject,
- FileObject,
- KsTcpCompletionRoutine,
- context,
- Mdl,
- ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED),
- BytesReceived
- );
-
- IoSetNextIrpStackLocation(Irp);
-
- /* return the newly built Irp to transport driver,
- it will process it to receive all the data */
-
- *IoRequestPacket = Irp;
- *BytesTaken = 0;
-
- if (bNewTsdu) {
-
- list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
- KsTsduMgr->NumOfTsdu++;
- }
-
- if (bNewBuff) {
- cfs_set_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING);
- } else {
- cfs_set_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING);
- }
- ks_get_tconn(tconn);
- Status = STATUS_MORE_PROCESSING_REQUIRED;
- }
-
- spin_unlock(&(tconn->kstc_lock));
- ks_put_tconn(tconn);
-
- return (Status);
-
-errorout:
-
- spin_unlock(&(tconn->kstc_lock));
-
- if (bNewTsdu && (KsTsdu != NULL)) {
- KsFreeKsTsdu(KsTsdu);
- }
-
- if (Mdl) {
- KsReleaseMdl(Mdl, FALSE);
- }
-
- if (Irp) {
- IoFreeIrp(Irp);
- }
-
- if (context) {
- ExFreePool(context);
- }
-
- ks_abort_tconn(tconn);
- ks_put_tconn(tconn);
-
- *BytesTaken = BytesAvailable;
- Status = STATUS_SUCCESS;
-
- return (Status);
-}
-
-/*
- * Expedited receive event handler
- */
-
-NTSTATUS
-KsTcpReceiveExpeditedEventHandler(
- IN PVOID TdiEventContext,
- IN CONNECTION_CONTEXT ConnectionContext,
- IN ULONG ReceiveFlags,
- IN ULONG BytesIndicated,
- IN ULONG BytesAvailable,
- OUT ULONG * BytesTaken,
- IN PVOID Tsdu,
- OUT PIRP * IoRequestPacket
- )
-{
- return KsTcpReceiveEventHandler(
- TdiEventContext,
- ConnectionContext,
- ReceiveFlags | TDI_RECEIVE_EXPEDITED,
- BytesIndicated,
- BytesAvailable,
- BytesTaken,
- Tsdu,
- IoRequestPacket
- );
-}
-
-
-/*
- * Bulk receive event handler
- *
- * It will queue all the system Tsdus to our TsduList.
- * Then later ks_recv_mdl will release them.
- */
-
-NTSTATUS
-KsTcpChainedReceiveEventHandler (
- IN PVOID TdiEventContext, // the event context
- IN CONNECTION_CONTEXT ConnectionContext,
- IN ULONG ReceiveFlags,
- IN ULONG ReceiveLength,
- IN ULONG StartingOffset, // offset of start of client data in TSDU
- IN PMDL Tsdu, // TSDU data chain
- IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
- )
-{
-
- NTSTATUS Status;
-
- ksock_tconn_t * tconn;
-
- PKS_CHAIN KsChain;
- PKS_TSDUMGR KsTsduMgr;
- PKS_TSDU KsTsdu;
- PKS_TSDU_MDL KsTsduMdl;
-
- BOOLEAN bIsExpedited;
- BOOLEAN bNewTsdu = FALSE;
-
- tconn = (ksock_tconn_t *) ConnectionContext;
-
- bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
-
- KsPrint((2, "KsTcpChainedReceive: ReceiveLength = %xh bIsExpedited = %d\n", ReceiveLength, bIsExpedited));
-
- ks_get_tconn(tconn);
- spin_lock(&(tconn->kstc_lock));
-
- /* check whether we are conntected or not listener ¡Â*/
- if ( !((tconn->kstc_state == ksts_connected) &&
- (tconn->kstc_type == kstt_sender ||
- tconn->kstc_type == kstt_child))) {
-
- spin_unlock(&(tconn->kstc_lock));
- ks_put_tconn(tconn);
-
- return (STATUS_SUCCESS);
- }
-
- /* get the latest Tsdu buffer form TsduMgr list.
- just set NULL if the list is empty. */
-
- if (tconn->kstc_type == kstt_sender) {
- KsChain = &(tconn->sender.kstc_recv);
- } else {
- LASSERT(tconn->kstc_type == kstt_child);
- KsChain = &(tconn->child.kstc_recv);
- }
-
- if (bIsExpedited) {
- KsTsduMgr = &(KsChain->Expedited);
- } else {
- KsTsduMgr = &(KsChain->Normal);
- }
-
- if (list_empty(&(KsTsduMgr->TsduList))) {
-
- LASSERT(KsTsduMgr->NumOfTsdu == 0);
- KsTsdu = NULL;
-
- } else {
-
- LASSERT(KsTsduMgr->NumOfTsdu > 0);
- KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
- LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
-
- if (sizeof(KS_TSDU_MDL) > KsTsdu->TotalLength - KsTsdu->LastOffset) {
- KsTsdu = NULL;
- }
- }
-
- /* if there's no Tsdu or the free size is not enough for this
- KS_TSDU_MDL structure. We need re-allocate a new Tsdu. */
-
- if (NULL == KsTsdu) {
-
- KsTsdu = KsAllocateKsTsdu();
-
- if (NULL == KsTsdu) {
- goto errorout;
- } else {
- bNewTsdu = TRUE;
- }
- }
-
- /* just queue the KS_TSDU_MDL to the Tsdu buffer */
-
- KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
-
- KsTsduMdl->TsduType = TSDU_TYPE_MDL;
- KsTsduMdl->DataLength = ReceiveLength;
- KsTsduMdl->StartOffset = StartingOffset;
- KsTsduMdl->Mdl = Tsdu;
- KsTsduMdl->Descriptor = TsduDescriptor;
-
- KsTsdu->LastOffset += sizeof(KS_TSDU_MDL);
- KsTsduMgr->TotalBytes += ReceiveLength;
-
- KsPrint((2, "KsTcpChainedReceiveEventHandler: Total %xh bytes.\n",
- KsTsduMgr->TotalBytes ));
-
- Status = STATUS_PENDING;
-
- /* attach it to the TsduMgr list if the Tsdu is newly created. */
- if (bNewTsdu) {
-
- list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
- KsTsduMgr->NumOfTsdu++;
- }
-
- spin_unlock(&(tconn->kstc_lock));
-
- /* wake up the threads waiing in ks_recv_mdl */
- KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
-
- if (tconn->kstc_conn && tconn->kstc_sched_cb) {
- tconn->kstc_sched_cb( tconn, FALSE, NULL,
- KsTsduMgr->TotalBytes );
- }
-
- ks_put_tconn(tconn);
-
- /* Return STATUS_PENDING to system because we are still
- owning the MDL resources. ks_recv_mdl is expected
- to free the MDL resources. */
-
- return (Status);
-
-errorout:
-
- spin_unlock(&(tconn->kstc_lock));
-
- if (bNewTsdu && (KsTsdu != NULL)) {
- KsFreeKsTsdu(KsTsdu);
- }
-
- /* abort the tdi connection */
- ks_abort_tconn(tconn);
- ks_put_tconn(tconn);
-
-
- Status = STATUS_SUCCESS;
-
- return (Status);
-}
-
-
-/*
- * Expedited & Bulk receive event handler
- */
-
-NTSTATUS
-KsTcpChainedReceiveExpeditedEventHandler (
- IN PVOID TdiEventContext, // the event context
- IN CONNECTION_CONTEXT ConnectionContext,
- IN ULONG ReceiveFlags,
- IN ULONG ReceiveLength,
- IN ULONG StartingOffset, // offset of start of client data in TSDU
- IN PMDL Tsdu, // TSDU data chain
- IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
- )
-{
- return KsTcpChainedReceiveEventHandler(
- TdiEventContext,
- ConnectionContext,
- ReceiveFlags | TDI_RECEIVE_EXPEDITED,
- ReceiveLength,
- StartingOffset,
- Tsdu,
- TsduDescriptor );
-}
-
-
-VOID
-KsPrintProviderInfo(
- PWSTR DeviceName,
- PTDI_PROVIDER_INFO ProviderInfo
- )
-{
- KsPrint((2, "%ws ProviderInfo:\n", DeviceName));
-
- KsPrint((2, " Version : 0x%4.4X\n", ProviderInfo->Version ));
- KsPrint((2, " MaxSendSize : %d\n", ProviderInfo->MaxSendSize ));
- KsPrint((2, " MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData ));
- KsPrint((2, " MaxDatagramSize : %d\n", ProviderInfo->MaxDatagramSize ));
- KsPrint((2, " ServiceFlags : 0x%8.8X\n", ProviderInfo->ServiceFlags ));
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTION_MODE) {
- KsPrint((2, " CONNECTION_MODE\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_ORDERLY_RELEASE) {
- KsPrint((2, " ORDERLY_RELEASE\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTIONLESS_MODE) {
- KsPrint((2, " CONNECTIONLESS_MODE\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_ERROR_FREE_DELIVERY) {
- KsPrint((2, " ERROR_FREE_DELIVERY\n"));
- }
-
- if( ProviderInfo->ServiceFlags & TDI_SERVICE_SECURITY_LEVEL ) {
- KsPrint((2, " SECURITY_LEVEL\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_BROADCAST_SUPPORTED) {
- KsPrint((2, " BROADCAST_SUPPORTED\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_MULTICAST_SUPPORTED) {
- KsPrint((2, " MULTICAST_SUPPORTED\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_DELAYED_ACCEPTANCE) {
- KsPrint((2, " DELAYED_ACCEPTANCE\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_EXPEDITED_DATA) {
- KsPrint((2, " EXPEDITED_DATA\n"));
- }
-
- if( ProviderInfo->ServiceFlags & TDI_SERVICE_INTERNAL_BUFFERING) {
- KsPrint((2, " INTERNAL_BUFFERING\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_ROUTE_DIRECTED) {
- KsPrint((2, " ROUTE_DIRECTED\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_NO_ZERO_LENGTH) {
- KsPrint((2, " NO_ZERO_LENGTH\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_POINT_TO_POINT) {
- KsPrint((2, " POINT_TO_POINT\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_MESSAGE_MODE) {
- KsPrint((2, " MESSAGE_MODE\n"));
- }
-
- if (ProviderInfo->ServiceFlags & TDI_SERVICE_HALF_DUPLEX) {
- KsPrint((2, " HALF_DUPLEX\n"));
- }
-
- KsPrint((2, " MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData ));
- KsPrint((2, " MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData ));
- KsPrint((2, " NumberOfResources : %d\n", ProviderInfo->NumberOfResources ));
-}
-
-
-/*
- * KsAllocateKsTsdu
- * Reuse a Tsdu from the freelist or allocate a new Tsdu
- * from the LookAsideList table or the NonPagedPool
- *
- * Arguments:
- * N/A
- *
- * Return Value:
- * PKS_Tsdu: the new Tsdu or NULL if it fails
- *
- * Notes:
- * N/A
- */
-
-PKS_TSDU
-KsAllocateKsTsdu()
-{
- PKS_TSDU KsTsdu = NULL;
-
- spin_lock(&(ks_data.ksnd_tsdu_lock));
-
- if (!list_empty (&(ks_data.ksnd_freetsdus))) {
-
- LASSERT(ks_data.ksnd_nfreetsdus > 0);
-
- KsTsdu = list_entry(ks_data.ksnd_freetsdus.next, KS_TSDU, Link);
- list_del(&(KsTsdu->Link));
- ks_data.ksnd_nfreetsdus--;
-
- } else {
-
- KsTsdu = (PKS_TSDU) cfs_mem_cache_alloc(
- ks_data.ksnd_tsdu_slab, 0);
- }
-
- spin_unlock(&(ks_data.ksnd_tsdu_lock));
-
- if (NULL != KsTsdu) {
- KsInitializeKsTsdu(KsTsdu, ks_data.ksnd_tsdu_size);
- }
-
- return (KsTsdu);
-}
-
-
-/*
- * KsPutKsTsdu
- * Move the Tsdu to the free tsdu list in ks_data.
- *
- * Arguments:
- * KsTsdu: Tsdu to be moved.
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-VOID
-KsPutKsTsdu(
- PKS_TSDU KsTsdu
- )
-{
- spin_lock(&(ks_data.ksnd_tsdu_lock));
-
- list_add_tail( &(KsTsdu->Link), &(ks_data.ksnd_freetsdus));
- ks_data.ksnd_nfreetsdus++;
-
- spin_unlock(&(ks_data.ksnd_tsdu_lock));
-}
-
-
-/*
- * KsFreeKsTsdu
- * Release a Tsdu: uninitialize then free it.
- *
- * Arguments:
- * KsTsdu: Tsdu to be freed.
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-VOID
-KsFreeKsTsdu(
- PKS_TSDU KsTsdu
- )
-{
- cfs_mem_cache_free(
- ks_data.ksnd_tsdu_slab,
- KsTsdu );
-}
-
-
-/*
- * KsInitializeKsTsdu
- * Initialize the Tsdu buffer header
- *
- * Arguments:
- * KsTsdu: the Tsdu to be initialized
- * Length: the total length of the Tsdu
- *
- * Return Value:
- * VOID
- *
- * NOTES:
- * N/A
- */
-
-VOID
-KsInitializeKsTsdu(
- PKS_TSDU KsTsdu,
- ULONG Length
- )
-{
- RtlZeroMemory(KsTsdu, Length);
- KsTsdu->Magic = KS_TSDU_MAGIC;
- KsTsdu->TotalLength = Length;
- KsTsdu->StartOffset = KsTsdu->LastOffset =
- KS_DWORD_ALIGN(sizeof(KS_TSDU));
-}
-
-
-/*
- * KsInitializeKsTsduMgr
- * Initialize the management structure of
- * Tsdu buffers
- *
- * Arguments:
- * TsduMgr: the TsduMgr to be initialized
- *
- * Return Value:
- * VOID
- *
- * NOTES:
- * N/A
- */
-
-VOID
-KsInitializeKsTsduMgr(
- PKS_TSDUMGR TsduMgr
- )
-{
- KeInitializeEvent(
- &(TsduMgr->Event),
- NotificationEvent,
- FALSE
- );
-
- CFS_INIT_LIST_HEAD(
- &(TsduMgr->TsduList)
- );
-
- TsduMgr->NumOfTsdu = 0;
- TsduMgr->TotalBytes = 0;
-}
-
-
-/*
- * KsInitializeKsChain
- * Initialize the China structure for receiving
- * or transmitting
- *
- * Arguments:
- * KsChain: the KsChain to be initialized
- *
- * Return Value:
- * VOID
- *
- * NOTES:
- * N/A
- */
-
-VOID
-KsInitializeKsChain(
- PKS_CHAIN KsChain
- )
-{
- KsInitializeKsTsduMgr(&(KsChain->Normal));
- KsInitializeKsTsduMgr(&(KsChain->Expedited));
-}
-
-
-/*
- * KsCleanupTsduMgr
- * Clean up all the Tsdus in the TsduMgr list
- *
- * Arguments:
- * KsTsduMgr: the Tsdu list manager
- *
- * Return Value:
- * NTSTATUS: nt status code
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-KsCleanupTsduMgr(
- PKS_TSDUMGR KsTsduMgr
- )
-{
- PKS_TSDU KsTsdu;
- PKS_TSDU_DAT KsTsduDat;
- PKS_TSDU_BUF KsTsduBuf;
- PKS_TSDU_MDL KsTsduMdl;
-
- LASSERT(NULL != KsTsduMgr);
-
- KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
-
- while (!list_empty(&KsTsduMgr->TsduList)) {
-
- KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link);
- LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
-
- if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
-
- //
- // KsTsdu is empty now, we need free it ...
- //
-
- list_del(&(KsTsdu->Link));
- KsTsduMgr->NumOfTsdu--;
-
- KsFreeKsTsdu(KsTsdu);
-
- } else {
-
- KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
- KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
- KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
-
- if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
-
- KsTsdu->StartOffset += KsTsduDat->TotalLength;
-
- } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) {
-
- ASSERT(KsTsduBuf->UserBuffer != NULL);
-
- if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) {
- ExFreePool(KsTsduBuf->UserBuffer);
- } else {
- cfs_enter_debugger();
- }
-
- KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
-
- } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
-
- //
- // MDL Tsdu Unit ...
- //
-
- TdiReturnChainedReceives(
- &(KsTsduMdl->Descriptor),
- 1 );
-
- KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
- }
- }
- }
-
- return STATUS_SUCCESS;
-}
-
-
-/*
- * KsCleanupKsChain
- * Clean up the TsduMgrs of the KsChain
- *
- * Arguments:
- * KsChain: the chain managing TsduMgr
- *
- * Return Value:
- * NTSTATUS: nt status code
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-KsCleanupKsChain(
- PKS_CHAIN KsChain
- )
-{
- NTSTATUS Status;
-
- LASSERT(NULL != KsChain);
-
- Status = KsCleanupTsduMgr(
- &(KsChain->Normal)
- );
-
- if (!NT_SUCCESS(Status)) {
- cfs_enter_debugger();
- goto errorout;
- }
-
- Status = KsCleanupTsduMgr(
- &(KsChain->Expedited)
- );
-
- if (!NT_SUCCESS(Status)) {
- cfs_enter_debugger();
- goto errorout;
- }
-
-errorout:
-
- return Status;
-}
-
-
-/*
- * KsCleanupTsdu
- * Clean up all the Tsdus of a tdi connected object
- *
- * Arguments:
- * tconn: the tdi connection which is connected already.
- *
- * Return Value:
- * Nt status code
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-KsCleanupTsdu(
- ksock_tconn_t * tconn
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
-
- if (tconn->kstc_type != kstt_sender &&
- tconn->kstc_type != kstt_child ) {
-
- goto errorout;
- }
-
- if (tconn->kstc_type == kstt_sender) {
-
- Status = KsCleanupKsChain(
- &(tconn->sender.kstc_recv)
- );
-
- if (!NT_SUCCESS(Status)) {
- cfs_enter_debugger();
- goto errorout;
- }
-
- Status = KsCleanupKsChain(
- &(tconn->sender.kstc_send)
- );
-
- if (!NT_SUCCESS(Status)) {
- cfs_enter_debugger();
- goto errorout;
- }
-
- } else {
-
- Status = KsCleanupKsChain(
- &(tconn->child.kstc_recv)
- );
-
- if (!NT_SUCCESS(Status)) {
- cfs_enter_debugger();
- goto errorout;
- }
-
- Status = KsCleanupKsChain(
- &(tconn->child.kstc_send)
- );
-
- if (!NT_SUCCESS(Status)) {
- cfs_enter_debugger();
- goto errorout;
- }
-
- }
-
-errorout:
-
- return (Status);
-}
-
-
-/*
- * KsCopyMdlChainToMdlChain
- * Copy data from a [chained] Mdl to anther [chained] Mdl.
- * Tdi library does not provide this function. We have to
- * realize it ourselives.
- *
- * Arguments:
- * SourceMdlChain: the source mdl
- * SourceOffset: start offset of the source
- * DestinationMdlChain: the dst mdl
- * DestinationOffset: the offset where data are to be copied.
- * BytesTobecopied: the expteced bytes to be copied
- * BytesCopied: to store the really copied data length
- *
- * Return Value:
- * NTSTATUS: STATUS_SUCCESS or other error code
- *
- * NOTES:
- * The length of source mdl must be >= SourceOffset + BytesTobecopied
- */
-
-NTSTATUS
-KsCopyMdlChainToMdlChain(
- IN PMDL SourceMdlChain,
- IN ULONG SourceOffset,
- IN PMDL DestinationMdlChain,
- IN ULONG DestinationOffset,
- IN ULONG BytesTobecopied,
- OUT PULONG BytesCopied
- )
-{
- PMDL SrcMdl = SourceMdlChain;
- PMDL DstMdl = DestinationMdlChain;
-
- PUCHAR SrcBuf = NULL;
- PUCHAR DstBuf = NULL;
-
- ULONG dwBytes = 0;
-
- NTSTATUS Status = STATUS_SUCCESS;
-
-
- while (dwBytes < BytesTobecopied) {
-
- ULONG Length = 0;
-
- while (MmGetMdlByteCount(SrcMdl) <= SourceOffset) {
-
- SourceOffset -= MmGetMdlByteCount(SrcMdl);
-
- SrcMdl = SrcMdl->Next;
-
- if (NULL == SrcMdl) {
-
- Status = STATUS_INVALID_PARAMETER;
- goto errorout;
- }
- }
-
- while (MmGetMdlByteCount(DstMdl) <= DestinationOffset) {
-
- DestinationOffset -= MmGetMdlByteCount(DstMdl);
-
- DstMdl = DstMdl->Next;
-
- if (NULL == DstMdl) {
-
- Status = STATUS_INVALID_PARAMETER;
- goto errorout;
- }
- }
-
- DstBuf = (PUCHAR)KsMapMdlBuffer(DstMdl);
-
- if ((NULL == DstBuf)) {
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- //
- // Here we need skip the OVERFLOW case via RtlCopyMemory :-(
- //
-
- if ( KsQueryMdlsSize(SrcMdl) - SourceOffset >
- MmGetMdlByteCount(DstMdl) - DestinationOffset ) {
-
- Length = BytesTobecopied - dwBytes;
-
- if (Length > KsQueryMdlsSize(SrcMdl) - SourceOffset) {
- Length = KsQueryMdlsSize(SrcMdl) - SourceOffset;
- }
-
- if (Length > MmGetMdlByteCount(DstMdl) - DestinationOffset) {
- Length = MmGetMdlByteCount(DstMdl) - DestinationOffset;
- }
-
- SrcBuf = (PUCHAR)KsMapMdlBuffer(SrcMdl);
-
- if ((NULL == DstBuf)) {
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- RtlCopyMemory(
- DstBuf + DestinationOffset,
- SrcBuf + SourceOffset,
- Length
- );
-
- } else {
-
- Status = TdiCopyMdlToBuffer(
- SrcMdl,
- SourceOffset,
- DstBuf,
- DestinationOffset,
- MmGetMdlByteCount(DstMdl),
- &Length
- );
-
- if (STATUS_BUFFER_OVERFLOW == Status) {
- cfs_enter_debugger();
- } else if (!NT_SUCCESS(Status)) {
- cfs_enter_debugger();
- goto errorout;
- }
- }
-
- SourceOffset += Length;
- DestinationOffset += Length;
- dwBytes += Length;
- }
-
-errorout:
-
- if (NT_SUCCESS(Status)) {
- *BytesCopied = dwBytes;
- } else {
- *BytesCopied = 0;
- }
-
- return Status;
-}
-
-
-
-/*
- * KsQueryMdlSize
- * Query the whole size of a MDL (may be chained)
- *
- * Arguments:
- * Mdl: the Mdl to be queried
- *
- * Return Value:
- * ULONG: the total size of the mdl
- *
- * NOTES:
- * N/A
- */
-
-ULONG
-KsQueryMdlsSize (PMDL Mdl)
-{
- PMDL Next = Mdl;
- ULONG Length = 0;
-
-
- //
- // Walking the MDL Chain ...
- //
-
- while (Next) {
- Length += MmGetMdlByteCount(Next);
- Next = Next->Next;
- }
-
- return (Length);
-}
-
-
-/*
- * KsLockUserBuffer
- * Allocate MDL for the buffer and lock the pages into
- * nonpaged pool
- *
- * Arguments:
- * UserBuffer: the user buffer to be locked
- * Length: length in bytes of the buffer
- * Operation: read or write access
- * pMdl: the result of the created mdl
- *
- * Return Value:
- * NTSTATUS: kernel status code (STATUS_SUCCESS
- * or other error code)
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-KsLockUserBuffer (
- IN PVOID UserBuffer,
- IN BOOLEAN bPaged,
- IN ULONG Length,
- IN LOCK_OPERATION Operation,
- OUT PMDL * pMdl
- )
-{
- NTSTATUS Status;
- PMDL Mdl = NULL;
-
- LASSERT(UserBuffer != NULL);
-
- *pMdl = NULL;
-
- Mdl = IoAllocateMdl(
- UserBuffer,
- Length,
- FALSE,
- FALSE,
- NULL
- );
-
- if (Mdl == NULL) {
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
-
- } else {
-
- __try {
-
- if (bPaged) {
- MmProbeAndLockPages(
- Mdl,
- KernelMode,
- Operation
- );
- } else {
- MmBuildMdlForNonPagedPool(
- Mdl
- );
- }
-
- Status = STATUS_SUCCESS;
-
- *pMdl = Mdl;
-
- } __except (EXCEPTION_EXECUTE_HANDLER) {
-
- IoFreeMdl(Mdl);
-
- Mdl = NULL;
-
- cfs_enter_debugger();
-
- Status = STATUS_INVALID_USER_BUFFER;
- }
- }
-
- return Status;
-}
-
-/*
- * KsMapMdlBuffer
- * Map the mdl into a buffer in kernel space
- *
- * Arguments:
- * Mdl: the mdl to be mapped
- *
- * Return Value:
- * PVOID: the buffer mapped or NULL in failure
- *
- * NOTES:
- * N/A
- */
-
-PVOID
-KsMapMdlBuffer (PMDL Mdl)
-{
- LASSERT(Mdl != NULL);
-
- return MmGetSystemAddressForMdlSafe(
- Mdl,
- NormalPagePriority
- );
-}
-
-
-/*
- * KsReleaseMdl
- * Unlock all the pages in the mdl
- *
- * Arguments:
- * Mdl: memory description list to be released
- *
- * Return Value:
- * N/A
- *
- * NOTES:
- * N/A
- */
-
-VOID
-KsReleaseMdl (IN PMDL Mdl,
- IN int Paged )
-{
- LASSERT(Mdl != NULL);
-
- while (Mdl) {
-
- PMDL Next;
-
- Next = Mdl->Next;
-
- if (Paged) {
- MmUnlockPages(Mdl);
- }
-
- IoFreeMdl(Mdl);
-
- Mdl = Next;
- }
-}
-
-
-/*
- * ks_lock_buffer
- * allocate MDL for the user spepcified buffer and lock (paging-in)
- * all the pages of the buffer into system memory
- *
- * Arguments:
- * buffer: the user buffer to be locked
- * length: length in bytes of the buffer
- * access: read or write access
- * mdl: the result of the created mdl
- *
- * Return Value:
- * int: the ks error code: 0: success / -x: failture
- *
- * Notes:
- * N/A
- */
-
-int
-ks_lock_buffer (
- void * buffer,
- int paged,
- int length,
- LOCK_OPERATION access,
- ksock_mdl_t ** kmdl
- )
-{
- NTSTATUS status;
-
- status = KsLockUserBuffer(
- buffer,
- paged !=0,
- length,
- access,
- kmdl
- );
-
- return cfs_error_code(status);
-}
-
-
-/*
- * ks_map_mdl
- * Map the mdl pages into kernel space
- *
- * Arguments:
- * mdl: the mdl to be mapped
- *
- * Return Value:
- * void *: the buffer mapped or NULL in failure
- *
- * Notes:
- * N/A
- */
-
-void *
-ks_map_mdl (ksock_mdl_t * mdl)
-{
- LASSERT(mdl != NULL);
-
- return KsMapMdlBuffer(mdl);
-}
-
-/*
- * ks_release_mdl
- * Unlock all the pages in the mdl and release the mdl
- *
- * Arguments:
- * mdl: memory description list to be released
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-ks_release_mdl (ksock_mdl_t *mdl, int paged)
-{
- LASSERT(mdl != NULL);
-
- KsReleaseMdl(mdl, paged);
-}
-
-
-/*
- * ks_create_tconn
- * allocate a new tconn structure from the SLAB cache or
- * NonPaged sysetm pool
- *
- * Arguments:
- * N/A
- *
- * Return Value:
- * ksock_tconn_t *: the address of tconn or NULL if it fails
- *
- * NOTES:
- * N/A
- */
-
-ksock_tconn_t *
-ks_create_tconn()
-{
- ksock_tconn_t * tconn = NULL;
-
- /* allocate ksoc_tconn_t from the slab cache memory */
-
- tconn = (ksock_tconn_t *)cfs_mem_cache_alloc(
- ks_data.ksnd_tconn_slab, CFS_ALLOC_ZERO);
-
- if (tconn) {
-
- /* zero tconn elements */
- memset(tconn, 0, sizeof(ksock_tconn_t));
-
- /* initialize the tconn ... */
- tconn->kstc_magic = KS_TCONN_MAGIC;
-
- ExInitializeWorkItem(
- &(tconn->kstc_disconnect.WorkItem),
- KsDisconnectHelper,
- &(tconn->kstc_disconnect)
- );
-
- KeInitializeEvent(
- &(tconn->kstc_disconnect.Event),
- SynchronizationEvent,
- FALSE );
-
- ExInitializeWorkItem(
- &(tconn->kstc_destroy),
- ks_destroy_tconn,
- tconn
- );
-
- spin_lock_init(&(tconn->kstc_lock));
-
- ks_get_tconn(tconn);
-
- spin_lock(&(ks_data.ksnd_tconn_lock));
-
- /* attach it into global list in ks_data */
-
- list_add(&(tconn->kstc_list), &(ks_data.ksnd_tconns));
- ks_data.ksnd_ntconns++;
- spin_unlock(&(ks_data.ksnd_tconn_lock));
-
- tconn->kstc_rcv_wnd = tconn->kstc_snd_wnd = 0x10000;
- }
-
- return (tconn);
-}
-
-
-/*
- * ks_free_tconn
- * free the tconn structure to the SLAB cache or NonPaged
- * sysetm pool
- *
- * Arguments:
- * tconn: the tcon is to be freed
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-ks_free_tconn(ksock_tconn_t * tconn)
-{
- LASSERT(atomic_read(&(tconn->kstc_refcount)) == 0);
-
- spin_lock(&(ks_data.ksnd_tconn_lock));
-
- /* remove it from the global list */
- list_del(&tconn->kstc_list);
- ks_data.ksnd_ntconns--;
-
- /* if this is the last tconn, it would be safe for
- ks_tdi_fini_data to quit ... */
- if (ks_data.ksnd_ntconns == 0) {
- cfs_wake_event(&ks_data.ksnd_tconn_exit);
- }
- spin_unlock(&(ks_data.ksnd_tconn_lock));
-
- /* free the structure memory */
- cfs_mem_cache_free(ks_data.ksnd_tconn_slab, tconn);
-}
-
-
-/*
- * ks_init_listener
- * Initialize the tconn as a listener (daemon)
- *
- * Arguments:
- * tconn: the listener tconn
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-ks_init_listener(
- ksock_tconn_t * tconn
- )
-{
- /* preparation: intialize the tconn members */
-
- tconn->kstc_type = kstt_listener;
-
- RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
-
- CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list));
- CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list));
-
- cfs_init_event( &(tconn->listener.kstc_accept_event),
- TRUE,
- FALSE );
-
- cfs_init_event( &(tconn->listener.kstc_destroy_event),
- TRUE,
- FALSE );
-
- tconn->kstc_state = ksts_inited;
-}
-
-
-/*
- * ks_init_sender
- * Initialize the tconn as a sender
- *
- * Arguments:
- * tconn: the sender tconn
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-ks_init_sender(
- ksock_tconn_t * tconn
- )
-{
- tconn->kstc_type = kstt_sender;
- RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
-
- KsInitializeKsChain(&(tconn->sender.kstc_recv));
- KsInitializeKsChain(&(tconn->sender.kstc_send));
-
- tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
- tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
-
- tconn->kstc_state = ksts_inited;
-}
-
-/*
- * ks_init_child
- * Initialize the tconn as a child
- *
- * Arguments:
- * tconn: the child tconn
- *
- * Return Value:
- * N/A
- *
- * NOTES:
- * N/A
- */
-
-void
-ks_init_child(
- ksock_tconn_t * tconn
- )
-{
- tconn->kstc_type = kstt_child;
- RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
-
- KsInitializeKsChain(&(tconn->child.kstc_recv));
- KsInitializeKsChain(&(tconn->child.kstc_send));
-
- tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
- tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
-
- tconn->kstc_state = ksts_inited;
-}
-
-/*
- * ks_get_tconn
- * increase the reference count of the tconn with 1
- *
- * Arguments:
- * tconn: the tdi connection to be referred
- *
- * Return Value:
- * N/A
- *
- * NOTES:
- * N/A
- */
-
-void
-ks_get_tconn(
- ksock_tconn_t * tconn
- )
-{
- atomic_inc(&(tconn->kstc_refcount));
-}
-
-/*
- * ks_put_tconn
- * decrease the reference count of the tconn and destroy
- * it if the refercount becomes 0.
- *
- * Arguments:
- * tconn: the tdi connection to be dereferred
- *
- * Return Value:
- * N/A
- *
- * NOTES:
- * N/A
- */
-
-void
-ks_put_tconn(
- ksock_tconn_t *tconn
- )
-{
- if (atomic_dec_and_test(&(tconn->kstc_refcount))) {
-
- spin_lock(&(tconn->kstc_lock));
-
- if ( ( tconn->kstc_type == kstt_child ||
- tconn->kstc_type == kstt_sender ) &&
- ( tconn->kstc_state == ksts_connected ) ) {
-
- spin_unlock(&(tconn->kstc_lock));
-
- ks_abort_tconn(tconn);
-
- } else {
-
- if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) {
- cfs_enter_debugger();
- } else {
- ExQueueWorkItem(
- &(tconn->kstc_destroy),
- DelayedWorkQueue
- );
-
- cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY);
- }
-
- spin_unlock(&(tconn->kstc_lock));
- }
- }
-}
-
-/*
- * ks_destroy_tconn
- * cleanup the tdi connection and free it
- *
- * Arguments:
- * tconn: the tdi connection to be cleaned.
- *
- * Return Value:
- * N/A
- *
- * NOTES:
- * N/A
- */
-
-void
-ks_destroy_tconn(
- ksock_tconn_t * tconn
- )
-{
- LASSERT(tconn->kstc_refcount.counter == 0);
-
- if (tconn->kstc_type == kstt_listener) {
-
- ks_reset_handlers(tconn);
-
- /* for listener, we just need to close the address object */
- KsCloseAddress(
- tconn->kstc_addr.Handle,
- tconn->kstc_addr.FileObject
- );
-
- tconn->kstc_state = ksts_inited;
-
- } else if (tconn->kstc_type == kstt_child) {
-
- /* for child tdi conections */
-
- /* disassociate the relation between it's connection object
- and the address object */
-
- if (tconn->kstc_state == ksts_associated) {
- KsDisassociateAddress(
- tconn->child.kstc_info.FileObject
- );
- }
-
- /* release the connection object */
-
- KsCloseConnection(
- tconn->child.kstc_info.Handle,
- tconn->child.kstc_info.FileObject
- );
-
- /* release it's refer of it's parent's address object */
- KsCloseAddress(
- NULL,
- tconn->kstc_addr.FileObject
- );
-
- spin_lock(&tconn->child.kstc_parent->kstc_lock);
- spin_lock(&tconn->kstc_lock);
-
- tconn->kstc_state = ksts_inited;
-
- /* remove it frome it's parent's queues */
-
- if (tconn->child.kstc_queued) {
-
- list_del(&(tconn->child.kstc_link));
-
- if (tconn->child.kstc_queueno) {
-
- LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0);
- tconn->child.kstc_parent->listener.kstc_accepted.num -= 1;
-
- } else {
-
- LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0);
- tconn->child.kstc_parent->listener.kstc_listening.num -= 1;
- }
-
- tconn->child.kstc_queued = FALSE;
- }
-
- spin_unlock(&tconn->kstc_lock);
- spin_unlock(&tconn->child.kstc_parent->kstc_lock);
-
- /* drop the reference of the parent tconn */
- ks_put_tconn(tconn->child.kstc_parent);
-
- } else if (tconn->kstc_type == kstt_sender) {
-
- ks_reset_handlers(tconn);
-
- /* release the connection object */
-
- KsCloseConnection(
- tconn->sender.kstc_info.Handle,
- tconn->sender.kstc_info.FileObject
- );
-
- /* release it's refer of it's parent's address object */
- KsCloseAddress(
- tconn->kstc_addr.Handle,
- tconn->kstc_addr.FileObject
- );
-
- tconn->kstc_state = ksts_inited;
-
- } else {
- cfs_enter_debugger();
- }
-
- /* free the tconn structure ... */
-
- ks_free_tconn(tconn);
-}
-
-int
-ks_query_data(
- ksock_tconn_t * tconn,
- size_t * size,
- int bIsExpedited )
-{
- int rc = 0;
-
- PKS_CHAIN KsChain;
- PKS_TSDUMGR KsTsduMgr;
-
- *size = 0;
-
- ks_get_tconn(tconn);
- spin_lock(&(tconn->kstc_lock));
-
- if ( tconn->kstc_type != kstt_sender &&
- tconn->kstc_type != kstt_child) {
- rc = -EINVAL;
- spin_unlock(&(tconn->kstc_lock));
- goto errorout;
- }
-
- if (tconn->kstc_state != ksts_connected) {
- rc = -ENOTCONN;
- spin_unlock(&(tconn->kstc_lock));
- goto errorout;
- }
-
- if (tconn->kstc_type == kstt_sender) {
- KsChain = &(tconn->sender.kstc_recv);
- } else {
- LASSERT(tconn->kstc_type == kstt_child);
- KsChain = &(tconn->child.kstc_recv);
- }
-
- if (bIsExpedited) {
- KsTsduMgr = &(KsChain->Expedited);
- } else {
- KsTsduMgr = &(KsChain->Normal);
- }
-
- *size = KsTsduMgr->TotalBytes;
- spin_unlock(&(tconn->kstc_lock));
-
-errorout:
-
- ks_put_tconn(tconn);
-
- return (rc);
-}
-
-/*
- * ks_get_tcp_option
- * Query the the options of the tcp stream connnection
- *
- * Arguments:
- * tconn: the tdi connection
- * ID: option id
- * OptionValue: buffer to store the option value
- * Length: the length of the value, to be returned
- *
- * Return Value:
- * int: ks return code
- *
- * NOTES:
- * N/A
- */
-
-int
-ks_get_tcp_option (
- ksock_tconn_t * tconn,
- ULONG ID,
- PVOID OptionValue,
- PULONG Length
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
- IO_STATUS_BLOCK IoStatus;
-
- TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx;
-
- PFILE_OBJECT ConnectionObject;
- PDEVICE_OBJECT DeviceObject = NULL;
-
- PIRP Irp = NULL;
- PIO_STACK_LOCATION IrpSp = NULL;
-
- KEVENT Event;
-
- /* make sure the tdi connection is connected ? */
-
- ks_get_tconn(tconn);
-
- if (tconn->kstc_state != ksts_connected) {
- Status = STATUS_INVALID_PARAMETER;
- goto errorout;
- }
-
- LASSERT(tconn->kstc_type == kstt_sender ||
- tconn->kstc_type == kstt_child);
-
- if (tconn->kstc_type == kstt_sender) {
- ConnectionObject = tconn->sender.kstc_info.FileObject;
- } else {
- ConnectionObject = tconn->child.kstc_info.FileObject;
- }
-
- QueryInfoEx.ID.toi_id = ID;
- QueryInfoEx.ID.toi_type = INFO_TYPE_CONNECTION;
- QueryInfoEx.ID.toi_class = INFO_CLASS_PROTOCOL;
- QueryInfoEx.ID.toi_entity.tei_entity = CO_TL_ENTITY;
- QueryInfoEx.ID.toi_entity.tei_instance = 0;
-
- RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE);
-
- KeInitializeEvent(&Event, NotificationEvent, FALSE);
- DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
- Irp = IoBuildDeviceIoControlRequest(
- IOCTL_TCP_QUERY_INFORMATION_EX,
- DeviceObject,
- &QueryInfoEx,
- sizeof(TCP_REQUEST_QUERY_INFORMATION_EX),
- OptionValue,
- *Length,
- FALSE,
- &Event,
- &IoStatus
- );
-
- if (Irp == NULL) {
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- IrpSp = IoGetNextIrpStackLocation(Irp);
-
- if (IrpSp == NULL) {
-
- IoFreeIrp(Irp);
- Irp = NULL;
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- IrpSp->FileObject = ConnectionObject;
- IrpSp->DeviceObject = DeviceObject;
-
- Status = IoCallDriver(DeviceObject, Irp);
-
- if (Status == STATUS_PENDING) {
-
- KeWaitForSingleObject(
- &Event,
- Executive,
- KernelMode,
- FALSE,
- NULL
- );
-
- Status = IoStatus.Status;
- }
-
-
- if (NT_SUCCESS(Status)) {
- *Length = IoStatus.Information;
- } else {
- cfs_enter_debugger();
- memset(OptionValue, 0, *Length);
- Status = STATUS_SUCCESS;
- }
-
-errorout:
-
- ks_put_tconn(tconn);
-
- return cfs_error_code(Status);
-}
-
-/*
- * ks_set_tcp_option
- * Set the the options for the tcp stream connnection
- *
- * Arguments:
- * tconn: the tdi connection
- * ID: option id
- * OptionValue: buffer containing the new option value
- * Length: the length of the value
- *
- * Return Value:
- * int: ks return code
- *
- * NOTES:
- * N/A
- */
-
-NTSTATUS
-ks_set_tcp_option (
- ksock_tconn_t * tconn,
- ULONG ID,
- PVOID OptionValue,
- ULONG Length
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
-
- IO_STATUS_BLOCK IoStatus;
-
- ULONG SetInfoExLength;
- PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL;
-
- PFILE_OBJECT ConnectionObject;
- PDEVICE_OBJECT DeviceObject = NULL;
-
- PIRP Irp = NULL;
- PIO_STACK_LOCATION IrpSp = NULL;
-
- PKEVENT Event;
-
- /* make sure the tdi connection is connected ? */
-
- ks_get_tconn(tconn);
-
- if (tconn->kstc_state != ksts_connected) {
- Status = STATUS_INVALID_PARAMETER;
- goto errorout;
- }
-
- LASSERT(tconn->kstc_type == kstt_sender ||
- tconn->kstc_type == kstt_child);
-
- if (tconn->kstc_type == kstt_sender) {
- ConnectionObject = tconn->sender.kstc_info.FileObject;
- } else {
- ConnectionObject = tconn->child.kstc_info.FileObject;
- }
-
- SetInfoExLength = sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length + sizeof(KEVENT);
-
- SetInfoEx = ExAllocatePoolWithTag(
- NonPagedPool,
- SetInfoExLength,
- 'TSSK'
- );
-
- if (SetInfoEx == NULL) {
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- SetInfoEx->ID.toi_id = ID;
-
- SetInfoEx->ID.toi_type = INFO_TYPE_CONNECTION;
- SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL;
- SetInfoEx->ID.toi_entity.tei_entity = CO_TL_ENTITY;
- SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE;
-
- SetInfoEx->BufferSize = Length;
- RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length);
-
- Event = (PKEVENT)(&(SetInfoEx->Buffer[Length]));
- KeInitializeEvent(Event, NotificationEvent, FALSE);
-
- DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
- Irp = IoBuildDeviceIoControlRequest(
- IOCTL_TCP_SET_INFORMATION_EX,
- DeviceObject,
- SetInfoEx,
- SetInfoExLength,
- NULL,
- 0,
- FALSE,
- Event,
- &IoStatus
- );
-
- if (Irp == NULL) {
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- IrpSp = IoGetNextIrpStackLocation(Irp);
-
- if (IrpSp == NULL) {
- IoFreeIrp(Irp);
- Irp = NULL;
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- IrpSp->FileObject = ConnectionObject;
- IrpSp->DeviceObject = DeviceObject;
-
- Status = IoCallDriver(DeviceObject, Irp);
-
- if (Status == STATUS_PENDING) {
-
- KeWaitForSingleObject(
- Event,
- Executive,
- KernelMode,
- FALSE,
- NULL
- );
-
- Status = IoStatus.Status;
- }
-
-errorout:
-
- if (SetInfoEx) {
- ExFreePool(SetInfoEx);
- }
-
- if (!NT_SUCCESS(Status)) {
- printk("ks_set_tcp_option: error setup tcp option: ID (%d), Status = %xh\n",
- ID, Status);
- Status = STATUS_SUCCESS;
- }
-
- ks_put_tconn(tconn);
-
- return cfs_error_code(Status);
-}
-
-/*
- * ks_bind_tconn
- * bind the tdi connection object with an address
- *
- * Arguments:
- * tconn: tconn to be bound
- * parent: the parent tconn object
- * ipaddr: the ip address
- * port: the port number
- *
- * Return Value:
- * int: 0 for success or ks error codes.
- *
- * NOTES:
- * N/A
- */
-
-int
-ks_bind_tconn (
- ksock_tconn_t * tconn,
- ksock_tconn_t * parent,
- ulong_ptr addr,
- unsigned short port
- )
-{
- NTSTATUS status;
- int rc = 0;
-
- ksock_tdi_addr_t taddr;
-
- memset(&taddr, 0, sizeof(ksock_tdi_addr_t));
-
- if (tconn->kstc_state != ksts_inited) {
-
- status = STATUS_INVALID_PARAMETER;
- rc = cfs_error_code(status);
-
- goto errorout;
-
- } else if (tconn->kstc_type == kstt_child) {
-
- if (NULL == parent) {
- status = STATUS_INVALID_PARAMETER;
- rc = cfs_error_code(status);
-
- goto errorout;
- }
-
- /* refer it's parent's address object */
-
- taddr = parent->kstc_addr;
- ObReferenceObject(taddr.FileObject);
-
- ks_get_tconn(parent);
-
- } else {
-
- PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi);
- ULONG AddrLen = 0;
-
- /* intialize the tdi address*/
-
- TdiAddress->TAAddressCount = 1;
- TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
- TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP;
-
- ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
- ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr);
-
- memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
-
-
- /* open the transport address object */
-
- AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) +
- TDI_ADDRESS_LENGTH_IP;
-
- status = KsOpenAddress(
- &(tconn->kstc_dev),
- &(taddr.Tdi),
- AddrLen,
- &(taddr.Handle),
- &(taddr.FileObject)
- );
-
- if (!NT_SUCCESS(status)) {
-
- KsPrint((0, "ks_bind_tconn: failed to open ip addr object (%x:%d), status = %xh\n",
- addr, port, status ));
- rc = cfs_error_code(status);
- goto errorout;
- }
- }
-
- if (tconn->kstc_type == kstt_child) {
- tconn->child.kstc_parent = parent;
- }
-
- tconn->kstc_state = ksts_bind;
- tconn->kstc_addr = taddr;
-
-errorout:
-
- return (rc);
-}
-
-/*
- * ks_build_tconn
- * build tcp/streaming connection to remote peer
- *
- * Arguments:
- * tconn: tconn to be connected to the peer
- * addr: the peer's ip address
- * port: the peer's port number
- *
- * Return Value:
- * int: 0 for success or ks error codes.
- *
- * Notes:
- * N/A
- */
-
-int
-ks_build_tconn(
- ksock_tconn_t * tconn,
- ulong_ptr addr,
- unsigned short port
- )
-{
- int rc = 0;
- NTSTATUS status = STATUS_SUCCESS;
-
-
- PFILE_OBJECT ConnectionObject = NULL;
- PDEVICE_OBJECT DeviceObject = NULL;
-
- PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL;
- ULONG AddrLength;
-
- PIRP Irp = NULL;
-
- LASSERT(tconn->kstc_type == kstt_sender);
- LASSERT(tconn->kstc_state == ksts_bind);
-
- ks_get_tconn(tconn);
-
- {
- /* set the event callbacks */
- rc = ks_set_handlers(tconn);
-
- if (rc < 0) {
- cfs_enter_debugger();
- goto errorout;
- }
- }
-
- /* create the connection file handle / object */
- status = KsOpenConnection(
- &(tconn->kstc_dev),
- (CONNECTION_CONTEXT)tconn,
- &(tconn->sender.kstc_info.Handle),
- &(tconn->sender.kstc_info.FileObject)
- );
-
- if (!NT_SUCCESS(status)) {
- rc = cfs_error_code(status);
- cfs_enter_debugger();
- goto errorout;
- }
-
- /* associdate the the connection with the adress object of the tconn */
-
- status = KsAssociateAddress(
- tconn->kstc_addr.Handle,
- tconn->sender.kstc_info.FileObject
- );
-
- if (!NT_SUCCESS(status)) {
- rc = cfs_error_code(status);
- cfs_enter_debugger();
- goto errorout;
- }
-
- tconn->kstc_state = ksts_associated;
-
- /* Allocating Connection Info Together with the Address */
- AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address)
- + TDI_ADDRESS_LENGTH_IP;
-
- ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
- NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + AddrLength, 'iCsK');
-
- if (NULL == ConnectionInfo) {
-
- status = STATUS_INSUFFICIENT_RESOURCES;
- rc = cfs_error_code(status);
- cfs_enter_debugger();
- goto errorout;
- }
-
- /* Initializing ConnectionInfo ... */
- {
- PTRANSPORT_ADDRESS TdiAddress;
-
- /* ConnectionInfo settings */
-
- ConnectionInfo->UserDataLength = 0;
- ConnectionInfo->UserData = NULL;
- ConnectionInfo->OptionsLength = 0;
- ConnectionInfo->Options = NULL;
- ConnectionInfo->RemoteAddressLength = AddrLength;
- ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
-
-
- /* intialize the tdi address*/
-
- TdiAddress = ConnectionInfo->RemoteAddress;
-
- TdiAddress->TAAddressCount = 1;
- TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
- TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP;
-
- ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
- ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr);
-
- memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
- }
-
- /* Now prepare to connect the remote peer ... */
-
- ConnectionObject = tconn->sender.kstc_info.FileObject;
- DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
- /* allocate a new Irp */
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
-
- status = STATUS_INSUFFICIENT_RESOURCES;
- rc = cfs_error_code(status);
- cfs_enter_debugger();
- goto errorout;
- }
-
- /* setup the Irp */
-
- TdiBuildConnect(
- Irp,
- DeviceObject,
- ConnectionObject,
- NULL,
- NULL,
- NULL,
- ConnectionInfo,
- NULL
- );
-
-
- /* sumbit the Irp to the underlying transport driver */
- status = KsSubmitTdiIrp(
- DeviceObject,
- Irp,
- TRUE,
- NULL
- );
-
- spin_lock(&(tconn->kstc_lock));
-
- if (NT_SUCCESS(status)) {
-
- /* Connected! the conneciton is built successfully. */
-
- tconn->kstc_state = ksts_connected;
-
- tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo;
- tconn->sender.kstc_info.Remote = ConnectionInfo->RemoteAddress;
-
- spin_unlock(&(tconn->kstc_lock));
-
- } else {
-
- /* Not connected! Abort it ... */
-
- if (rc != 0) {
- cfs_enter_debugger();
- }
-
- Irp = NULL;
- rc = cfs_error_code(status);
-
- tconn->kstc_state = ksts_associated;
- spin_unlock(&(tconn->kstc_lock));
-
- /* disassocidate the connection and the address object,
- after cleanup, it's safe to set the state to abort ... */
-
- if ( NT_SUCCESS(KsDisassociateAddress(
- tconn->sender.kstc_info.FileObject))) {
- tconn->kstc_state = ksts_aborted;
- }
-
- /* reset the event callbacks */
- rc = ks_reset_handlers(tconn);
-
- goto errorout;
- }
-
-errorout:
-
- if (NT_SUCCESS(status)) {
-
- ks_query_local_ipaddr(tconn);
-
- } else {
-
- if (ConnectionInfo) {
- ExFreePool(ConnectionInfo);
- }
- if (Irp) {
- IoFreeIrp(Irp);
- }
- }
-
- ks_put_tconn(tconn);
-
- return (rc);
-}
-
-
-/*
- * ks_disconnect_tconn
- * disconnect the tconn from a connection
- *
- * Arguments:
- * tconn: the tdi connecton object connected already
- * flags: flags & options for disconnecting
- *
- * Return Value:
- * int: ks error code
- *
- * Notes:
- * N/A
- */
-
-int
-ks_disconnect_tconn(
- ksock_tconn_t * tconn,
- ulong_ptr flags
- )
-{
- NTSTATUS status = STATUS_SUCCESS;
-
- ksock_tconn_info_t * info;
-
- PFILE_OBJECT ConnectionObject;
- PDEVICE_OBJECT DeviceObject = NULL;
-
- PIRP Irp = NULL;
-
- KEVENT Event;
-
- ks_get_tconn(tconn);
-
- /* make sure tt's connected already and it
- must be a sender or a child ... */
-
- LASSERT(tconn->kstc_state == ksts_connected);
- LASSERT( tconn->kstc_type == kstt_sender ||
- tconn->kstc_type == kstt_child);
-
- /* reset all the event handlers to NULL */
-
- if (tconn->kstc_type != kstt_child) {
- ks_reset_handlers (tconn);
- }
-
- /* Disconnecting to the remote peer ... */
-
- if (tconn->kstc_type == kstt_sender) {
- info = &(tconn->sender.kstc_info);
- } else {
- info = &(tconn->child.kstc_info);
- }
-
- ConnectionObject = info->FileObject;
- DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
- /* allocate an Irp and setup it */
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
-
- status = STATUS_INSUFFICIENT_RESOURCES;
- cfs_enter_debugger();
- goto errorout;
- }
-
- KeInitializeEvent(
- &Event,
- SynchronizationEvent,
- FALSE
- );
-
- TdiBuildDisconnect(
- Irp,
- DeviceObject,
- ConnectionObject,
- KsDisconectCompletionRoutine,
- &Event,
- NULL,
- flags,
- NULL,
- NULL
- );
-
- /* issue the Irp to the underlying transport
- driver to disconnect the connection */
-
- status = IoCallDriver(DeviceObject, Irp);
-
- if (STATUS_PENDING == status) {
-
- status = KeWaitForSingleObject(
- &Event,
- Executive,
- KernelMode,
- FALSE,
- NULL
- );
-
- status = Irp->IoStatus.Status;
- }
-
- KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n",
- status, KsNtStatusToString(status)));
-
- IoFreeIrp(Irp);
-
- if (info->ConnectionInfo) {
-
- /* disassociate the association between connection/address objects */
-
- status = KsDisassociateAddress(ConnectionObject);
-
- if (!NT_SUCCESS(status)) {
- cfs_enter_debugger();
- }
-
- spin_lock(&(tconn->kstc_lock));
-
- /* cleanup the tsdumgr Lists */
- KsCleanupTsdu (tconn);
-
- /* set the state of the tconn */
- if (NT_SUCCESS(status)) {
- tconn->kstc_state = ksts_disconnected;
- } else {
- tconn->kstc_state = ksts_associated;
- }
-
- /* free the connection info to system pool*/
- ExFreePool(info->ConnectionInfo);
- info->ConnectionInfo = NULL;
- info->Remote = NULL;
-
- spin_unlock(&(tconn->kstc_lock));
- }
-
- status = STATUS_SUCCESS;
-
-errorout:
-
- ks_put_tconn(tconn);
-
- return cfs_error_code(status);
-}
-
-
-/*
- * ks_abort_tconn
- * The connection is broken un-expectedly. We need do
- * some cleanup.
- *
- * Arguments:
- * tconn: the tdi connection
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-ks_abort_tconn(
- ksock_tconn_t * tconn
- )
-{
- PKS_DISCONNECT_WORKITEM WorkItem = NULL;
-
- WorkItem = &(tconn->kstc_disconnect);
-
- ks_get_tconn(tconn);
- spin_lock(&(tconn->kstc_lock));
-
- if (tconn->kstc_state != ksts_connected) {
- ks_put_tconn(tconn);
- } else {
-
- if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
-
- WorkItem->Flags = TDI_DISCONNECT_ABORT;
- WorkItem->tconn = tconn;
-
- cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
-
- ExQueueWorkItem(
- &(WorkItem->WorkItem),
- DelayedWorkQueue
- );
- }
- }
-
- spin_unlock(&(tconn->kstc_lock));
-}
-
-
-/*
- * ks_query_local_ipaddr
- * query the local connection ip address
- *
- * Arguments:
- * tconn: the tconn which is connected
- *
- * Return Value:
- * int: ks error code
- *
- * Notes:
- * N/A
- */
-
-int
-ks_query_local_ipaddr(
- ksock_tconn_t * tconn
- )
-{
- PFILE_OBJECT FileObject = NULL;
- NTSTATUS status;
-
- PTRANSPORT_ADDRESS TdiAddress;
- ULONG AddressLength;
-
- if (tconn->kstc_type == kstt_sender) {
- FileObject = tconn->sender.kstc_info.FileObject;
- } else if (tconn->kstc_type == kstt_child) {
- FileObject = tconn->child.kstc_info.FileObject;
- } else {
- status = STATUS_INVALID_PARAMETER;
- goto errorout;
- }
-
- TdiAddress = &(tconn->kstc_addr.Tdi);
- AddressLength = MAX_ADDRESS_LENGTH;
-
- status = KsQueryIpAddress(FileObject, TdiAddress, &AddressLength);
-
- if (NT_SUCCESS(status)) {
-
- KsPrint((0, "ks_query_local_ipaddr: Local ip address = %xh port = %xh\n",
- ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr,
- ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->sin_port ));
- } else {
- KsPrint((0, "KsQueryonnectionIpAddress: Failed to query the connection local ip address.\n"));
- }
-
-errorout:
-
- return cfs_error_code(status);
-}
-
-/*
- * ks_send_mdl
- * send MDL chain to the peer for a stream connection
- *
- * Arguments:
- * tconn: tdi connection object
- * tx: the transmit context
- * mdl: the mdl chain containing the data
- * len: length of the data
- * flags: flags of the transmission
- *
- * Return Value:
- * ks return code
- *
- * Notes:
- * N/A
- */
-
-int
-ks_send_mdl(
- ksock_tconn_t * tconn,
- void * tx,
- ksock_mdl_t * mdl,
- int len,
- int flags
- )
-{
- NTSTATUS Status;
- int rc = 0;
- ulong_ptr length;
- ulong_ptr tflags;
- ksock_tdi_tx_t * context;
-
- PKS_CHAIN KsChain;
- PKS_TSDUMGR KsTsduMgr;
- PKS_TSDU KsTsdu;
- PKS_TSDU_BUF KsTsduBuf;
- PKS_TSDU_DAT KsTsduDat;
-
- BOOLEAN bNewTsdu = FALSE; /* newly allocated */
- BOOLEAN bNewBuff = FALSE; /* newly allocated */
-
- BOOLEAN bBuffed; /* bufferred sending */
-
- PUCHAR Buffer = NULL;
- ksock_mdl_t * NewMdl = NULL;
-
- PIRP Irp = NULL;
- PFILE_OBJECT ConnObject;
- PDEVICE_OBJECT DeviceObject;
-
- BOOLEAN bIsNonBlock;
-
- ks_get_tconn(tconn);
-
- tflags = ks_tdi_send_flags(flags);
- bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT);
-
- spin_lock(&tconn->kstc_lock);
-
- LASSERT( tconn->kstc_type == kstt_sender ||
- tconn->kstc_type == kstt_child );
-
- if (tconn->kstc_state != ksts_connected) {
- spin_unlock(&tconn->kstc_lock);
- ks_put_tconn(tconn);
- return -ENOTCONN;
- }
-
- /* get the latest Tsdu buffer form TsduMgr list.
- just set NULL if the list is empty. */
-
- if (tconn->kstc_type == kstt_sender) {
- KsChain = &(tconn->sender.kstc_send);
- } else {
- LASSERT(tconn->kstc_type == kstt_child);
- KsChain = &(tconn->child.kstc_send);
- }
-
- if (cfs_is_flag_set(tflags, TDI_SEND_EXPEDITED)) {
- KsTsduMgr = &(KsChain->Expedited);
- } else {
- KsTsduMgr = &(KsChain->Normal);
- }
-
- if (KsTsduMgr->TotalBytes + len <= tconn->kstc_snd_wnd) {
- bBuffed = TRUE;
- } else {
- bBuffed = FALSE;
- }
-
- /* do the preparation work for bufferred sending */
-
- if (bBuffed) {
-
- /* if the data is even larger than the biggest Tsdu, we have
- to allocate new buffer and use TSDU_TYOE_BUF to store it */
-
- if ( KS_TSDU_STRU_SIZE((ULONG)len) > ks_data.ksnd_tsdu_size
- - KS_DWORD_ALIGN(sizeof(KS_TSDU))) {
- bNewBuff = TRUE;
- }
-
- if (list_empty(&(KsTsduMgr->TsduList))) {
-
- LASSERT(KsTsduMgr->NumOfTsdu == 0);
- KsTsdu = NULL;
-
- } else {
-
- LASSERT(KsTsduMgr->NumOfTsdu > 0);
- KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
- LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
-
-
- /* check whether KsTsdu free space is enough, or we need alloc new Tsdu */
- if (bNewBuff) {
- if (sizeof(KS_TSDU_BUF) + KsTsdu->LastOffset > KsTsdu->TotalLength) {
- KsTsdu = NULL;
- }
- } else {
- if ( KS_TSDU_STRU_SIZE((ULONG)len) >
- KsTsdu->TotalLength - KsTsdu->LastOffset ) {
- KsTsdu = NULL;
- }
- }
- }
-
- /* if there's no Tsdu or the free size is not enough for the
- KS_TSDU_BUF or KS_TSDU_DAT. We need re-allocate a new Tsdu. */
-
- if (NULL == KsTsdu) {
-
- KsTsdu = KsAllocateKsTsdu();
-
- if (NULL == KsTsdu) {
- bBuffed = FALSE;
- bNewBuff = FALSE;
- } else {
- bNewTsdu = TRUE;
- }
- }
-
- /* process the case that a new buffer is to be allocated from system memory */
- if (bNewBuff) {
-
- /* now allocating internal buffer to contain the payload */
- Buffer = ExAllocatePool(NonPagedPool, len);
-
- if (NULL == Buffer) {
- bBuffed = FALSE;
- }
- }
- }
-
- if (bBuffed) {
-
- if (bNewBuff) {
-
- /* queue a new KS_TSDU_BUF to the Tsdu buffer */
- KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
-
- KsTsduBuf->TsduFlags = 0;
- KsTsduBuf->DataLength = (ULONG)len;
- KsTsduBuf->StartOffset = 0;
- KsTsduBuf->UserBuffer = Buffer;
- } else {
- /* queue a new KS_TSDU_BUF to the Tsdu buffer */
- KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
-
- KsTsduDat->TsduFlags = 0;
- KsTsduDat->DataLength = (ULONG)len;
- KsTsduDat->StartOffset = 0;
- KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE((ULONG)len);
-
- Buffer = &KsTsduDat->Data[0];
- }
-
- /* now locking the Buffer and copy user payload into the buffer */
- ASSERT(Buffer != NULL);
-
- rc = ks_lock_buffer(Buffer, FALSE, len, IoReadAccess, &NewMdl);
- if (rc != 0) {
- printk("ks_send_mdl: bufferred: error allocating mdl.\n");
- bBuffed = FALSE;
- } else {
- ULONG BytesCopied = 0;
- TdiCopyMdlToBuffer(mdl, 0, Buffer, 0, (ULONG)len, &BytesCopied);
- if (BytesCopied != (ULONG) len) {
- bBuffed = FALSE;
- }
- }
-
- /* Do the finializing job if we succeed to to lock the buffer and move
- user data. Or we need do cleaning up ... */
- if (bBuffed) {
-
- if (bNewBuff) {
- KsTsduBuf->TsduType = TSDU_TYPE_BUF;
- KsTsdu->LastOffset += sizeof(KS_TSDU_BUF);
-
- } else {
- KsTsduDat->TsduType = TSDU_TYPE_DAT;
- KsTsdu->LastOffset += KsTsduDat->TotalLength;
- }
-
- /* attach it to the TsduMgr list if the Tsdu is newly created. */
- if (bNewTsdu) {
-
- list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
- KsTsduMgr->NumOfTsdu++;
- }
-
- } else {
-
- if (NewMdl) {
- ks_release_mdl(NewMdl, FALSE);
- NewMdl = NULL;
- }
-
- if (bNewBuff) {
- ExFreePool(Buffer);
- Buffer = NULL;
- bNewBuff = FALSE;
- }
- }
- }
-
- /* update the TotalBytes being in sending */
- KsTsduMgr->TotalBytes += (ULONG)len;
-
- spin_unlock(&tconn->kstc_lock);
-
- /* cleanup the Tsdu if not successful */
- if (!bBuffed && bNewTsdu) {
- KsPutKsTsdu(KsTsdu);
- bNewTsdu = FALSE;
- KsTsdu = NULL;
- }
-
- /* we need allocate the ksock_tx_t structure from memory pool. */
-
- context = cfs_alloc(sizeof(ksock_tdi_tx_t) + sizeof(KEVENT),0);
- if (!context) {
- /* release the chained mdl */
- ks_release_mdl(mdl, FALSE);
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- /* intialize the TcpContext */
-
- memset(context,0, sizeof(ksock_tdi_tx_t) + sizeof(KEVENT));
-
- context->tconn = tconn;
- context->Event = (PKEVENT) ((PUCHAR)context + sizeof(ksock_tdi_tx_t));
-
- KeInitializeEvent(context->Event, SynchronizationEvent, FALSE);
-
- if (bBuffed) {
-
- /* for bufferred transmission, we need set
- the internal completion routine. */
-
- context->CompletionRoutine = KsTcpSendCompletionRoutine;
- context->KsTsduMgr = KsTsduMgr;
- context->CompletionContext = KsTsdu;
- context->CompletionContext2 = (bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat);
- context->bCounted = FALSE;
-
- } else if (bIsNonBlock) {
-
- /* for non-blocking transmission, we need set
- the internal completion routine too. */
-
- context->CompletionRoutine = KsTcpSendCompletionRoutine;
- context->CompletionContext = tx;
- context->KsTsduMgr = KsTsduMgr;
- context->bCounted = TRUE;
- context->ReferCount = 2;
- }
-
- if (tconn->kstc_type == kstt_sender) {
- ConnObject = tconn->sender.kstc_info.FileObject;
- } else {
- LASSERT(tconn->kstc_type == kstt_child);
- ConnObject = tconn->child.kstc_info.FileObject;
- }
-
- DeviceObject = IoGetRelatedDeviceObject(ConnObject);
-
- Irp = KsBuildTdiIrp(DeviceObject);
-
- if (NULL == Irp) {
-
- /* release the chained mdl */
- ks_release_mdl(mdl, FALSE);
-
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto errorout;
- }
-
- length = KsQueryMdlsSize(mdl);
-
- LASSERT((ULONG)len <= length);
-
- ks_get_tconn(tconn);
-
- TdiBuildSend(
- Irp,
- DeviceObject,
- ConnObject,
- KsTcpCompletionRoutine,
- context,
- (bBuffed ? NewMdl : mdl),
- (bBuffed ? (tflags | TDI_SEND_NON_BLOCKING) : tflags),
- (ULONG)len;
- );
-
- Status = IoCallDriver(DeviceObject, Irp);
-
- if (bBuffed) {
- ks_release_mdl(mdl, FALSE);
- NewMdl = NULL;
- }
-
- if (!NT_SUCCESS(Status)) {
- cfs_enter_debugger();
- rc = cfs_error_code(Status);
- goto errorout;
- }
-
- if (bBuffed) {
- Status = STATUS_SUCCESS;
- rc = len;
- context = NULL;
- } else {
- if (bIsNonBlock) {
- if (InterlockedDecrement(&context->ReferCount) == 0) {
- Status = Irp->IoStatus.Status;
- } else {
- Status = STATUS_PENDING;
- context = NULL;
- }
- } else {
- if (STATUS_PENDING == Status) {
- Status = KeWaitForSingleObject(
- context->Event,
- Executive,
- KernelMode,
- FALSE,
- NULL
- );
-
- if (NT_SUCCESS(Status)) {
- Status = Irp->IoStatus.Status;
- }
- }
- }
-
- if (Status == STATUS_SUCCESS) {
- rc = (int)(Irp->IoStatus.Information);
-
- spin_lock(&tconn->kstc_lock);
- KsTsduMgr->TotalBytes -= rc;
- spin_unlock(&tconn->kstc_lock);
-
- } else {
- rc = cfs_error_code(Status);
- }
- }
-
-errorout:
-
- if (bBuffed) {
-
- if (NewMdl) {
- ks_release_mdl(NewMdl, FALSE);
- NewMdl = NULL;
- }
-
- if (bNewBuff) {
- if (!NT_SUCCESS(Status)) {
- ExFreePool(Buffer);
- Buffer = NULL;
- }
- }
-
- } else {
-
- if (Status != STATUS_PENDING) {
-
- if (Irp) {
-
- /* Freeing the Irp ... */
-
- IoFreeIrp(Irp);
- Irp = NULL;
- }
- }
- }
-
- if (!NT_SUCCESS(Status)) {
-
- spin_lock(&tconn->kstc_lock);
-
- KsTsduMgr->TotalBytes -= (ULONG)len;
-
- if (bBuffed) {
-
- /* attach it to the TsduMgr list if the Tsdu is newly created. */
- if (bNewTsdu) {
-
- list_del(&(KsTsdu->Link));
- KsTsduMgr->NumOfTsdu--;
-
- KsPutKsTsdu(KsTsdu);
- } else {
- if (bNewBuff) {
- if ( (ulong_ptr)KsTsduBuf + sizeof(KS_TSDU_BUF) ==
- (ulong_ptr)KsTsdu + KsTsdu->LastOffset) {
- KsTsdu->LastOffset -= sizeof(KS_TSDU_BUF);
- KsTsduBuf->TsduType = 0;
- } else {
- cfs_enter_debugger();
- KsTsduBuf->StartOffset = KsTsduBuf->DataLength;
- }
- } else {
- if ( (ulong_ptr)KsTsduDat + KsTsduDat->TotalLength ==
- (ulong_ptr)KsTsdu + KsTsdu->LastOffset) {
- KsTsdu->LastOffset -= KsTsduDat->TotalLength;
- KsTsduDat->TsduType = 0;
- } else {
- cfs_enter_debugger();
- KsTsduDat->StartOffset = KsTsduDat->DataLength;
- }
- }
- }
- }
-
- spin_unlock(&tconn->kstc_lock);
- }
-
- /* free the context if is not used at all */
- if (context) {
- cfs_free(context);
- }
-
- ks_put_tconn(tconn);
-
- return rc;
-}
-
-/*
- * ks_recv_mdl
- * Receive data from the peer for a stream connection
- *
- * Arguments:
- * tconn: tdi connection object
- * mdl: the mdl chain to contain the incoming data
- * len: length of the data
- * flags: flags of the receiving
- *
- * Return Value:
- * ks return code
- *
- * Notes:
- * N/A
- */
-
-int
-ks_recv_mdl(
- ksock_tconn_t * tconn,
- ksock_mdl_t * mdl,
- int size,
- int flags
- )
-{
- NTSTATUS Status = STATUS_SUCCESS;
- int rc = 0;
-
- BOOLEAN bIsNonBlock;
- BOOLEAN bIsExpedited;
-
- PKS_CHAIN KsChain;
- PKS_TSDUMGR KsTsduMgr;
- PKS_TSDU KsTsdu;
- PKS_TSDU_DAT KsTsduDat;
- PKS_TSDU_BUF KsTsduBuf;
- PKS_TSDU_MDL KsTsduMdl;
-
- PUCHAR Buffer;
-
- ULONG BytesRecved = 0;
- ULONG RecvedOnce;
-
- bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT);
- bIsExpedited = cfs_is_flag_set(flags, MSG_OOB);
-
- ks_get_tconn(tconn);
-
-Again:
-
- RecvedOnce = 0;
-
- spin_lock(&(tconn->kstc_lock));
-
- if ( tconn->kstc_type != kstt_sender &&
- tconn->kstc_type != kstt_child) {
-
- rc = -EINVAL;
- spin_unlock(&(tconn->kstc_lock));
-
- goto errorout;
- }
-
- if (tconn->kstc_state != ksts_connected) {
-
- rc = -ENOTCONN;
- spin_unlock(&(tconn->kstc_lock));
-
- goto errorout;
- }
-
- if (tconn->kstc_type == kstt_sender) {
- KsChain = &(tconn->sender.kstc_recv);
- } else {
- LASSERT(tconn->kstc_type == kstt_child);
- KsChain = &(tconn->child.kstc_recv);
- }
-
- if (bIsExpedited) {
- KsTsduMgr = &(KsChain->Expedited);
- } else {
- KsTsduMgr = &(KsChain->Normal);
- }
-
-NextTsdu:
-
- if (list_empty(&(KsTsduMgr->TsduList))) {
-
- //
- // It's a notification event. We need reset it to
- // un-signaled state in case there no any tsdus.
- //
-
- KeResetEvent(&(KsTsduMgr->Event));
-
- } else {
-
- KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link);
- LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
-
- /* remove the KsTsdu from TsduMgr list to release the lock */
- list_del(&(KsTsdu->Link));
- KsTsduMgr->NumOfTsdu--;
-
- spin_unlock(&(tconn->kstc_lock));
-
- while ((ULONG)size > BytesRecved) {
-
- ULONG BytesCopied = 0;
- ULONG BytesToCopy = 0;
- ULONG StartOffset = 0;
-
- KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
- KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
- KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
-
- if ( TSDU_TYPE_DAT == KsTsduDat->TsduType ||
- TSDU_TYPE_BUF == KsTsduBuf->TsduType ) {
-
-
- //
- // Data Tsdu Unit ...
- //
-
- if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
-
- if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) {
- /* data is not ready yet*/
- KeResetEvent(&(KsTsduMgr->Event));
- printk("ks_recv_mdl: KsTsduDat (%xh) is not ready yet !!!!!!!\n", KsTsduDat);
- break;
- }
-
- Buffer = &KsTsduDat->Data[0];
- StartOffset = KsTsduDat->StartOffset;
- if (KsTsduDat->DataLength - KsTsduDat->StartOffset > size - BytesRecved) {
- /* Recvmsg requst could be statisfied ... */
- BytesToCopy = size - BytesRecved;
- } else {
- BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset;
- }
-
- } else {
-
- if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) {
- /* data is not ready yet*/
- KeResetEvent(&(KsTsduMgr->Event));
- DbgPrint("ks_recv_mdl: KsTsduBuf (%xh) is not ready yet !!!!!!!\n", KsTsduBuf);
- break;
- }
-
- ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
- Buffer = KsTsduBuf->UserBuffer;
- StartOffset = KsTsduBuf->StartOffset;
-
- if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > size - BytesRecved) {
- /* Recvmsg requst could be statisfied ... */
- BytesToCopy = size - BytesRecved;
- } else {
- BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset;
- }
- }
-
- if (BytesToCopy > 0) {
- Status = TdiCopyBufferToMdl(
- Buffer,
- StartOffset,
- BytesToCopy,
- mdl,
- BytesRecved,
- &BytesCopied
- );
-
- if (NT_SUCCESS(Status)) {
-
- if (BytesToCopy != BytesCopied) {
- cfs_enter_debugger();
- }
-
- BytesRecved += BytesCopied;
- RecvedOnce += BytesCopied;
-
- } else {
-
- cfs_enter_debugger();
-
- if (STATUS_BUFFER_OVERFLOW == Status) {
- }
- }
- }
-
- if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
-
- KsTsduDat->StartOffset += BytesCopied;
-
- if (KsTsduDat->StartOffset == KsTsduDat->DataLength) {
- KsTsdu->StartOffset += KsTsduDat->TotalLength;
- }
-
- } else {
-
- ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
- KsTsduBuf->StartOffset += BytesCopied;
- if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) {
- KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
- /* now we need release the buf to system pool */
- ExFreePool(KsTsduBuf->UserBuffer);
- }
- }
-
- } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
-
- //
- // MDL Tsdu Unit ...
- //
-
- if (KsTsduMdl->DataLength > size - BytesRecved) {
-
- /* Recvmsg requst could be statisfied ... */
-
- BytesToCopy = size - BytesRecved;
-
- } else {
-
- BytesToCopy = KsTsduMdl->DataLength;
- }
-
- Status = KsCopyMdlChainToMdlChain(
- KsTsduMdl->Mdl,
- KsTsduMdl->StartOffset,
- mdl,
- BytesRecved,
- BytesToCopy,
- &BytesCopied
- );
-
- if (NT_SUCCESS(Status)) {
-
- if (BytesToCopy != BytesCopied) {
- cfs_enter_debugger();
- }
-
- KsTsduMdl->StartOffset += BytesCopied;
- KsTsduMdl->DataLength -= BytesCopied;
-
- BytesRecved += BytesCopied;
- RecvedOnce += BytesCopied;
- } else {
- cfs_enter_debugger();
- }
-
- if (0 == KsTsduMdl->DataLength) {
-
- //
- // Call TdiReturnChainedReceives to release the Tsdu memory
- //
-
- TdiReturnChainedReceives(
- &(KsTsduMdl->Descriptor),
- 1 );
-
- KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
- }
-
- } else {
- printk("ks_recv_mdl: unknown tsdu slot: slot = %x type = %x Start= %x\n",
- KsTsduDat, KsTsduDat->TsduType, KsTsduDat->StartOffset, KsTsduDat->DataLength);
- printk(" Tsdu = %x Magic=%x: Start = %x Last = %x Length = %x",
- KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength);
- cfs_enter_debugger();
- }
-
- if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
-
- //
- // KsTsdu is empty now, we need free it ...
- //
-
- KsPutKsTsdu(KsTsdu);
- KsTsdu = NULL;
-
- break;
- }
- }
-
- spin_lock(&(tconn->kstc_lock));
-
- /* we need attach the KsTsdu to the list header */
- if (KsTsdu) {
- KsTsduMgr->NumOfTsdu++;
- list_add(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
- } else if ((ULONG)size > BytesRecved) {
- goto NextTsdu;
- }
- }
-
- if (KsTsduMgr->TotalBytes < RecvedOnce) {
- cfs_enter_debugger();
- KsTsduMgr->TotalBytes = 0;
- } else {
- KsTsduMgr->TotalBytes -= RecvedOnce;
- }
-
- spin_unlock(&(tconn->kstc_lock));
-
- if (NT_SUCCESS(Status)) {
-
- if ((BytesRecved < (ulong_ptr)size) && (!bIsNonBlock)) {
-
- KeWaitForSingleObject(
- &(KsTsduMgr->Event),
- Executive,
- KernelMode,
- FALSE,
- NULL
- );
-
- goto Again;
- }
-
- if (bIsNonBlock && (BytesRecved == 0)) {
- rc = -EAGAIN;
- } else {
- rc = BytesRecved;
- }
- }
-
-errorout:
-
- ks_put_tconn(tconn);
-
- if (rc > 0) {
- KsPrint((1, "ks_recv_mdl: recvieving %d bytes ...\n", rc));
- } else {
- KsPrint((0, "ks_recv_mdl: recvieving error code = %d Stauts = %xh ...\n", rc, Status));
- }
-
- /* release the chained mdl */
- ks_release_mdl(mdl, FALSE);
-
- return (rc);
-}
-
-
-/*
- * ks_init_tdi_data
- * initialize the global data in ksockal_data
- *
- * Arguments:
- * N/A
- *
- * Return Value:
- * int: ks error code
- *
- * Notes:
- * N/A
- */
-
-int
-ks_init_tdi_data()
-{
- int rc = 0;
-
- /* initialize tconn related globals */
- RtlZeroMemory(&ks_data, sizeof(ks_data_t));
-
- spin_lock_init(&ks_data.ksnd_tconn_lock);
- CFS_INIT_LIST_HEAD(&ks_data.ksnd_tconns);
- cfs_init_event(&ks_data.ksnd_tconn_exit, TRUE, FALSE);
-
- ks_data.ksnd_tconn_slab = cfs_mem_cache_create(
- "tcon", sizeof(ksock_tconn_t) , 0, 0);
-
- if (!ks_data.ksnd_tconn_slab) {
- rc = -ENOMEM;
- goto errorout;
- }
-
- /* initialize tsdu related globals */
-
- spin_lock_init(&ks_data.ksnd_tsdu_lock);
- CFS_INIT_LIST_HEAD(&ks_data.ksnd_freetsdus);
- ks_data.ksnd_tsdu_size = TDINAL_TSDU_DEFAULT_SIZE; /* 64k */
- ks_data.ksnd_tsdu_slab = cfs_mem_cache_create(
- "tsdu", ks_data.ksnd_tsdu_size, 0, 0);
-
- if (!ks_data.ksnd_tsdu_slab) {
- rc = -ENOMEM;
- cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab);
- ks_data.ksnd_tconn_slab = NULL;
- goto errorout;
- }
-
- /* initialize daemon related globals */
-
- spin_lock_init(&ks_data.ksnd_daemon_lock);
- CFS_INIT_LIST_HEAD(&ks_data.ksnd_daemons);
- cfs_init_event(&ks_data.ksnd_daemon_exit, TRUE, FALSE);
-
- KsRegisterPnpHandlers();
-
-errorout:
-
- return rc;
-}
-
-
-/*
- * ks_fini_tdi_data
- * finalize the global data in ksockal_data
- *
- * Arguments:
- * N/A
- *
- * Return Value:
- * int: ks error code
- *
- * Notes:
- * N/A
- */
-
-void
-ks_fini_tdi_data()
-{
- PKS_TSDU KsTsdu = NULL;
- struct list_head * list = NULL;
-
- /* clean up the pnp handler and address slots */
- KsDeregisterPnpHandlers();
-
- /* we need wait until all the tconn are freed */
- spin_lock(&(ks_data.ksnd_tconn_lock));
-
- if (list_empty(&(ks_data.ksnd_tconns))) {
- cfs_wake_event(&ks_data.ksnd_tconn_exit);
- }
- spin_unlock(&(ks_data.ksnd_tconn_lock));
-
- /* now wait on the tconn exit event */
- cfs_wait_event(&ks_data.ksnd_tconn_exit, 0);
-
- /* it's safe to delete the tconn slab ... */
- cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab);
- ks_data.ksnd_tconn_slab = NULL;
-
- /* clean up all the tsud buffers in the free list */
- spin_lock(&(ks_data.ksnd_tsdu_lock));
- list_for_each (list, &ks_data.ksnd_freetsdus) {
- KsTsdu = list_entry (list, KS_TSDU, Link);
-
- cfs_mem_cache_free(
- ks_data.ksnd_tsdu_slab,
- KsTsdu );
- }
- spin_unlock(&(ks_data.ksnd_tsdu_lock));
-
- /* it's safe to delete the tsdu slab ... */
- cfs_mem_cache_destroy(ks_data.ksnd_tsdu_slab);
- ks_data.ksnd_tsdu_slab = NULL;
-
- /* good! it's smooth to do the cleaning up...*/
-}
-
-/*
- * ks_create_child_tconn
- * Create the backlog child connection for a listener
- *
- * Arguments:
- * parent: the listener daemon connection
- *
- * Return Value:
- * the child connection or NULL in failure
- *
- * Notes:
- * N/A
- */
-
-ksock_tconn_t *
-ks_create_child_tconn(
- ksock_tconn_t * parent
- )
-{
- NTSTATUS status;
- ksock_tconn_t * backlog;
-
- /* allocate the tdi connecton object */
- backlog = ks_create_tconn();
-
- if (!backlog) {
- goto errorout;
- }
-
- /* initialize the tconn as a child */
- ks_init_child(backlog);
-
-
- /* now bind it */
- if (ks_bind_tconn(backlog, parent, 0, 0) < 0) {
- ks_free_tconn(backlog);
- backlog = NULL;
- goto errorout;
- }
-
- /* open the connection object */
- status = KsOpenConnection(
- &(backlog->kstc_dev),
- (PVOID)backlog,
- &(backlog->child.kstc_info.Handle),
- &(backlog->child.kstc_info.FileObject)
- );
-
- if (!NT_SUCCESS(status)) {
-
- ks_put_tconn(backlog);
- backlog = NULL;
- cfs_enter_debugger();
- goto errorout;
- }
-
- /* associate it now ... */
- status = KsAssociateAddress(
- backlog->kstc_addr.Handle,
- backlog->child.kstc_info.FileObject
- );
-
- if (!NT_SUCCESS(status)) {
-
- ks_put_tconn(backlog);
- backlog = NULL;
- cfs_enter_debugger();
- goto errorout;
- }
-
- backlog->kstc_state = ksts_associated;
-
-errorout:
-
- return backlog;
-}
-
-/*
- * ks_replenish_backlogs(
- * to replenish the backlogs listening...
- *
- * Arguments:
- * tconn: the parent listen tdi connect
- * nbacklog: number fo child connections in queue
- *
- * Return Value:
- * N/A
- *
- * Notes:
- * N/A
- */
-
-void
-ks_replenish_backlogs(
- ksock_tconn_t * parent,
- int nbacklog
- )
-{
- ksock_tconn_t * backlog;
- int n = 0;
-
- /* calculate how many backlogs needed */
- if ( ( parent->listener.kstc_listening.num +
- parent->listener.kstc_accepted.num ) < nbacklog ) {
- n = nbacklog - ( parent->listener.kstc_listening.num +
- parent->listener.kstc_accepted.num );
- } else {
- n = 0;
- }
-
- while (n--) {
-
- /* create the backlog child tconn */
- backlog = ks_create_child_tconn(parent);
-
- spin_lock(&(parent->kstc_lock));
-
- if (backlog) {
- spin_lock(&backlog->kstc_lock);
- /* attch it into the listing list of daemon */
- list_add( &backlog->child.kstc_link,
- &parent->listener.kstc_listening.list );
- parent->listener.kstc_listening.num++;
-
- backlog->child.kstc_queued = TRUE;
- spin_unlock(&backlog->kstc_lock);
- } else {
- cfs_enter_debugger();
- }
-
- spin_unlock(&(parent->kstc_lock));
- }
-}
-
-/*
- * ks_start_listen
- * setup the listener tdi connection and make it listen
- * on the user specified ip address and port.
- *
- * Arguments:
- * tconn: the parent listen tdi connect
- * nbacklog: number fo child connections in queue
- *
- * Return Value:
- * ks error code >=: success; otherwise error.
- *
- * Notes:
- * N/A
- */
-
-int
-ks_start_listen(ksock_tconn_t *tconn, int nbacklog)
-{
- int rc = 0;
-
- /* now replenish the backlogs */
- ks_replenish_backlogs(tconn, nbacklog);
-
- /* set the event callback handlers */
- rc = ks_set_handlers(tconn);
-
- if (rc < 0) {
- return rc;
- }
-
- spin_lock(&(tconn->kstc_lock));
- tconn->listener.nbacklog = nbacklog;
- tconn->kstc_state = ksts_listening;
- cfs_set_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED);
- spin_unlock(&(tconn->kstc_lock));
-
- return rc;
-}
-
-void
-ks_stop_listen(ksock_tconn_t *tconn)
-{
- struct list_head * list;
- ksock_tconn_t * backlog;
-
- /* reset all tdi event callbacks to NULL */
- ks_reset_handlers (tconn);
-
- spin_lock(&tconn->kstc_lock);
-
- cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED);
-
- /* cleanup all the listening backlog child connections */
- list_for_each (list, &(tconn->listener.kstc_listening.list)) {
- backlog = list_entry(list, ksock_tconn_t, child.kstc_link);
-
- /* destory and free it */
- ks_put_tconn(backlog);
- }
-
- spin_unlock(&tconn->kstc_lock);
-
- /* wake up it from the waiting on new incoming connections */
- KeSetEvent(&tconn->listener.kstc_accept_event, 0, FALSE);
-
- /* free the listening daemon tconn */
- ks_put_tconn(tconn);
-}
-
-
-/*
- * ks_wait_child_tconn
- * accept a child connection from peer
- *
- * Arguments:
- * parent: the daemon tdi connection listening
- * child: to contain the accepted connection
- *
- * Return Value:
- * ks error code;
- *
- * Notes:
- * N/A
- */
-
-int
-ks_wait_child_tconn(
- ksock_tconn_t * parent,
- ksock_tconn_t ** child
- )
-{
- struct list_head * tmp;
- ksock_tconn_t * backlog = NULL;
-
- ks_replenish_backlogs(parent, parent->listener.nbacklog);
-
- spin_lock(&(parent->kstc_lock));
-
- if (parent->listener.kstc_listening.num <= 0) {
- spin_unlock(&(parent->kstc_lock));
- return -1;
- }
-
-again:
-
- /* check the listening queue and try to search the accepted connecton */
-
- list_for_each(tmp, &(parent->listener.kstc_listening.list)) {
- backlog = list_entry (tmp, ksock_tconn_t, child.kstc_link);
-
- spin_lock(&(backlog->kstc_lock));
-
- if (backlog->child.kstc_accepted) {
-
- LASSERT(backlog->kstc_state == ksts_connected);
- LASSERT(backlog->child.kstc_busy);
-
- list_del(&(backlog->child.kstc_link));
- list_add(&(backlog->child.kstc_link),
- &(parent->listener.kstc_accepted.list));
- parent->listener.kstc_accepted.num++;
- parent->listener.kstc_listening.num--;
- backlog->child.kstc_queueno = 1;
-
- spin_unlock(&(backlog->kstc_lock));
-
- break;
- } else {
- spin_unlock(&(backlog->kstc_lock));
- backlog = NULL;
- }
- }
-
- spin_unlock(&(parent->kstc_lock));
-
- /* we need wait until new incoming connections are requested
- or the case of shuting down the listenig daemon thread */
- if (backlog == NULL) {
-
- NTSTATUS Status;
-
- Status = KeWaitForSingleObject(
- &(parent->listener.kstc_accept_event),
- Executive,
- KernelMode,
- FALSE,
- NULL
- );
-
- spin_lock(&(parent->kstc_lock));
-
- /* check whether it's exptected to exit ? */
- if (!cfs_is_flag_set(parent->kstc_flags, KS_TCONN_DAEMON_STARTED)) {
- spin_unlock(&(parent->kstc_lock));
- } else {
- goto again;
- }
- }
-
- if (backlog) {
- /* query the local ip address of the connection */
- ks_query_local_ipaddr(backlog);
- }
-
- *child = backlog;
-
- return 0;
-}
-
-int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
-{
- ks_addr_slot_t * slot = NULL;
- PLIST_ENTRY list = NULL;
-
- spin_lock(&ks_data.ksnd_addrs_lock);
-
- list = ks_data.ksnd_addrs_list.Flink;
- while (list != &ks_data.ksnd_addrs_list) {
- slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
- if (_stricmp(name, &slot->iface[0]) == 0) {
- *up = slot->up;
- *ip = slot->ip_addr;
- *mask = slot->netmask;
- break;
- }
- list = list->Flink;
- slot = NULL;
- }
-
- spin_unlock(&ks_data.ksnd_addrs_lock);
-
- return (int)(slot == NULL);
-}
-
-int libcfs_ipif_enumerate(char ***names)
-{
- ks_addr_slot_t * slot = NULL;
- PLIST_ENTRY list = NULL;
- int nips = 0;
-
- spin_lock(&ks_data.ksnd_addrs_lock);
-
- *names = cfs_alloc(sizeof(char *) * ks_data.ksnd_naddrs, CFS_ALLOC_ZERO);
- if (*names == NULL) {
- goto errorout;
- }
-
- list = ks_data.ksnd_addrs_list.Flink;
- while (list != &ks_data.ksnd_addrs_list) {
- slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
- list = list->Flink;
- (*names)[nips++] = slot->iface;
- cfs_assert(nips <= ks_data.ksnd_naddrs);
- }
-
- cfs_assert(nips == ks_data.ksnd_naddrs);
-
-errorout:
-
- spin_unlock(&ks_data.ksnd_addrs_lock);
- return nips;
-}
-
-void libcfs_ipif_free_enumeration(char **names, int n)
-{
- if (names) {
- cfs_free(names);
- }
-}
-
-int libcfs_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog)
-{
- int rc = 0;
- ksock_tconn_t * parent;
-
- parent = ks_create_tconn();
- if (!parent) {
- rc = -ENOMEM;
- goto errorout;
- }
-
- /* initialize the tconn as a listener */
- ks_init_listener(parent);
-
- /* bind the daemon->tconn */
- rc = ks_bind_tconn(parent, NULL, ip, (unsigned short)port);
-
- if (rc < 0) {
- ks_free_tconn(parent);
- goto errorout;
- }
-
- /* create listening children and make it to listen state*/
- rc = ks_start_listen(parent, backlog);
- if (rc < 0) {
- ks_stop_listen(parent);
- goto errorout;
- }
-
- *sockp = parent;
-
-errorout:
-
- return rc;
-}
-
-int libcfs_sock_accept(struct socket **newsockp, struct socket *sock)
-{
- /* wait for incoming connecitons */
- return ks_wait_child_tconn(sock, newsockp);
-}
-
-void libcfs_sock_abort_accept(struct socket *sock)
-{
- LASSERT(sock->kstc_type == kstt_listener);
-
- spin_lock(&(sock->kstc_lock));
-
- /* clear the daemon flag */
- cfs_clear_flag(sock->kstc_flags, KS_TCONN_DAEMON_STARTED);
-
- /* wake up it from the waiting on new incoming connections */
- KeSetEvent(&sock->listener.kstc_accept_event, 0, FALSE);
-
- spin_unlock(&(sock->kstc_lock));
-}
-
-/*
- * libcfs_sock_connect
- * build a conntion between local ip/port and the peer ip/port.
- *
- * Arguments:
- * laddr: local ip address
- * lport: local port number
- * paddr: peer's ip address
- * pport: peer's port number
- *
- * Return Value:
- * int: return code ...
- *
- * Notes:
- * N/A
- */
-
-
-int libcfs_sock_connect(struct socket **sockp, int *fatal,
- __u32 local_ip, int local_port,
- __u32 peer_ip, int peer_port)
-{
- ksock_tconn_t * tconn = NULL;
- int rc = 0;
-
- *sockp = NULL;
-
- KsPrint((1, "libcfs_sock_connect: connecting to %x:%d with %x:%d...\n",
- peer_ip, peer_port, local_ip, local_port ));
-
- /* create the tdi connecion structure */
- tconn = ks_create_tconn();
- if (!tconn) {
- rc = -ENOMEM;
- goto errorout;
- }
-
- /* initialize the tdi sender connection */
- ks_init_sender(tconn);
-
- /* bind the local ip address with the tconn */
- rc = ks_bind_tconn(tconn, NULL, local_ip, (unsigned short)local_port);
- if (rc < 0) {
- KsPrint((0, "libcfs_sock_connect: failed to bind address %x:%d...\n",
- local_ip, local_port ));
- ks_free_tconn(tconn);
- goto errorout;
- }
-
- /* connect to the remote peer */
- rc = ks_build_tconn(tconn, peer_ip, (unsigned short)peer_port);
- if (rc < 0) {
- KsPrint((0, "libcfs_sock_connect: failed to connect %x:%d ...\n",
- peer_ip, peer_port ));
-
- ks_put_tconn(tconn);
- goto errorout;
- }
-
- *sockp = tconn;
-
-errorout:
-
- return rc;
-}
-
-int libcfs_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize)
-{
- return 0;
-}
-
-int libcfs_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize)
-{
- return 0;
-}
-
-int libcfs_sock_getaddr(struct socket *socket, int remote, __u32 *ip, int *port)
-{
- PTRANSPORT_ADDRESS taddr = NULL;
-
- spin_lock(&socket->kstc_lock);
- if (remote) {
- if (socket->kstc_type == kstt_sender) {
- taddr = socket->sender.kstc_info.Remote;
- } else if (socket->kstc_type == kstt_child) {
- taddr = socket->child.kstc_info.Remote;
- }
- } else {
- taddr = &(socket->kstc_addr.Tdi);
- }
-
- if (taddr) {
- PTDI_ADDRESS_IP addr = (PTDI_ADDRESS_IP)(&(taddr->Address[0].Address));
- if (ip != NULL)
- *ip = ntohl (addr->in_addr);
- if (port != NULL)
- *port = ntohs (addr->sin_port);
- } else {
- spin_unlock(&socket->kstc_lock);
- return -ENOTCONN;
- }
-
- spin_unlock(&socket->kstc_lock);
- return 0;
-}
-
-int libcfs_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- ksock_mdl_t * mdl;
-
- int offset = 0;
-
- while (nob > offset) {
-
- /* lock the user buffer */
- rc = ks_lock_buffer( (char *)buffer + offset,
- FALSE, nob - offset, IoReadAccess, &mdl );
-
- if (rc < 0) {
- return (rc);
- }
-
- /* send out the whole mdl */
- rc = ks_send_mdl( sock, NULL, mdl, nob - offset, 0 );
-
- if (rc > 0) {
- offset += rc;
- } else {
- return (rc);
- }
- }
-
- return (0);
-}
-
-int libcfs_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- ksock_mdl_t * mdl;
-
- int offset = 0;
-
- while (nob > offset) {
-
- /* lock the user buffer */
- rc = ks_lock_buffer( (char *)buffer + offset,
- FALSE, nob - offset, IoWriteAccess, &mdl );
-
- if (rc < 0) {
- return (rc);
- }
-
- /* recv the requested buffer */
- rc = ks_recv_mdl( sock, mdl, nob - offset, 0 );
-
- if (rc > 0) {
- offset += rc;
- } else {
- return (rc);
- }
- }
-
- return (0);
-}
-
-void libcfs_sock_release(struct socket *sock)
-{
- if (sock->kstc_type == kstt_listener &&
- sock->kstc_state == ksts_listening) {
- ks_stop_listen(sock);
- } else {
- ks_put_tconn(sock);
- }
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-
-#include <libcfs/libcfs.h>
-#include <libcfs/kp30.h>
-#include "tracefile.h"
-
-#ifndef get_cpu
-#define get_cpu() smp_processor_id()
-#define put_cpu() do { } while (0)
-#endif
-
-#define TCD_TYPE_MAX 1
-
-event_t tracefile_event;
-
-void tracefile_init_arch()
-{
- int i;
- int j;
- struct trace_cpu_data *tcd;
-
- cfs_init_event(&tracefile_event, TRUE, TRUE);
-
- /* initialize trace_data */
- memset(trace_data, 0, sizeof(trace_data));
- for (i = 0; i < TCD_TYPE_MAX; i++) {
- trace_data[i]=cfs_alloc(sizeof(struct trace_data_union)*NR_CPUS, 0);
- if (trace_data[i] == NULL)
- goto out;
- }
-
- /* arch related info initialized */
- tcd_for_each(tcd, i, j) {
- tcd->tcd_pages_factor = 100; /* Only one type */
- tcd->tcd_cpu = j;
- tcd->tcd_type = i;
- }
-
- memset(trace_console_buffers, 0, sizeof(trace_console_buffers));
-
- for (i = 0; i < NR_CPUS; i++) {
- for (j = 0; j < 1; j++) {
- trace_console_buffers[i][j] =
- cfs_alloc(TRACE_CONSOLE_BUFFER_SIZE,
- CFS_ALLOC_ZERO);
-
- if (trace_console_buffers[i][j] == NULL)
- goto out;
- }
- }
-
- return 0;
-
-out:
- tracefile_fini_arch();
- KsPrint((0, "lnet: No enough memory\n"));
- return -ENOMEM;
-}
-
-void tracefile_fini_arch()
-{
- int i;
- int j;
-
- for (i = 0; i < NR_CPUS; i++) {
- for (j = 0; j < 2; j++) {
- if (trace_console_buffers[i][j] != NULL) {
- cfs_free(trace_console_buffers[i][j]);
- trace_console_buffers[i][j] = NULL;
- }
- }
- }
-
- for (i = 0; trace_data[i] != NULL; i++) {
- cfs_free(trace_data[i]);
- trace_data[i] = NULL;
- }
-}
-
-void tracefile_read_lock()
-{
- cfs_wait_event(&tracefile_event, 0);
-}
-
-void tracefile_read_unlock()
-{
- cfs_wake_event(&tracefile_event);
-}
-
-void tracefile_write_lock()
-{
- cfs_wait_event(&tracefile_event, 0);
-}
-
-void tracefile_write_unlock()
-{
- cfs_wake_event(&tracefile_event);
-}
-
-char *
-trace_get_console_buffer(void)
-{
-#pragma message ("is there possible problem with pre-emption ?")
- int cpu = (int) KeGetCurrentProcessorNumber();
- return trace_console_buffers[cpu][0];
-}
-
-void
-trace_put_console_buffer(char *buffer)
-{
-}
-
-struct trace_cpu_data *
-trace_get_tcd(void)
-{
-#pragma message("todo: return NULL if in interrupt context")
-
- int cpu = (int) KeGetCurrentProcessorNumber();
- return &(*trace_data[0])[cpu].tcd;
-}
-
-void
-trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags)
-{
-}
-
-int
-trace_lock_tcd(struct trace_cpu_data *tcd)
-{
- __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
- return 1;
-}
-
-void
-trace_unlock_tcd(struct trace_cpu_data *tcd)
-{
- __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
-}
-
-void
-set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
- const int line, unsigned long stack)
-{
- struct timeval tv;
-
- do_gettimeofday(&tv);
-
- header->ph_subsys = subsys;
- header->ph_mask = mask;
- header->ph_cpu_id = smp_processor_id();
- header->ph_sec = (__u32)tv.tv_sec;
- header->ph_usec = tv.tv_usec;
- header->ph_stack = stack;
- header->ph_pid = current->pid;
- header->ph_line_num = line;
- header->ph_extern_pid = 0;
- return;
-}
-
-void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
- int len, const char *file, const char *fn)
-{
- char *prefix = NULL, *ptype = NULL;
-
- if ((mask & D_EMERG) != 0) {
- prefix = "LustreError";
- ptype = KERN_EMERG;
- } else if ((mask & D_ERROR) != 0) {
- prefix = "LustreError";
- ptype = KERN_ERR;
- } else if ((mask & D_WARNING) != 0) {
- prefix = "Lustre";
- ptype = KERN_WARNING;
- } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) {
- prefix = "Lustre";
- ptype = KERN_INFO;
- }
-
- if ((mask & D_CONSOLE) != 0) {
- printk("%s%s: %s", ptype, prefix, buf);
- } else {
- printk("%s%s: %d:%d:(%s:%d:%s()) %s", ptype, prefix, hdr->ph_pid,
- hdr->ph_extern_pid, file, hdr->ph_line_num, fn, buf);
- }
- return;
-}
-
-int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
-{
- return 1;
-}
-
-int trace_max_debug_mb(void)
-{
- int total_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT));
-
- return MAX(512, (total_mb * 80)/100);
-}
-
-void
-trace_call_on_all_cpus(void (*fn)(void *arg), void *arg)
-{
-#error "tbd"
-}
-
+++ /dev/null
-
-#ifndef __KERNEL__
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <io.h>
-#include <time.h>
-#include <windows.h>
-
-void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
- const int line, unsigned long stack,
- char *format, ...) {
- }
-
-int cfs_proc_mknod(const char *path, unsigned short mode, unsigned int dev)
-{
- return 0;
-}
-
-
-void print_last_error(char* Prefix)
-{
- LPVOID lpMsgBuf;
-
- FormatMessage(
- FORMAT_MESSAGE_ALLOCATE_BUFFER |
- FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL,
- GetLastError(),
- 0,
- (LPTSTR) &lpMsgBuf,
- 0,
- NULL
- );
-
- printf("%s %s", Prefix, (LPTSTR) lpMsgBuf);
-
- LocalFree(lpMsgBuf);
-}
-
-//
-// The following declarations are defined in io.h of VC
-// sys/types.h will conflict with io.h, so we need place
-// these declartions here.
-
-#ifdef __cplusplus
-extern "C" {
-#endif
- void
- __declspec (naked) __cdecl _chkesp(void)
- {
-#if _X86_
- __asm { jz exit_chkesp };
- __asm { int 3 };
- exit_chkesp:
- __asm { ret };
-#endif
- }
-#ifdef __cplusplus
-}
-#endif
-
-unsigned int sleep (unsigned int seconds)
-{
- Sleep(seconds * 1000);
- return 0;
-}
-
-int gethostname(char * name, int namelen)
-{
- return 0;
-}
-
-int ioctl (
- int handle,
- int cmd,
- void *buffer
- )
-{
- printf("hello, world\n");
- return 0;
-}
-
-#endif /* __KERNEL__ */
\ No newline at end of file
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=4:tabstop=4:
- *
- * Copyright (c) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or modify it under
- * the terms of version 2 of the GNU General Public License as published by
- * the Free Software Foundation. Lustre is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details. You should have received a
- * copy of the GNU General Public License along with Lustre; if not, write
- * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-
-/*
- * miscellaneous libcfs stuff
- */
-#define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/types.h>
-
-/*
- * Convert server error code to client format. Error codes are from
- * Linux errno.h, so for Linux client---identity.
- */
-int convert_server_error(__u64 ecode)
-{
- return cfs_error_code((NTSTATUS)ecode);
-}
-
-/*
- * convert <fcntl.h> flag from client to server.
- *
- * nt kernel uses several members to describe the open flags
- * such as DesiredAccess/ShareAccess/CreateDisposition/CreateOptions
- * so it's better to convert when using, not here.
- */
-
-int convert_client_oflag(int cflag, int *result)
-{
- *result = 0;
- return 0;
-}
-
-
-int cfs_error_code(NTSTATUS Status)
-{
- switch (Status) {
-
- case STATUS_ACCESS_DENIED:
- return (-EACCES);
-
- case STATUS_ACCESS_VIOLATION:
- return (-EFAULT);
-
- case STATUS_BUFFER_TOO_SMALL:
- return (-ETOOSMALL);
-
- case STATUS_INVALID_PARAMETER:
- return (-EINVAL);
-
- case STATUS_NOT_IMPLEMENTED:
- case STATUS_NOT_SUPPORTED:
- return (-EOPNOTSUPP);
-
- case STATUS_INVALID_ADDRESS:
- case STATUS_INVALID_ADDRESS_COMPONENT:
- return (-EADDRNOTAVAIL);
-
- case STATUS_NO_SUCH_DEVICE:
- case STATUS_NO_SUCH_FILE:
- case STATUS_OBJECT_NAME_NOT_FOUND:
- case STATUS_OBJECT_PATH_NOT_FOUND:
- case STATUS_NETWORK_BUSY:
- case STATUS_INVALID_NETWORK_RESPONSE:
- case STATUS_UNEXPECTED_NETWORK_ERROR:
- return (-ENETDOWN);
-
- case STATUS_BAD_NETWORK_PATH:
- case STATUS_NETWORK_UNREACHABLE:
- case STATUS_PROTOCOL_UNREACHABLE:
- return (-ENETUNREACH);
-
- case STATUS_LOCAL_DISCONNECT:
- case STATUS_TRANSACTION_ABORTED:
- case STATUS_CONNECTION_ABORTED:
- return (-ECONNABORTED);
-
- case STATUS_REMOTE_DISCONNECT:
- case STATUS_LINK_FAILED:
- case STATUS_CONNECTION_DISCONNECTED:
- case STATUS_CONNECTION_RESET:
- case STATUS_PORT_UNREACHABLE:
- return (-ECONNRESET);
-
- case STATUS_PAGEFILE_QUOTA:
- case STATUS_NO_MEMORY:
- case STATUS_CONFLICTING_ADDRESSES:
- case STATUS_QUOTA_EXCEEDED:
- case STATUS_TOO_MANY_PAGING_FILES:
- case STATUS_INSUFFICIENT_RESOURCES:
- case STATUS_WORKING_SET_QUOTA:
- case STATUS_COMMITMENT_LIMIT:
- case STATUS_TOO_MANY_ADDRESSES:
- case STATUS_REMOTE_RESOURCES:
- return (-ENOBUFS);
-
- case STATUS_INVALID_CONNECTION:
- return (-ENOTCONN);
-
- case STATUS_PIPE_DISCONNECTED:
- return (-ESHUTDOWN);
-
- case STATUS_TIMEOUT:
- case STATUS_IO_TIMEOUT:
- case STATUS_LINK_TIMEOUT:
- return (-ETIMEDOUT);
-
- case STATUS_REMOTE_NOT_LISTENING:
- case STATUS_CONNECTION_REFUSED:
- return (-ECONNREFUSED);
-
- case STATUS_HOST_UNREACHABLE:
- return (-EHOSTUNREACH);
-
- case STATUS_PENDING:
- case STATUS_DEVICE_NOT_READY:
- return (-EAGAIN);
-
- case STATUS_CANCELLED:
- case STATUS_REQUEST_ABORTED:
- return (-EINTR);
-
- case STATUS_BUFFER_OVERFLOW:
- case STATUS_INVALID_BUFFER_SIZE:
- return (-EMSGSIZE);
-
- }
-
- if (NT_SUCCESS(Status))
- return 0;
-
- return (-EINVAL);
-}
-
-
-void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
-{
-}
-
-void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
-{
- return NULL;
-}
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>CFBundleDevelopmentRegion</key>
- <string>English</string>
- <key>CFBundleExecutable</key>
- <string>lnet</string>
- <key>CFBundleIconFile</key>
- <string></string>
- <key>CFBundleIdentifier</key>
- <string>com.clusterfs.lustre.lnet</string>
- <key>CFBundleInfoDictionaryVersion</key>
- <string>6.0</string>
- <key>CFBundlePackageType</key>
- <string>KEXT</string>
- <key>CFBundleSignature</key>
- <string>????</string>
- <key>CFBundleVersion</key>
- <string>1.0.1</string>
- <key>OSBundleCompatibleVersion</key>
- <string>1.0.0</string>
- <key>OSBundleLibraries</key>
- <dict>
- <key>com.apple.kpi.bsd</key>
- <string>8.0.0b1</string>
- <key>com.apple.kpi.libkern</key>
- <string>8.0.0b1</string>
- <key>com.apple.kpi.mach</key>
- <string>8.0.0b1</string>
- <key>com.apple.kpi.unsupported</key>
- <string>8.0.0b1</string>
- <key>com.clusterfs.lustre.libcfs</key>
- <string>1.0.0</string>
- </dict>
-</dict>
-</plist>
+++ /dev/null
-MODULES := lnet
-
-lnet-objs := api-errno.o api-ni.o config.o
-lnet-objs += lib-me.o lib-msg.o lib-eq.o lib-md.o
-lnet-objs += lib-move.o module.o lo.o
-lnet-objs += router.o router_proc.o acceptor.o peer.o
-
-default: all
-
-@INCLUDE_RULES@
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/lib-lnet.h>
-
-#ifdef __KERNEL__
-static char *accept = "secure";
-CFS_MODULE_PARM(accept, "s", charp, 0444,
- "Accept connections (secure|all|none)");
-
-static int accept_port = 988;
-CFS_MODULE_PARM(accept_port, "i", int, 0444,
- "Acceptor's port (same on all nodes)");
-
-static int accept_backlog = 127;
-CFS_MODULE_PARM(accept_backlog, "i", int, 0444,
- "Acceptor's listen backlog");
-
-static int accept_timeout = 5;
-CFS_MODULE_PARM(accept_timeout, "i", int, 0644,
- "Acceptor's timeout (seconds)");
-
-struct {
- int pta_shutdown;
- cfs_socket_t *pta_sock;
- struct semaphore pta_signal;
-} lnet_acceptor_state;
-
-int
-lnet_acceptor_timeout(void)
-{
- return accept_timeout;
-}
-EXPORT_SYMBOL(lnet_acceptor_timeout);
-
-int
-lnet_acceptor_port(void)
-{
- return accept_port;
-}
-EXPORT_SYMBOL(lnet_acceptor_port);
-
-void
-lnet_connect_console_error (int rc, lnet_nid_t peer_nid,
- __u32 peer_ip, int peer_port)
-{
- switch (rc) {
- /* "normal" errors */
- case -ECONNREFUSED:
- CDEBUG(D_NETERROR, "Connection to %s at host %u.%u.%u.%u "
- "on port %d was refused: "
- "check that Lustre is running on that node.\n",
- libcfs_nid2str(peer_nid),
- HIPQUAD(peer_ip), peer_port);
- break;
- case -EHOSTUNREACH:
- case -ENETUNREACH:
- CDEBUG(D_NETERROR, "Connection to %s at host %u.%u.%u.%u "
- "was unreachable: the network or that node may "
- "be down, or Lustre may be misconfigured.\n",
- libcfs_nid2str(peer_nid), HIPQUAD(peer_ip));
- break;
- case -ETIMEDOUT:
- CDEBUG(D_NETERROR, "Connection to %s at host %u.%u.%u.%u on "
- "port %d took too long: that node may be hung "
- "or experiencing high load.\n",
- libcfs_nid2str(peer_nid),
- HIPQUAD(peer_ip), peer_port);
- break;
- case -ECONNRESET:
- LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %u.%u.%u.%u"
- " on port %d was reset: "
- "is it running a compatible version of "
- "Lustre and is %s one of its NIDs?\n",
- libcfs_nid2str(peer_nid),
- HIPQUAD(peer_ip), peer_port,
- libcfs_nid2str(peer_nid));
- break;
- case -EPROTO:
- LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at "
- "host %u.%u.%u.%u on port %d: is it running "
- "a compatible version of Lustre?\n",
- libcfs_nid2str(peer_nid),
- HIPQUAD(peer_ip), peer_port);
- break;
- case -EADDRINUSE:
- LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to "
- "connect to %s at host %u.%u.%u.%u on port "
- "%d\n", libcfs_nid2str(peer_nid),
- HIPQUAD(peer_ip), peer_port);
- break;
- default:
- LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s"
- " at host %u.%u.%u.%u on port %d\n", rc,
- libcfs_nid2str(peer_nid),
- HIPQUAD(peer_ip), peer_port);
- break;
- }
-}
-EXPORT_SYMBOL(lnet_connect_console_error);
-
-int
-lnet_connect(cfs_socket_t **sockp, lnet_nid_t peer_nid,
- __u32 local_ip, __u32 peer_ip, int peer_port)
-{
- lnet_acceptor_connreq_t cr;
- cfs_socket_t *sock;
- int rc;
- int port;
- int fatal;
-
- CLASSERT (sizeof(cr) <= 16); /* not too big to be on the stack */
-
- for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
- port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
- --port) {
- /* Iterate through reserved ports. */
-
- rc = libcfs_sock_connect(&sock, &fatal,
- local_ip, port,
- peer_ip, peer_port);
- if (rc != 0) {
- if (fatal)
- goto failed;
- continue;
- }
-
- CLASSERT (LNET_PROTO_ACCEPTOR_VERSION == 1);
-
- if (the_lnet.ln_ptlcompat != 2) {
- /* When portals compatibility is "strong", simply
- * connect (i.e. send no acceptor connection request).
- * Othewise send an acceptor connection request. I can
- * have no portals peers so everyone else should
- * understand my protocol. */
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
- cr.acr_nid = peer_nid;
-
- if (the_lnet.ln_testprotocompat != 0) {
- /* single-shot proto check */
- LNET_LOCK();
- if ((the_lnet.ln_testprotocompat & 4) != 0) {
- cr.acr_version++;
- the_lnet.ln_testprotocompat &= ~4;
- }
- if ((the_lnet.ln_testprotocompat & 8) != 0) {
- cr.acr_magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~8;
- }
- LNET_UNLOCK();
- }
-
- rc = libcfs_sock_write(sock, &cr, sizeof(cr),
- accept_timeout);
- if (rc != 0)
- goto failed_sock;
- }
-
- *sockp = sock;
- return 0;
- }
-
- rc = -EADDRINUSE;
- goto failed;
-
- failed_sock:
- libcfs_sock_release(sock);
- failed:
- lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
- return rc;
-}
-EXPORT_SYMBOL(lnet_connect);
-
-static inline int
-lnet_accept_magic(__u32 magic, __u32 constant)
-{
- return (magic == constant ||
- magic == __swab32(constant));
-}
-
-int
-lnet_accept(lnet_ni_t *blind_ni, cfs_socket_t *sock, __u32 magic)
-{
- lnet_acceptor_connreq_t cr;
- __u32 peer_ip;
- int peer_port;
- int rc;
- int flip;
- lnet_ni_t *ni;
- char *str;
-
- /* CAVEAT EMPTOR: I may be called by an LND in any thread's context if
- * I passed the new socket "blindly" to the single NI that needed an
- * acceptor. If so, blind_ni != NULL... */
-
- LASSERT (sizeof(cr) <= 16); /* not too big for the stack */
-
- rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- LASSERT (rc == 0); /* we succeeded before */
-
- if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
-
- if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
- /* future version compatibility!
- * When LNET unifies protocols over all LNDs, the first
- * thing sent will be a version query. I send back
- * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" */
-
- memset (&cr, 0, sizeof(cr));
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
- rc = libcfs_sock_write(sock, &cr, sizeof(cr),
- accept_timeout);
-
- if (rc != 0)
- CERROR("Error sending magic+version in response"
- "to LNET magic from %u.%u.%u.%u: %d\n",
- HIPQUAD(peer_ip), rc);
- return -EPROTO;
- }
-
- if (magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC))
- str = "'old' socknal/tcpnal";
- else if (lnet_accept_magic(magic, LNET_PROTO_RA_MAGIC))
- str = "'old' ranal";
- else if (lnet_accept_magic(magic, LNET_PROTO_OPENIB_MAGIC))
- str = "'old' openibnal";
- else
- str = "unrecognised";
-
- LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %u.%u.%u.%u"
- " magic %08x: %s acceptor protocol\n",
- HIPQUAD(peer_ip), magic, str);
- return -EPROTO;
- }
-
- flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
-
- rc = libcfs_sock_read(sock, &cr.acr_version,
- sizeof(cr.acr_version),
- accept_timeout);
- if (rc != 0) {
- CERROR("Error %d reading connection request version from "
- "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
- return -EIO;
- }
-
- if (flip)
- __swab32s(&cr.acr_version);
-
- if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) {
- /* future version compatibility!
- * An acceptor-specific protocol rev will first send a version
- * query. I send back my current version to tell her I'm
- * "old". */
- int peer_version = cr.acr_version;
-
- memset (&cr, 0, sizeof(cr));
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-
- rc = libcfs_sock_write(sock, &cr, sizeof(cr),
- accept_timeout);
-
- if (rc != 0)
- CERROR("Error sending magic+version in response"
- "to version %d from %u.%u.%u.%u: %d\n",
- peer_version, HIPQUAD(peer_ip), rc);
- return -EPROTO;
- }
-
- rc = libcfs_sock_read(sock, &cr.acr_nid,
- sizeof(cr) -
- offsetof(lnet_acceptor_connreq_t, acr_nid),
- accept_timeout);
- if (rc != 0) {
- CERROR("Error %d reading connection request from "
- "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
- return -EIO;
- }
-
- if (flip)
- __swab64s(&cr.acr_nid);
-
- ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
- if (ni == NULL || /* no matching net */
- ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
- if (ni != NULL)
- lnet_ni_decref(ni);
- LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %u.%u.%u.%u"
- " for %s: No matching NI\n",
- HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- if (ni->ni_lnd->lnd_accept == NULL) {
- /* This catches a request for the loopback LND */
- lnet_ni_decref(ni);
- LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %u.%u.%u.%u"
- " for %s: NI doesn not accept IP connections\n",
- HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- CDEBUG(D_NET, "Accept %s from %u.%u.%u.%u%s\n",
- libcfs_nid2str(cr.acr_nid), HIPQUAD(peer_ip),
- blind_ni == NULL ? "" : " (blind)");
-
- if (blind_ni == NULL) {
- /* called by the acceptor: call into the requested NI... */
- rc = ni->ni_lnd->lnd_accept(ni, sock);
- } else {
- /* portals_compatible set and the (only) NI called me to verify
- * and skip the connection request... */
- LASSERT (the_lnet.ln_ptlcompat != 0);
- LASSERT (ni == blind_ni);
- rc = 0;
- }
-
- lnet_ni_decref(ni);
- return rc;
-}
-EXPORT_SYMBOL(lnet_accept);
-
-int
-lnet_acceptor(void *arg)
-{
- char name[16];
- cfs_socket_t *newsock;
- int rc;
- int n_acceptor_nis;
- __u32 magic;
- __u32 peer_ip;
- int peer_port;
- lnet_ni_t *blind_ni = NULL;
- int secure = (int)((unsigned long)arg);
-
- LASSERT (lnet_acceptor_state.pta_sock == NULL);
-
- if (the_lnet.ln_ptlcompat != 0) {
- /* When portals_compatibility is enabled, peers may connect
- * without sending an acceptor connection request. There is no
- * ambiguity about which network the peer wants to connect to
- * since there can only be 1 network, so I pass connections
- * "blindly" to it. */
- n_acceptor_nis = lnet_count_acceptor_nis(&blind_ni);
- LASSERT (n_acceptor_nis == 1);
- LASSERT (blind_ni != NULL);
- }
-
- snprintf(name, sizeof(name), "acceptor_%03d", accept_port);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock,
- 0, accept_port, accept_backlog);
- if (rc != 0) {
- if (rc == -EADDRINUSE)
- LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
- " %d: port already in use\n",
- accept_port);
- else
- LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port "
- "%d: unexpected error %d\n",
- accept_port, rc);
-
- lnet_acceptor_state.pta_sock = NULL;
- } else {
- LCONSOLE(0, "Accept %s, port %d%s\n",
- accept, accept_port,
- blind_ni == NULL ? "" : " (proto compatible)");
- }
-
- /* set init status and unblock parent */
- lnet_acceptor_state.pta_shutdown = rc;
- mutex_up(&lnet_acceptor_state.pta_signal);
-
- if (rc != 0)
- return rc;
-
- while (!lnet_acceptor_state.pta_shutdown) {
-
- rc = libcfs_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
- if (rc != 0) {
- if (rc != -EAGAIN) {
- CWARN("Accept error %d: pausing...\n", rc);
- cfs_pause(cfs_time_seconds(1));
- }
- continue;
- }
-
- rc = libcfs_sock_getaddr(newsock, 1, &peer_ip, &peer_port);
- if (rc != 0) {
- CERROR("Can't determine new connection's address\n");
- goto failed;
- }
-
- if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- CERROR("Refusing connection from %u.%u.%u.%u: "
- "insecure port %d\n",
- HIPQUAD(peer_ip), peer_port);
- goto failed;
- }
-
- if (blind_ni != NULL) {
- rc = blind_ni->ni_lnd->lnd_accept(blind_ni, newsock);
- if (rc != 0) {
- CERROR("NI %s refused 'blind' connection from "
- "%u.%u.%u.%u\n",
- libcfs_nid2str(blind_ni->ni_nid),
- HIPQUAD(peer_ip));
- goto failed;
- }
- continue;
- }
-
- rc = libcfs_sock_read(newsock, &magic, sizeof(magic),
- accept_timeout);
- if (rc != 0) {
- CERROR("Error %d reading connection request from "
- "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
- goto failed;
- }
-
- rc = lnet_accept(NULL, newsock, magic);
- if (rc != 0)
- goto failed;
-
- continue;
-
- failed:
- libcfs_sock_release(newsock);
- }
-
- libcfs_sock_release(lnet_acceptor_state.pta_sock);
- lnet_acceptor_state.pta_sock = NULL;
-
- if (blind_ni != NULL)
- lnet_ni_decref(blind_ni);
-
- LCONSOLE(0,"Acceptor stopping\n");
-
- /* unblock lnet_acceptor_stop() */
- mutex_up(&lnet_acceptor_state.pta_signal);
- return 0;
-}
-
-int
-lnet_acceptor_start(void)
-{
- long pid;
- long secure;
-
- LASSERT (lnet_acceptor_state.pta_sock == NULL);
- init_mutex_locked(&lnet_acceptor_state.pta_signal);
-
- if (!strcmp(accept, "secure")) {
- secure = 1;
- } else if (!strcmp(accept, "all")) {
- secure = 0;
- } else if (!strcmp(accept, "none")) {
- return 0;
- } else {
- LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
- accept);
- return -EINVAL;
- }
-
- if (lnet_count_acceptor_nis(NULL) == 0) /* not required */
- return 0;
-
- pid = cfs_kernel_thread(lnet_acceptor, (void *)secure, 0);
- if (pid < 0) {
- CERROR("Can't start acceptor thread: %ld\n", pid);
- return -ESRCH;
- }
-
- mutex_down(&lnet_acceptor_state.pta_signal); /* wait for acceptor to startup */
-
- if (!lnet_acceptor_state.pta_shutdown) {
- /* started OK */
- LASSERT (lnet_acceptor_state.pta_sock != NULL);
- return 0;
- }
-
- LASSERT (lnet_acceptor_state.pta_sock == NULL);
- return -ENETDOWN;
-}
-
-void
-lnet_acceptor_stop(void)
-{
- if (lnet_acceptor_state.pta_sock == NULL) /* not running */
- return;
-
- lnet_acceptor_state.pta_shutdown = 1;
- libcfs_sock_abort_accept(lnet_acceptor_state.pta_sock);
-
- /* block until acceptor signals exit */
- mutex_down(&lnet_acceptor_state.pta_signal);
-}
-
-#else /* __KERNEL__ */
-#ifdef HAVE_LIBPTHREAD
-
-static char *accept_type;
-static int accept_port = 988;
-static int accept_backlog;
-static int accept_timeout;
-
-struct {
- int pta_shutdown;
- int pta_sock;
- struct cfs_completion pta_completion;
-} lnet_acceptor_state;
-
-int
-lnet_acceptor_port(void)
-{
- return accept_port;
-}
-
-int
-lnet_parse_int_tunable(int *value, char *name, int dflt)
-{
- char *env = getenv(name);
- char *end;
-
- if (env == NULL) {
- *value = dflt;
- return 0;
- }
-
- *value = strtoull(env, &end, 0);
- if (*end == 0)
- return 0;
-
- CERROR("Can't parse tunable %s=%s\n", name, env);
- return -EINVAL;
-}
-
-int
-lnet_parse_string_tunable(char **value, char *name, char *dflt)
-{
- char *env = getenv(name);
-
- if (env == NULL)
- *value = dflt;
- else
- *value = env;
-
- return 0;
-}
-
-int
-lnet_acceptor_get_tunables()
-{
- int rc;
- rc = lnet_parse_string_tunable(&accept_type, "LNET_ACCEPT", "secure");
-
- if (rc != 0)
- return rc;
-
- rc = lnet_parse_int_tunable(&accept_port, "LNET_ACCEPT_PORT", 988);
-
- if (rc != 0)
- return rc;
-
- rc = lnet_parse_int_tunable(&accept_backlog, "LNET_ACCEPT_BACKLOG", 127);
-
- if (rc != 0)
- return rc;
-
- rc = lnet_parse_int_tunable(&accept_timeout, "LNET_ACCEPT_TIMEOUT", 5);
-
- if (rc != 0)
- return rc;
-
- CDEBUG(D_NET, "accept_type = %s\n", accept_type);
- CDEBUG(D_NET, "accept_port = %d\n", accept_port);
- CDEBUG(D_NET, "accept_backlog = %d\n", accept_backlog);
- CDEBUG(D_NET, "accept_timeout = %d\n", accept_timeout);
- return 0;
-}
-
-static inline int
-lnet_accept_magic(__u32 magic, __u32 constant)
-{
- return (magic == constant ||
- magic == __swab32(constant));
-}
-
-/* user-land lnet_accept() isn't used by any LND's directly. So, we don't
- * do it visible outside acceptor.c and we can change its prototype
- * freely */
-static int
-lnet_accept(int sock, __u32 magic, __u32 peer_ip, int peer_port)
-{
- int rc, flip;
- lnet_acceptor_connreq_t cr;
- lnet_ni_t *ni;
-
- if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
- LCONSOLE_ERROR("Refusing connection from %u.%u.%u.%u magic %08x: "
- "unsupported acceptor protocol\n",
- HIPQUAD(peer_ip), magic);
- return -EPROTO;
- }
-
- flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
-
- rc = libcfs_sock_read(sock, &cr.acr_version,
- sizeof(cr.acr_version),
- accept_timeout);
- if (rc != 0) {
- CERROR("Error %d reading connection request version from "
- "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
- return -EIO;
- }
-
- if (flip)
- __swab32s(&cr.acr_version);
-
- if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION)
- return -EPROTO;
-
- rc = libcfs_sock_read(sock, &cr.acr_nid,
- sizeof(cr) -
- offsetof(lnet_acceptor_connreq_t, acr_nid),
- accept_timeout);
- if (rc != 0) {
- CERROR("Error %d reading connection request from "
- "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
- return -EIO;
- }
-
- if (flip)
- __swab64s(&cr.acr_nid);
-
- ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
-
- if (ni == NULL || /* no matching net */
- ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
- if (ni != NULL)
- lnet_ni_decref(ni);
- LCONSOLE_ERROR("Refusing connection from %u.%u.%u.%u for %s: "
- " No matching NI\n",
- HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- if (ni->ni_lnd->lnd_accept == NULL) {
- lnet_ni_decref(ni);
- LCONSOLE_ERROR("Refusing connection from %u.%u.%u.%u for %s: "
- " NI doesn not accept IP connections\n",
- HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- CDEBUG(D_NET, "Accept %s from %u.%u.%u.%u\n",
- libcfs_nid2str(cr.acr_nid), HIPQUAD(peer_ip));
-
- rc = ni->ni_lnd->lnd_accept(ni, sock);
-
- lnet_ni_decref(ni);
- return rc;
-}
-
-int
-lnet_acceptor(void *arg)
-{
- char name[16];
- int secure = (int)((unsigned long)arg);
- int rc;
- int newsock;
- __u32 peer_ip;
- int peer_port;
- __u32 magic;
-
- snprintf(name, sizeof(name), "acceptor_%03d", accept_port);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock,
- 0, accept_port, accept_backlog);
- if (rc != 0) {
- if (rc == -EADDRINUSE)
- LCONSOLE_ERROR("Can't start acceptor on port %d: "
- "port already in use\n",
- accept_port);
- else
- LCONSOLE_ERROR("Can't start acceptor on port %d: "
- "unexpected error %d\n",
- accept_port, rc);
-
- } else {
- LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
- }
-
- /* set init status and unblock parent */
- lnet_acceptor_state.pta_shutdown = rc;
- cfs_complete(&lnet_acceptor_state.pta_completion);
-
- if (rc != 0)
- return rc;
-
- while (!lnet_acceptor_state.pta_shutdown) {
-
- rc = libcfs_sock_accept(&newsock, lnet_acceptor_state.pta_sock,
- &peer_ip, &peer_port);
- if (rc != 0)
- continue;
-
- /* maybe we're waken up with libcfs_sock_abort_accept() */
- if (lnet_acceptor_state.pta_shutdown) {
- close(newsock);
- break;
- }
-
- if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- CERROR("Refusing connection from %u.%u.%u.%u: "
- "insecure port %d\n",
- HIPQUAD(peer_ip), peer_port);
- goto failed;
- }
-
- rc = libcfs_sock_read(newsock, &magic, sizeof(magic),
- accept_timeout);
- if (rc != 0) {
- CERROR("Error %d reading connection request from "
- "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
- goto failed;
- }
-
- rc = lnet_accept(newsock, magic, peer_ip, peer_port);
- if (rc != 0)
- goto failed;
-
- continue;
-
- failed:
- close(newsock);
- }
-
- close(lnet_acceptor_state.pta_sock);
- LCONSOLE(0,"Acceptor stopping\n");
-
- /* unblock lnet_acceptor_stop() */
- cfs_complete(&lnet_acceptor_state.pta_completion);
-
- return 0;
-}
-
-static int skip_waiting_for_completion;
-
-int
-lnet_acceptor_start(void)
-{
- long secure;
- int rc;
-
- rc = lnet_acceptor_get_tunables();
- if (rc != 0)
- return rc;
-
- /* Do nothing if we're liblustre clients */
- if ((the_lnet.ln_pid & LNET_PID_USERFLAG) != 0)
- return 0;
-
- cfs_init_completion(&lnet_acceptor_state.pta_completion);
-
- if (!strcmp(accept_type, "secure")) {
- secure = 1;
- } else if (!strcmp(accept_type, "all")) {
- secure = 0;
- } else if (!strcmp(accept_type, "none")) {
- skip_waiting_for_completion = 1;
- return 0;
- } else {
- LCONSOLE_ERROR ("Can't parse 'accept_type=\"%s\"'\n", accept_type);
- cfs_fini_completion(&lnet_acceptor_state.pta_completion);
- return -EINVAL;
- }
-
- if (lnet_count_acceptor_nis(NULL) == 0) { /* not required */
- skip_waiting_for_completion = 1;
- return 0;
- }
-
- rc = cfs_create_thread(lnet_acceptor, (void *)secure);
- if (rc != 0) {
- CERROR("Can't start acceptor thread: %d\n", rc);
- cfs_fini_completion(&lnet_acceptor_state.pta_completion);
- return rc;
- }
-
- /* wait for acceptor to startup */
- cfs_wait_for_completion(&lnet_acceptor_state.pta_completion);
-
- if (!lnet_acceptor_state.pta_shutdown)
- return 0;
-
- cfs_fini_completion(&lnet_acceptor_state.pta_completion);
- return -ENETDOWN;
-}
-
-void
-lnet_acceptor_stop(void)
-{
- /* Do nothing if we're liblustre clients */
- if ((the_lnet.ln_pid & LNET_PID_USERFLAG) != 0)
- return;
-
- if (!skip_waiting_for_completion) {
- lnet_acceptor_state.pta_shutdown = 1;
- libcfs_sock_abort_accept(accept_port);
-
- /* block until acceptor signals exit */
- cfs_wait_for_completion(&lnet_acceptor_state.pta_completion);
- }
-
- cfs_fini_completion(&lnet_acceptor_state.pta_completion);
-}
-#else
-int
-lnet_acceptor_start(void)
-{
- return 0;
-}
-
-void
-lnet_acceptor_stop(void)
-{
-}
-#endif /* !HAVE_LIBPTHREAD */
-#endif /* !__KERNEL__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-errno.c
- * Instantiate the string table of errors
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- * This file is not subject to copyright protection.
- */
-
-/* If you change these, you must update the number table in portals/errno.h */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/lib-lnet.h>
-
-#ifdef __KERNEL__
-#define D_LNI D_CONSOLE
-#else
-#define D_LNI D_CONFIG
-#endif
-
-lnet_t the_lnet; /* THE state of the network */
-
-#ifdef __KERNEL__
-
-static char *ip2nets = "";
-CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
- "LNET network <- IP table");
-
-static char *networks = "";
-CFS_MODULE_PARM(networks, "s", charp, 0444,
- "local networks");
-
-static char *routes = "";
-CFS_MODULE_PARM(routes, "s", charp, 0444,
- "routes to non-local networks");
-
-static char *portals_compatibility = "none";
-CFS_MODULE_PARM(portals_compatibility, "s", charp, 0444,
- "wire protocol compatibility: 'strong'|'weak'|'none'");
-
-char *
-lnet_get_routes(void)
-{
- return routes;
-}
-
-char *
-lnet_get_networks(void)
-{
- char *nets;
- int rc;
-
- if (*networks != 0 && *ip2nets != 0) {
- LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
- "'ip2nets' but not both at once\n");
- return NULL;
- }
-
- if (*ip2nets != 0) {
- rc = lnet_parse_ip2nets(&nets, ip2nets);
- return (rc == 0) ? nets : NULL;
- }
-
- if (*networks != 0)
- return networks;
-
- return "tcp";
-}
-
-int
-lnet_get_portals_compatibility(void)
-{
- if (!strcmp(portals_compatibility, "none")) {
- return 0;
- }
-
- if (!strcmp(portals_compatibility, "weak")) {
- return 1;
- LCONSOLE_WARN("Starting in weak portals-compatible mode\n");
- }
-
- if (!strcmp(portals_compatibility, "strong")) {
- return 2;
- LCONSOLE_WARN("Starting in strong portals-compatible mode\n");
- }
-
- LCONSOLE_ERROR_MSG(0x102, "portals_compatibility=\"%s\" not supported\n",
- portals_compatibility);
- return -EINVAL;
-}
-
-void
-lnet_init_locks(void)
-{
- spin_lock_init (&the_lnet.ln_lock);
- cfs_waitq_init (&the_lnet.ln_waitq);
- init_mutex(&the_lnet.ln_lnd_mutex);
- init_mutex(&the_lnet.ln_api_mutex);
-}
-
-void
-lnet_fini_locks(void)
-{
-}
-
-#else
-
-char *
-lnet_get_routes(void)
-{
- char *str = getenv("LNET_ROUTES");
-
- return (str == NULL) ? "" : str;
-}
-
-char *
-lnet_get_networks (void)
-{
- static char default_networks[256];
- char *networks = getenv ("LNET_NETWORKS");
- char *ip2nets = getenv ("LNET_IP2NETS");
- char *str;
- char *sep;
- int len;
- int nob;
- int rc;
- struct list_head *tmp;
-
-#ifdef NOT_YET
- if (networks != NULL && ip2nets != NULL) {
- LCONSOLE_ERROR_MSG(0x103, "Please set EITHER 'LNET_NETWORKS' or"
- " 'LNET_IP2NETS' but not both at once\n");
- return NULL;
- }
-
- if (ip2nets != NULL) {
- rc = lnet_parse_ip2nets(&networks, ip2nets);
- return (rc == 0) ? networks : NULL;
- }
-#else
- ip2nets = NULL;
- rc = 0;
-#endif
- if (networks != NULL)
- return networks;
-
- /* In userland, the default 'networks=' is the list of known net types */
-
- len = sizeof(default_networks);
- str = default_networks;
- *str = 0;
- sep = "";
-
- list_for_each (tmp, &the_lnet.ln_lnds) {
- lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
-
- nob = snprintf(str, len, "%s%s", sep,
- libcfs_lnd2str(lnd->lnd_type));
- len -= nob;
- if (len < 0) {
- /* overflowed the string; leave it where it was */
- *str = 0;
- break;
- }
-
- str += nob;
- sep = ",";
- }
-
- return default_networks;
-}
-
-int
-lnet_get_portals_compatibility(void)
-{
- return 0;
-}
-
-# ifndef HAVE_LIBPTHREAD
-
-void lnet_init_locks(void)
-{
- the_lnet.ln_lock = 0;
- the_lnet.ln_lnd_mutex = 0;
- the_lnet.ln_api_mutex = 0;
-}
-
-void lnet_fini_locks(void)
-{
- LASSERT (the_lnet.ln_api_mutex == 0);
- LASSERT (the_lnet.ln_lnd_mutex == 0);
- LASSERT (the_lnet.ln_lock == 0);
-}
-
-# else
-
-void lnet_init_locks(void)
-{
- pthread_cond_init(&the_lnet.ln_cond, NULL);
- pthread_mutex_init(&the_lnet.ln_lock, NULL);
- pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL);
- pthread_mutex_init(&the_lnet.ln_api_mutex, NULL);
-}
-
-void lnet_fini_locks(void)
-{
- pthread_mutex_destroy(&the_lnet.ln_api_mutex);
- pthread_mutex_destroy(&the_lnet.ln_lnd_mutex);
- pthread_mutex_destroy(&the_lnet.ln_lock);
- pthread_cond_destroy(&the_lnet.ln_cond);
-}
-
-# endif
-#endif
-
-void lnet_assert_wire_constants (void)
-{
- /* Wire protocol assertions generated by 'wirecheck'
- * running on Linux robert.bartonsoftware.com 2.6.8-1.521
- * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
- * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
-
- /* Constants... */
- CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
- CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
- CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
- CLASSERT (LNET_MSG_ACK == 0);
- CLASSERT (LNET_MSG_PUT == 1);
- CLASSERT (LNET_MSG_GET == 2);
- CLASSERT (LNET_MSG_REPLY == 3);
- CLASSERT (LNET_MSG_HELLO == 4);
-
- /* Checks for struct ptl_handle_wire_t */
- CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
- CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
- CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
- CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
- CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
-
- /* Checks for struct lnet_magicversion_t */
- CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
- CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
- CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
- CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
- CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
- CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
- CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
-
- /* Checks for struct lnet_hdr_t */
- CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
- CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
- CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
- CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
- CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
- CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
- CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
-
- /* Ack */
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
-
- /* Put */
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
-
- /* Get */
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
-
- /* Reply */
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
-
- /* Hello */
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
- CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
- CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
-}
-
-lnd_t *
-lnet_find_lnd_by_type (int type)
-{
- lnd_t *lnd;
- struct list_head *tmp;
-
- /* holding lnd mutex */
- list_for_each (tmp, &the_lnet.ln_lnds) {
- lnd = list_entry(tmp, lnd_t, lnd_list);
-
- if (lnd->lnd_type == type)
- return lnd;
- }
-
- return NULL;
-}
-
-void
-lnet_register_lnd (lnd_t *lnd)
-{
- LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
-
- LASSERT (the_lnet.ln_init);
- LASSERT (libcfs_isknown_lnd(lnd->lnd_type));
- LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
-
- list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds);
- lnd->lnd_refcount = 0;
-
- CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
-
- LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
-}
-
-void
-lnet_unregister_lnd (lnd_t *lnd)
-{
- LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
-
- LASSERT (the_lnet.ln_init);
- LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
- LASSERT (lnd->lnd_refcount == 0);
-
- list_del (&lnd->lnd_list);
- CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
-
- LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
-}
-
-#ifndef LNET_USE_LIB_FREELIST
-
-int
-lnet_descriptor_setup (void)
-{
- return 0;
-}
-
-void
-lnet_descriptor_cleanup (void)
-{
-}
-
-#else
-
-int
-lnet_freelist_init (lnet_freelist_t *fl, int n, int size)
-{
- char *space;
-
- LASSERT (n > 0);
-
- size += offsetof (lnet_freeobj_t, fo_contents);
-
- LIBCFS_ALLOC(space, n * size);
- if (space == NULL)
- return (-ENOMEM);
-
- CFS_INIT_LIST_HEAD (&fl->fl_list);
- fl->fl_objs = space;
- fl->fl_nobjs = n;
- fl->fl_objsize = size;
-
- do
- {
- memset (space, 0, size);
- list_add ((struct list_head *)space, &fl->fl_list);
- space += size;
- } while (--n != 0);
-
- return (0);
-}
-
-void
-lnet_freelist_fini (lnet_freelist_t *fl)
-{
- struct list_head *el;
- int count;
-
- if (fl->fl_nobjs == 0)
- return;
-
- count = 0;
- for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
- count++;
-
- LASSERT (count == fl->fl_nobjs);
-
- LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
- memset (fl, 0, sizeof (fl));
-}
-
-int
-lnet_descriptor_setup (void)
-{
- /* NB on failure caller must still call lnet_descriptor_cleanup */
- /* ****** */
- int rc;
-
- memset (&the_lnet.ln_free_mes, 0, sizeof (the_lnet.ln_free_mes));
- memset (&the_lnet.ln_free_msgs, 0, sizeof (the_lnet.ln_free_msgs));
- memset (&the_lnet.ln_free_mds, 0, sizeof (the_lnet.ln_free_mds));
- memset (&the_lnet.ln_free_eqs, 0, sizeof (the_lnet.ln_free_eqs));
-
- rc = lnet_freelist_init(&the_lnet.ln_free_mes,
- MAX_MES, sizeof (lnet_me_t));
- if (rc != 0)
- return (rc);
-
- rc = lnet_freelist_init(&the_lnet.ln_free_msgs,
- MAX_MSGS, sizeof (lnet_msg_t));
- if (rc != 0)
- return (rc);
-
- rc = lnet_freelist_init(&the_lnet.ln_free_mds,
- MAX_MDS, sizeof (lnet_libmd_t));
- if (rc != 0)
- return (rc);
-
- rc = lnet_freelist_init(&the_lnet.ln_free_eqs,
- MAX_EQS, sizeof (lnet_eq_t));
- return (rc);
-}
-
-void
-lnet_descriptor_cleanup (void)
-{
- lnet_freelist_fini (&the_lnet.ln_free_mes);
- lnet_freelist_fini (&the_lnet.ln_free_msgs);
- lnet_freelist_fini (&the_lnet.ln_free_mds);
- lnet_freelist_fini (&the_lnet.ln_free_eqs);
-}
-
-#endif
-
-__u64
-lnet_create_interface_cookie (void)
-{
- /* NB the interface cookie in wire handles guards against delayed
- * replies and ACKs appearing valid after reboot. Initialisation time,
- * even if it's only implemented to millisecond resolution is probably
- * easily good enough. */
- struct timeval tv;
- __u64 cookie;
-#ifndef __KERNEL__
- int rc = gettimeofday (&tv, NULL);
- LASSERT (rc == 0);
-#else
- do_gettimeofday(&tv);
-#endif
- cookie = tv.tv_sec;
- cookie *= 1000000;
- cookie += tv.tv_usec;
- return cookie;
-}
-
-int
-lnet_setup_handle_hash (void)
-{
- int i;
-
- /* Arbitrary choice of hash table size */
-#ifdef __KERNEL__
- the_lnet.ln_lh_hash_size = CFS_PAGE_SIZE / sizeof (struct list_head);
-#else
- the_lnet.ln_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
-#endif
- LIBCFS_ALLOC(the_lnet.ln_lh_hash_table,
- the_lnet.ln_lh_hash_size * sizeof (struct list_head));
- if (the_lnet.ln_lh_hash_table == NULL)
- return (-ENOMEM);
-
- for (i = 0; i < the_lnet.ln_lh_hash_size; i++)
- CFS_INIT_LIST_HEAD (&the_lnet.ln_lh_hash_table[i]);
-
- the_lnet.ln_next_object_cookie = LNET_COOKIE_TYPES;
-
- return (0);
-}
-
-void
-lnet_cleanup_handle_hash (void)
-{
- if (the_lnet.ln_lh_hash_table == NULL)
- return;
-
- LIBCFS_FREE(the_lnet.ln_lh_hash_table,
- the_lnet.ln_lh_hash_size * sizeof (struct list_head));
-}
-
-lnet_libhandle_t *
-lnet_lookup_cookie (__u64 cookie, int type)
-{
- /* ALWAYS called with LNET_LOCK held */
- struct list_head *list;
- struct list_head *el;
- unsigned int hash;
-
- if ((cookie & (LNET_COOKIE_TYPES - 1)) != type)
- return (NULL);
-
- hash = ((unsigned int)cookie) % the_lnet.ln_lh_hash_size;
- list = &the_lnet.ln_lh_hash_table[hash];
-
- list_for_each (el, list) {
- lnet_libhandle_t *lh = list_entry (el, lnet_libhandle_t,
- lh_hash_chain);
-
- if (lh->lh_cookie == cookie)
- return (lh);
- }
-
- return (NULL);
-}
-
-void
-lnet_initialise_handle (lnet_libhandle_t *lh, int type)
-{
- /* ALWAYS called with LNET_LOCK held */
- unsigned int hash;
-
- LASSERT (type >= 0 && type < LNET_COOKIE_TYPES);
- lh->lh_cookie = the_lnet.ln_next_object_cookie | type;
- the_lnet.ln_next_object_cookie += LNET_COOKIE_TYPES;
-
- hash = ((unsigned int)lh->lh_cookie) % the_lnet.ln_lh_hash_size;
- list_add (&lh->lh_hash_chain, &the_lnet.ln_lh_hash_table[hash]);
-}
-
-void
-lnet_invalidate_handle (lnet_libhandle_t *lh)
-{
- /* ALWAYS called with LNET_LOCK held */
- list_del (&lh->lh_hash_chain);
-}
-
-int
-lnet_init_finalizers(void)
-{
-#ifdef __KERNEL__
- int i;
-
- the_lnet.ln_nfinalizers = num_online_cpus();
-
- LIBCFS_ALLOC(the_lnet.ln_finalizers,
- the_lnet.ln_nfinalizers *
- sizeof(*the_lnet.ln_finalizers));
- if (the_lnet.ln_finalizers == NULL) {
- CERROR("Can't allocate ln_finalizers\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < the_lnet.ln_nfinalizers; i++)
- the_lnet.ln_finalizers[i] = NULL;
-#else
- the_lnet.ln_finalizing = 0;
-#endif
-
- CFS_INIT_LIST_HEAD(&the_lnet.ln_finalizeq);
- return 0;
-}
-
-void
-lnet_fini_finalizers(void)
-{
-#ifdef __KERNEL__
- int i;
-
- for (i = 0; i < the_lnet.ln_nfinalizers; i++)
- LASSERT (the_lnet.ln_finalizers[i] == NULL);
-
- LIBCFS_FREE(the_lnet.ln_finalizers,
- the_lnet.ln_nfinalizers *
- sizeof(*the_lnet.ln_finalizers));
-#else
- LASSERT (!the_lnet.ln_finalizing);
-#endif
- LASSERT (list_empty(&the_lnet.ln_finalizeq));
-}
-
-#ifndef __KERNEL__
-/* Temporary workaround to allow uOSS and test programs force server
- * mode in userspace. See comments near ln_server_mode_flag in
- * lnet/lib-types.h */
-
-void
-lnet_server_mode() {
- the_lnet.ln_server_mode_flag = 1;
-}
-#endif
-
-int
-lnet_prepare(lnet_pid_t requested_pid)
-{
- /* Prepare to bring up the network */
- int rc = 0;
- int i;
-
- LASSERT (the_lnet.ln_refcount == 0);
-
- the_lnet.ln_routing = 0;
-
-#ifdef __KERNEL__
- LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
- the_lnet.ln_pid = requested_pid;
-#else
- if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
- LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
-
- if (cfs_curproc_uid())/* Only root can run user-space server */
- return -EPERM;
- the_lnet.ln_pid = requested_pid;
-
- } else {/* client case (liblustre) */
-
- /* My PID must be unique on this node and flag I'm userspace */
- the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
- }
-#endif
-
- rc = lnet_descriptor_setup();
- if (rc != 0)
- goto failed0;
-
- memset(&the_lnet.ln_counters, 0,
- sizeof(the_lnet.ln_counters));
-
- CFS_INIT_LIST_HEAD (&the_lnet.ln_active_msgs);
- CFS_INIT_LIST_HEAD (&the_lnet.ln_active_mds);
- CFS_INIT_LIST_HEAD (&the_lnet.ln_active_eqs);
- CFS_INIT_LIST_HEAD (&the_lnet.ln_test_peers);
- CFS_INIT_LIST_HEAD (&the_lnet.ln_nis);
- CFS_INIT_LIST_HEAD (&the_lnet.ln_zombie_nis);
- CFS_INIT_LIST_HEAD (&the_lnet.ln_remote_nets);
- CFS_INIT_LIST_HEAD (&the_lnet.ln_routers);
-
- the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
-
- lnet_init_rtrpools();
-
- rc = lnet_setup_handle_hash ();
- if (rc != 0)
- goto failed0;
-
- rc = lnet_create_peer_table();
- if (rc != 0)
- goto failed1;
-
- rc = lnet_init_finalizers();
- if (rc != 0)
- goto failed2;
-
- the_lnet.ln_nportals = MAX_PORTALS;
- LIBCFS_ALLOC(the_lnet.ln_portals,
- the_lnet.ln_nportals *
- sizeof(*the_lnet.ln_portals));
- if (the_lnet.ln_portals == NULL) {
- rc = -ENOMEM;
- goto failed3;
- }
-
- for (i = 0; i < the_lnet.ln_nportals; i++) {
- CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_ml));
- CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_msgq));
- the_lnet.ln_portals[i].ptl_options = 0;
- }
-
- return 0;
-
- failed3:
- lnet_fini_finalizers();
- failed2:
- lnet_destroy_peer_table();
- failed1:
- lnet_cleanup_handle_hash();
- failed0:
- lnet_descriptor_cleanup();
- return rc;
-}
-
-int
-lnet_unprepare (void)
-{
- int idx;
-
- /* NB no LNET_LOCK since this is the last reference. All LND instances
- * have shut down already, so it is safe to unlink and free all
- * descriptors, even those that appear committed to a network op (eg MD
- * with non-zero pending count) */
-
- lnet_fail_nid(LNET_NID_ANY, 0);
-
- LASSERT (list_empty(&the_lnet.ln_test_peers));
- LASSERT (the_lnet.ln_refcount == 0);
- LASSERT (list_empty(&the_lnet.ln_nis));
- LASSERT (list_empty(&the_lnet.ln_zombie_nis));
- LASSERT (the_lnet.ln_nzombie_nis == 0);
-
- for (idx = 0; idx < the_lnet.ln_nportals; idx++) {
- LASSERT (list_empty(&the_lnet.ln_portals[idx].ptl_msgq));
-
- while (!list_empty (&the_lnet.ln_portals[idx].ptl_ml)) {
- lnet_me_t *me = list_entry (the_lnet.ln_portals[idx].ptl_ml.next,
- lnet_me_t, me_list);
-
- CERROR ("Active me %p on exit\n", me);
- list_del (&me->me_list);
- lnet_me_free (me);
- }
- }
-
- while (!list_empty (&the_lnet.ln_active_mds)) {
- lnet_libmd_t *md = list_entry (the_lnet.ln_active_mds.next,
- lnet_libmd_t, md_list);
-
- CERROR ("Active md %p on exit\n", md);
- list_del (&md->md_list);
- lnet_md_free (md);
- }
-
- while (!list_empty (&the_lnet.ln_active_eqs)) {
- lnet_eq_t *eq = list_entry (the_lnet.ln_active_eqs.next,
- lnet_eq_t, eq_list);
-
- CERROR ("Active eq %p on exit\n", eq);
- list_del (&eq->eq_list);
- lnet_eq_free (eq);
- }
-
- while (!list_empty (&the_lnet.ln_active_msgs)) {
- lnet_msg_t *msg = list_entry (the_lnet.ln_active_msgs.next,
- lnet_msg_t, msg_activelist);
-
- CERROR ("Active msg %p on exit\n", msg);
- LASSERT (msg->msg_onactivelist);
- msg->msg_onactivelist = 0;
- list_del (&msg->msg_activelist);
- lnet_msg_free (msg);
- }
-
- LIBCFS_FREE(the_lnet.ln_portals,
- the_lnet.ln_nportals * sizeof(*the_lnet.ln_portals));
-
- lnet_free_rtrpools();
- lnet_fini_finalizers();
- lnet_destroy_peer_table();
- lnet_cleanup_handle_hash();
- lnet_descriptor_cleanup();
-
- return (0);
-}
-
-lnet_ni_t *
-lnet_net2ni_locked (__u32 net)
-{
- struct list_head *tmp;
- lnet_ni_t *ni;
-
- list_for_each (tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (lnet_ptlcompat_matchnet(LNET_NIDNET(ni->ni_nid), net)) {
- lnet_ni_addref_locked(ni);
- return ni;
- }
- }
-
- return NULL;
-}
-
-int
-lnet_islocalnet (__u32 net)
-{
- lnet_ni_t *ni;
-
- LNET_LOCK();
- ni = lnet_net2ni_locked(net);
- if (ni != NULL)
- lnet_ni_decref_locked(ni);
- LNET_UNLOCK();
-
- return ni != NULL;
-}
-
-lnet_ni_t *
-lnet_nid2ni_locked (lnet_nid_t nid)
-{
- struct list_head *tmp;
- lnet_ni_t *ni;
-
- list_for_each (tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (lnet_ptlcompat_matchnid(ni->ni_nid, nid)) {
- lnet_ni_addref_locked(ni);
- return ni;
- }
- }
-
- return NULL;
-}
-
-int
-lnet_islocalnid (lnet_nid_t nid)
-{
- lnet_ni_t *ni;
-
- LNET_LOCK();
- ni = lnet_nid2ni_locked(nid);
- if (ni != NULL)
- lnet_ni_decref_locked(ni);
- LNET_UNLOCK();
-
- return ni != NULL;
-}
-
-int
-lnet_count_acceptor_nis (lnet_ni_t **first_ni)
-{
- /* Return the # of NIs that need the acceptor. Return the first one in
- * *first_ni so the acceptor can pass it connections "blind" to retain
- * binary compatibility. */
- int count = 0;
-#if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD)
- struct list_head *tmp;
- lnet_ni_t *ni;
-
- LNET_LOCK();
- list_for_each (tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (ni->ni_lnd->lnd_accept != NULL) {
- /* This LND uses the acceptor */
- if (count == 0 && first_ni != NULL) {
- lnet_ni_addref_locked(ni);
- *first_ni = ni;
- }
- count++;
- }
- }
-
- LNET_UNLOCK();
-
-#endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
- return count;
-}
-
-void
-lnet_shutdown_lndnis (void)
-{
- int i;
- int islo;
- lnet_ni_t *ni;
-
- /* NB called holding the global mutex */
-
- /* All quiet on the API front */
- LASSERT (!the_lnet.ln_shutdown);
- LASSERT (the_lnet.ln_refcount == 0);
- LASSERT (list_empty(&the_lnet.ln_zombie_nis));
- LASSERT (the_lnet.ln_nzombie_nis == 0);
- LASSERT (list_empty(&the_lnet.ln_remote_nets));
-
- LNET_LOCK();
- the_lnet.ln_shutdown = 1; /* flag shutdown */
-
- /* Unlink NIs from the global table */
- while (!list_empty(&the_lnet.ln_nis)) {
- ni = list_entry(the_lnet.ln_nis.next,
- lnet_ni_t, ni_list);
- list_del (&ni->ni_list);
-
- the_lnet.ln_nzombie_nis++;
- lnet_ni_decref_locked(ni); /* drop apini's ref */
- }
-
- /* Drop the cached eqwait NI. */
- if (the_lnet.ln_eqwaitni != NULL) {
- lnet_ni_decref_locked(the_lnet.ln_eqwaitni);
- the_lnet.ln_eqwaitni = NULL;
- }
-
- /* Drop the cached loopback NI. */
- if (the_lnet.ln_loni != NULL) {
- lnet_ni_decref_locked(the_lnet.ln_loni);
- the_lnet.ln_loni = NULL;
- }
-
- LNET_UNLOCK();
-
- /* Clear lazy portals and drop delayed messages which hold refs
- * on their lnet_msg_t::msg_rxpeer */
- for (i = 0; i < the_lnet.ln_nportals; i++)
- LNetClearLazyPortal(i);
-
- /* Clear the peer table and wait for all peers to go (they hold refs on
- * their NIs) */
- lnet_clear_peer_table();
-
- LNET_LOCK();
- /* Now wait for the NI's I just nuked to show up on apini_zombie_nis
- * and shut them down in guaranteed thread context */
- i = 2;
- while (the_lnet.ln_nzombie_nis != 0) {
-
- while (list_empty(&the_lnet.ln_zombie_nis)) {
- LNET_UNLOCK();
- ++i;
- if ((i & (-i)) == i)
- CDEBUG(D_WARNING,"Waiting for %d zombie NIs\n",
- the_lnet.ln_nzombie_nis);
- cfs_pause(cfs_time_seconds(1));
- LNET_LOCK();
- }
-
- ni = list_entry(the_lnet.ln_zombie_nis.next,
- lnet_ni_t, ni_list);
- list_del(&ni->ni_list);
- ni->ni_lnd->lnd_refcount--;
-
- LNET_UNLOCK();
-
- islo = ni->ni_lnd->lnd_type == LOLND;
-
- LASSERT (!in_interrupt ());
- (ni->ni_lnd->lnd_shutdown)(ni);
-
- /* can't deref lnd anymore now; it might have unregistered
- * itself... */
-
- if (!islo)
- CDEBUG(D_LNI, "Removed LNI %s\n",
- libcfs_nid2str(ni->ni_nid));
-
- LIBCFS_FREE(ni, sizeof(*ni));
-
- LNET_LOCK();
- the_lnet.ln_nzombie_nis--;
- }
-
- the_lnet.ln_shutdown = 0;
- LNET_UNLOCK();
-
- if (the_lnet.ln_network_tokens != NULL) {
- LIBCFS_FREE(the_lnet.ln_network_tokens,
- the_lnet.ln_network_tokens_nob);
- the_lnet.ln_network_tokens = NULL;
- }
-}
-
-int
-lnet_startup_lndnis (void)
-{
- lnd_t *lnd;
- lnet_ni_t *ni;
- struct list_head nilist;
- int rc = 0;
- int lnd_type;
- int nicount = 0;
- char *nets = lnet_get_networks();
-
- CFS_INIT_LIST_HEAD(&nilist);
-
- if (nets == NULL)
- goto failed;
-
- rc = lnet_parse_networks(&nilist, nets);
- if (rc != 0)
- goto failed;
-
- while (!list_empty(&nilist)) {
- ni = list_entry(nilist.next, lnet_ni_t, ni_list);
- lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
-
- LASSERT (libcfs_isknown_lnd(lnd_type));
-
- LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
- lnd = lnet_find_lnd_by_type(lnd_type);
-
-#ifdef __KERNEL__
- if (lnd == NULL) {
- LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
- rc = request_module(libcfs_lnd2modname(lnd_type));
- LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
-
- lnd = lnet_find_lnd_by_type(lnd_type);
- if (lnd == NULL) {
- LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
- CERROR("Can't load LND %s, module %s, rc=%d\n",
- libcfs_lnd2str(lnd_type),
- libcfs_lnd2modname(lnd_type), rc);
-#ifndef CONFIG_KMOD
- LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
- "compiled with CONFIG_KMOD set for "
- "automatic module loading.");
-#endif
- goto failed;
- }
- }
-#else
- if (lnd == NULL) {
- LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
- CERROR("LND %s not supported\n",
- libcfs_lnd2str(lnd_type));
- goto failed;
- }
-#endif
-
- ni->ni_refcount = 1;
-
- LNET_LOCK();
- lnd->lnd_refcount++;
- LNET_UNLOCK();
-
- ni->ni_lnd = lnd;
-
- rc = (lnd->lnd_startup)(ni);
-
- LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
-
- if (rc != 0) {
- LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
- "\n",
- rc, libcfs_lnd2str(lnd->lnd_type));
- LNET_LOCK();
- lnd->lnd_refcount--;
- LNET_UNLOCK();
- goto failed;
- }
-
- list_del(&ni->ni_list);
-
- LNET_LOCK();
- list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
- LNET_UNLOCK();
-
- if (lnd->lnd_type == LOLND) {
- lnet_ni_addref(ni);
- LASSERT (the_lnet.ln_loni == NULL);
- the_lnet.ln_loni = ni;
- continue;
- }
-
-#ifndef __KERNEL__
- if (lnd->lnd_wait != NULL) {
- if (the_lnet.ln_eqwaitni == NULL) {
- lnet_ni_addref(ni);
- the_lnet.ln_eqwaitni = ni;
- }
- } else {
-# ifndef HAVE_LIBPTHREAD
- LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a "
- "single-threaded runtime\n",
- libcfs_lnd2str(lnd_type));
- goto failed;
-# endif
- }
-#endif
- if (ni->ni_peertxcredits == 0 ||
- ni->ni_maxtxcredits == 0) {
- LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
- libcfs_lnd2str(lnd->lnd_type),
- ni->ni_peertxcredits == 0 ?
- "" : "per-peer ");
- goto failed;
- }
-
- ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits;
-
- CDEBUG(D_LNI, "Added LNI %s [%d/%d]\n",
- libcfs_nid2str(ni->ni_nid),
- ni->ni_peertxcredits, ni->ni_txcredits);
-
- /* Handle nidstrings for network 0 just like this one */
- if (the_lnet.ln_ptlcompat > 0) {
- if (nicount > 0) {
- LCONSOLE_ERROR_MSG(0x108, "Can't run > 1 "
- "network when portals_compatibility is "
- "set\n");
- goto failed;
- }
- libcfs_setnet0alias(lnd->lnd_type);
- }
-
- nicount++;
- }
-
- if (the_lnet.ln_eqwaitni != NULL && nicount > 1) {
- lnd_type = the_lnet.ln_eqwaitni->ni_lnd->lnd_type;
- LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
- "\n",
- libcfs_lnd2str(lnd_type));
- goto failed;
- }
-
- return 0;
-
- failed:
- lnet_shutdown_lndnis();
-
- while (!list_empty(&nilist)) {
- ni = list_entry(nilist.next, lnet_ni_t, ni_list);
- list_del(&ni->ni_list);
- LIBCFS_FREE(ni, sizeof(*ni));
- }
-
- return -ENETDOWN;
-}
-
-int
-LNetInit(void)
-{
- int rc;
-
- lnet_assert_wire_constants ();
- LASSERT (!the_lnet.ln_init);
-
- memset(&the_lnet, 0, sizeof(the_lnet));
-
- rc = lnet_get_portals_compatibility();
- if (rc < 0)
- return rc;
-
- lnet_init_locks();
- CFS_INIT_LIST_HEAD(&the_lnet.ln_lnds);
- the_lnet.ln_ptlcompat = rc;
- the_lnet.ln_refcount = 0;
- the_lnet.ln_init = 1;
-
-#ifdef __KERNEL__
- /* All LNDs apart from the LOLND are in separate modules. They
- * register themselves when their module loads, and unregister
- * themselves when their module is unloaded. */
-#else
- /* Register LNDs
- * NB the order here determines default 'networks=' order */
-# ifdef CRAY_XT3
- LNET_REGISTER_ULND(the_ptllnd);
-# endif
-# ifdef HAVE_LIBPTHREAD
- LNET_REGISTER_ULND(the_tcplnd);
-# endif
-#endif
- lnet_register_lnd(&the_lolnd);
- return 0;
-}
-
-void
-LNetFini(void)
-{
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount == 0);
-
- while (!list_empty(&the_lnet.ln_lnds))
- lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
- lnd_t, lnd_list));
- lnet_fini_locks();
-
- the_lnet.ln_init = 0;
-}
-
-int
-LNetNIInit(lnet_pid_t requested_pid)
-{
- int im_a_router = 0;
- int rc;
-
- LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex);
-
- LASSERT (the_lnet.ln_init);
- CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
-
- if (the_lnet.ln_refcount > 0) {
- rc = the_lnet.ln_refcount++;
- goto out;
- }
-
- if (requested_pid == LNET_PID_ANY) {
- /* Don't instantiate LNET just for me */
- rc = -ENETDOWN;
- goto failed0;
- }
-
- rc = lnet_prepare(requested_pid);
- if (rc != 0)
- goto failed0;
-
- rc = lnet_startup_lndnis();
- if (rc != 0)
- goto failed1;
-
- rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
- if (rc != 0)
- goto failed2;
-
- rc = lnet_check_routes();
- if (rc != 0)
- goto failed2;
-
- rc = lnet_alloc_rtrpools(im_a_router);
- if (rc != 0)
- goto failed2;
-
- rc = lnet_acceptor_start();
- if (rc != 0)
- goto failed2;
-
- the_lnet.ln_refcount = 1;
- /* Now I may use my own API functions... */
-
- rc = lnet_router_checker_start();
- if (rc != 0)
- goto failed3;
-
- rc = lnet_ping_target_init();
- if (rc != 0)
- goto failed4;
-
- lnet_proc_init();
- goto out;
-
- failed4:
- lnet_router_checker_stop();
- failed3:
- the_lnet.ln_refcount = 0;
- lnet_acceptor_stop();
- failed2:
- lnet_destroy_routes();
- lnet_shutdown_lndnis();
- failed1:
- lnet_unprepare();
- failed0:
- LASSERT (rc < 0);
- out:
- LNET_MUTEX_UP(&the_lnet.ln_api_mutex);
- return rc;
-}
-
-int
-LNetNIFini()
-{
- LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex);
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- if (the_lnet.ln_refcount != 1) {
- the_lnet.ln_refcount--;
- } else {
- LASSERT (!the_lnet.ln_niinit_self);
-
- lnet_proc_fini();
- lnet_ping_target_fini();
- lnet_router_checker_stop();
-
- /* Teardown fns that use my own API functions BEFORE here */
- the_lnet.ln_refcount = 0;
-
- lnet_acceptor_stop();
- lnet_destroy_routes();
- lnet_shutdown_lndnis();
- lnet_unprepare();
- }
-
- LNET_MUTEX_UP(&the_lnet.ln_api_mutex);
- return 0;
-}
-
-int
-LNetCtl(unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- lnet_process_id_t id;
- lnet_ni_t *ni;
- int rc;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- switch (cmd) {
- case IOC_LIBCFS_GET_NI:
- rc = LNetGetId(data->ioc_count, &id);
- data->ioc_nid = id.nid;
- return rc;
-
- case IOC_LIBCFS_FAIL_NID:
- return lnet_fail_nid(data->ioc_nid, data->ioc_count);
-
- case IOC_LIBCFS_ADD_ROUTE:
- rc = lnet_add_route(data->ioc_net, data->ioc_count,
- data->ioc_nid);
- return (rc != 0) ? rc : lnet_check_routes();
-
- case IOC_LIBCFS_DEL_ROUTE:
- return lnet_del_route(data->ioc_net, data->ioc_nid);
-
- case IOC_LIBCFS_GET_ROUTE:
- return lnet_get_route(data->ioc_count,
- &data->ioc_net, &data->ioc_count,
- &data->ioc_nid, &data->ioc_flags);
- case IOC_LIBCFS_NOTIFY_ROUTER:
- return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
- (time_t)data->ioc_u64[0]);
-
- case IOC_LIBCFS_PORTALS_COMPATIBILITY:
- return the_lnet.ln_ptlcompat;
-
- case IOC_LIBCFS_LNET_DIST:
- rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
- if (rc < 0 && rc != -EHOSTUNREACH)
- return rc;
-
- data->ioc_u32[0] = rc;
- return 0;
-
- case IOC_LIBCFS_TESTPROTOCOMPAT:
- LNET_LOCK();
- the_lnet.ln_testprotocompat = data->ioc_flags;
- LNET_UNLOCK();
- return 0;
-
- case IOC_LIBCFS_PING:
- rc = lnet_ping((lnet_process_id_t) {.nid = data->ioc_nid,
- .pid = data->ioc_u32[0]},
- data->ioc_u32[1], /* timeout */
- (lnet_process_id_t *)data->ioc_pbuf1,
- data->ioc_plen1/sizeof(lnet_process_id_t));
- if (rc < 0)
- return rc;
- data->ioc_count = rc;
- return 0;
-
- case IOC_LIBCFS_DEBUG_PEER: {
- /* CAVEAT EMPTOR: this one designed for calling directly; not
- * via an ioctl */
- lnet_process_id_t *id = arg;
-
- lnet_debug_peer(id->nid);
-
- ni = lnet_net2ni(LNET_NIDNET(id->nid));
- if (ni == NULL) {
- CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(*id));
- } else {
- if (ni->ni_lnd->lnd_ctl == NULL) {
- CDEBUG(D_WARNING, "No ctl for %s\n",
- libcfs_id2str(*id));
- } else {
- (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
- }
-
- lnet_ni_decref(ni);
- }
- return 0;
- }
-
- default:
- ni = lnet_net2ni(data->ioc_net);
- if (ni == NULL)
- return -EINVAL;
-
- if (ni->ni_lnd->lnd_ctl == NULL)
- rc = -EINVAL;
- else
- rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
-
- lnet_ni_decref(ni);
- return rc;
- }
- /* not reached */
-}
-
-int
-LNetGetId(unsigned int index, lnet_process_id_t *id)
-{
- lnet_ni_t *ni;
- struct list_head *tmp;
- int rc = -ENOENT;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- LNET_LOCK();
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (index-- != 0)
- continue;
-
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- id->nid = ni->ni_nid;
- id->pid = the_lnet.ln_pid;
- rc = 0;
- break;
- }
-
- LNET_UNLOCK();
-
- return rc;
-}
-
-void
-LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
-{
- snprintf(str, len, LPX64, h.cookie);
-}
-
-
-int
-lnet_ping_target_init(void)
-{
- lnet_handle_me_t meh;
- lnet_process_id_t id;
- int rc;
- int rc2;
- int n;
- int infosz;
- int i;
-
- for (n = 0; ; n++) {
- rc = LNetGetId(n, &id);
- if (rc == -ENOENT)
- break;
-
- LASSERT (rc == 0);
- }
-
- infosz = offsetof(lnet_ping_info_t, pi_nid[n]);
- LIBCFS_ALLOC(the_lnet.ln_ping_info, infosz);
- if (the_lnet.ln_ping_info == NULL) {
- CERROR("Can't allocate ping info[%d]\n", n);
- return -ENOMEM;
- }
-
- the_lnet.ln_ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
- the_lnet.ln_ping_info->pi_version = LNET_PROTO_PING_VERSION;
- the_lnet.ln_ping_info->pi_pid = the_lnet.ln_pid;
- the_lnet.ln_ping_info->pi_nnids = n;
-
- for (i = 0; i < n; i++) {
- rc = LNetGetId(i, &id);
- LASSERT (rc == 0);
- the_lnet.ln_ping_info->pi_nid[i] = id.nid;
- }
-
- /* We can have a tiny EQ since we only need to see the unlink event on
- * teardown, which by definition is the last one! */
- rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq);
- if (rc != 0) {
- CERROR("Can't allocate ping EQ: %d\n", rc);
- goto failed_0;
- }
-
- rc = LNetMEAttach(LNET_RESERVED_PORTAL,
- (lnet_process_id_t){.nid = LNET_NID_ANY,
- .pid = LNET_PID_ANY},
- LNET_PROTO_PING_MATCHBITS, 0LL,
- LNET_UNLINK, LNET_INS_AFTER,
- &meh);
- if (rc != 0) {
- CERROR("Can't create ping ME: %d\n", rc);
- goto failed_1;
- }
-
- rc = LNetMDAttach(meh,
- (lnet_md_t){.start = the_lnet.ln_ping_info,
- .length = infosz,
- .threshold = LNET_MD_THRESH_INF,
- .options = (LNET_MD_OP_GET |
- LNET_MD_TRUNCATE |
- LNET_MD_MANAGE_REMOTE),
- .eq_handle = the_lnet.ln_ping_target_eq},
- LNET_RETAIN,
- &the_lnet.ln_ping_target_md);
- if (rc != 0) {
- CERROR("Can't attach ping MD: %d\n", rc);
- goto failed_2;
- }
-
- return 0;
-
- failed_2:
- rc2 = LNetMEUnlink(meh);
- LASSERT (rc2 == 0);
- failed_1:
- rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
- LASSERT (rc2 == 0);
- failed_0:
- LIBCFS_FREE(the_lnet.ln_ping_info, infosz);
-
- return rc;
-}
-
-void
-lnet_ping_target_fini(void)
-{
- lnet_event_t event;
- int rc;
- int which;
- int timeout_ms = 1000;
- cfs_sigset_t blocked = cfs_block_allsigs();
-
- LNetMDUnlink(the_lnet.ln_ping_target_md);
- /* NB md could be busy; this just starts the unlink */
-
- for (;;) {
- rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1,
- timeout_ms, &event, &which);
-
- /* I expect overflow... */
- LASSERT (rc >= 0 || rc == -EOVERFLOW);
-
- if (rc == 0) {
- /* timed out: provide a diagnostic */
- CWARN("Still waiting for ping MD to unlink\n");
- timeout_ms *= 2;
- continue;
- }
-
- /* Got a valid event */
- if (event.unlinked)
- break;
- }
-
- rc = LNetEQFree(the_lnet.ln_ping_target_eq);
- LASSERT (rc == 0);
-
- LIBCFS_FREE(the_lnet.ln_ping_info,
- offsetof(lnet_ping_info_t,
- pi_nid[the_lnet.ln_ping_info->pi_nnids]));
-
- cfs_restore_sigs(blocked);
-}
-
-int
-lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids)
-{
- lnet_handle_eq_t eqh;
- lnet_handle_md_t mdh;
- lnet_event_t event;
- int which;
- int unlinked = 0;
- int replied = 0;
- const int a_long_time = 60000; /* mS */
- int infosz = offsetof(lnet_ping_info_t, pi_nid[n_ids]);
- lnet_ping_info_t *info;
- lnet_process_id_t tmpid;
- int i;
- int nob;
- int rc;
- int rc2;
- cfs_sigset_t blocked;
-
- if (n_ids <= 0 ||
- id.nid == LNET_NID_ANY ||
- timeout_ms > 500000 || /* arbitrary limit! */
- n_ids > 20) /* arbitrary limit! */
- return -EINVAL;
-
- if (id.pid == LNET_PID_ANY)
- id.pid = LUSTRE_SRV_LNET_PID;
-
- LIBCFS_ALLOC(info, infosz);
- if (info == NULL)
- return -ENOMEM;
-
- /* NB 2 events max (including any unlink event) */
- rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
- if (rc != 0) {
- CERROR("Can't allocate EQ: %d\n", rc);
- goto out_0;
- }
-
- rc = LNetMDBind((lnet_md_t){.start = info,
- .length = infosz,
- .threshold = 2, /* GET/REPLY */
- .options = LNET_MD_TRUNCATE,
- .eq_handle = eqh},
- LNET_UNLINK,
- &mdh);
- if (rc != 0) {
- CERROR("Can't bind MD: %d\n", rc);
- goto out_1;
- }
-
- rc = LNetGet(LNET_NID_ANY, mdh, id,
- LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
-
- if (rc != 0) {
- /* Don't CERROR; this could be deliberate! */
-
- rc2 = LNetMDUnlink(mdh);
- LASSERT (rc2 == 0);
-
- /* NB must wait for the UNLINK event below... */
- unlinked = 1;
- timeout_ms = a_long_time;
- }
-
- do {
- /* MUST block for unlink to complete */
- if (unlinked)
- blocked = cfs_block_allsigs();
-
- rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
-
- if (unlinked)
- cfs_restore_sigs(blocked);
-
- CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
- (rc2 <= 0) ? -1 : event.type,
- (rc2 <= 0) ? -1 : event.status,
- (rc2 > 0 && event.unlinked) ? " unlinked" : "");
-
- LASSERT (rc2 != -EOVERFLOW); /* can't miss anything */
-
- if (rc2 <= 0 || event.status != 0) {
- /* timeout or error */
- if (!replied && rc == 0)
- rc = (rc2 < 0) ? rc2 :
- (rc2 == 0) ? -ETIMEDOUT :
- event.status;
-
- if (!unlinked) {
- /* Ensure completion in finite time... */
- LNetMDUnlink(mdh);
- /* No assertion (racing with network) */
- unlinked = 1;
- timeout_ms = a_long_time;
- } else if (rc2 == 0) {
- /* timed out waiting for unlink */
- CWARN("ping %s: late network completion\n",
- libcfs_id2str(id));
- }
-
- } else if (event.type == LNET_EVENT_REPLY) {
- replied = 1;
- rc = event.mlength;
- }
-
- } while (rc2 <= 0 || !event.unlinked);
-
- if (!replied) {
- if (rc >= 0)
- CWARN("%s: Unexpected rc >= 0 but no reply!\n",
- libcfs_id2str(id));
- rc = -EIO;
- goto out_1;
- }
-
- nob = rc;
- LASSERT (nob >= 0 && nob <= infosz);
-
- rc = -EPROTO; /* if I can't parse... */
-
- if (nob < 8) {
- /* can't check magic/version */
- CERROR("%s: ping info too short %d\n",
- libcfs_id2str(id), nob);
- goto out_1;
- }
-
- if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
- /* NB I might be swabbing garbage until I check below, but it
- * doesn't matter */
- __swab32s(&info->pi_version);
- __swab32s(&info->pi_pid);
- __swab32s(&info->pi_nnids);
- for (i = 0; i < info->pi_nnids && i < n_ids; i++)
- __swab64s(&info->pi_nid[i]);
-
- } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
- CERROR("%s: Unexpected magic %08x\n",
- libcfs_id2str(id), info->pi_magic);
- goto out_1;
- }
-
- if (info->pi_version != LNET_PROTO_PING_VERSION) {
- CERROR("%s: Unexpected version 0x%x\n",
- libcfs_id2str(id), info->pi_version);
- goto out_1;
- }
-
- if (nob < offsetof(lnet_ping_info_t, pi_nid[0])) {
- CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
- nob, (int)offsetof(lnet_ping_info_t, pi_nid[0]));
- goto out_1;
- }
-
- if (info->pi_nnids < n_ids)
- n_ids = info->pi_nnids;
-
- if (nob < offsetof(lnet_ping_info_t, pi_nid[n_ids])) {
- CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
- nob, (int)offsetof(lnet_ping_info_t, pi_nid[n_ids]));
- goto out_1;
- }
-
- rc = -EFAULT; /* If I SEGV... */
-
- for (i = 0; i < n_ids; i++) {
- tmpid.pid = info->pi_pid;
- tmpid.nid = info->pi_nid[i];
-#ifdef __KERNEL__
- if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
- goto out_1;
-#else
- ids[i] = tmpid;
-#endif
- }
- rc = info->pi_nnids;
-
- out_1:
- rc2 = LNetEQFree(eqh);
- if (rc2 != 0)
- CERROR("rc2 %d\n", rc2);
- LASSERT (rc2 == 0);
-
- out_0:
- LIBCFS_FREE(info, infosz);
- return rc;
-}
+++ /dev/null
-my_sources = api-errno.c api-ni.c config.c \
- lib-me.c lib-msg.c lib-eq.c \
- lib-md.c lib-move.c lo.c \
- router.c router_proc.c \
- acceptor.c peer.c
-
-
-if LIBLUSTRE
-noinst_LIBRARIES= liblnet.a
-liblnet_a_SOURCES= $(my_sources)
-liblnet_a_CPPFLAGS = $(LLCPPFLAGS)
-liblnet_a_CFLAGS = $(LLCFLAGS)
-endif
-
-if MODULES
-
-if LINUX
-modulenet_DATA = lnet$(KMODEXT)
-endif # LINUX
-
-if DARWIN
-macos_PROGRAMS := lnet
-
-lnet_SOURCES := api-errno.c api-ni.c config.c
-lnet_SOURCES += lib-me.c lib-msg.c lib-eq.c lib-md.c
-lnet_SOURCES += lib-move.c module.c lo.c router.c router_proc.c
-lnet_SOURCES += acceptor.c peer.c
-
-lnet_CFLAGS := $(EXTRA_KCFLAGS)
-lnet_LDFLAGS := $(EXTRA_KLDFLAGS)
-lnet_LDADD := $(EXTRA_KLIBS)
-
-plist_DATA := Info.plist
-
-install_data_hook := fix-kext-ownership
-
-endif # DARWIN
-
-endif # MODULES
-
-install-data-hook: $(install_data_hook)
-
-EXTRA_DIST := Info.plist
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ lnet
-DIST_SOURCES = $(lnet-objs:%.o=%.c)
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2005 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/lib-lnet.h>
-
-typedef struct { /* tmp struct for parsing routes */
- struct list_head ltb_list; /* stash on lists */
- int ltb_size; /* allocated size */
- char ltb_text[0]; /* text buffer */
-} lnet_text_buf_t;
-
-static int lnet_tbnob = 0; /* track text buf allocation */
-#define LNET_MAX_TEXTBUF_NOB (64<<10) /* bound allocation */
-#define LNET_SINGLE_TEXTBUF_NOB (4<<10)
-
-typedef struct {
- struct list_head lre_list; /* stash in a list */
- int lre_min; /* min value */
- int lre_max; /* max value */
- int lre_stride; /* stride */
-} lnet_range_expr_t;
-
-static int lnet_re_alloc = 0; /* track expr allocation */
-
-void
-lnet_syntax(char *name, char *str, int offset, int width)
-{
- static char dots[LNET_SINGLE_TEXTBUF_NOB];
- static char dashes[LNET_SINGLE_TEXTBUF_NOB];
-
- memset(dots, '.', sizeof(dots));
- dots[sizeof(dots)-1] = 0;
- memset(dashes, '-', sizeof(dashes));
- dashes[sizeof(dashes)-1] = 0;
-
- LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str);
- LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n",
- (int)strlen(name), dots, offset, dots,
- (width < 1) ? 0 : width - 1, dashes);
-}
-
-int
-lnet_issep (char c)
-{
- switch (c) {
- case '\n':
- case '\r':
- case ';':
- return 1;
- default:
- return 0;
- }
-}
-
-int
-lnet_iswhite (char c)
-{
- switch (c) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- return 1;
- default:
- return 0;
- }
-}
-
-char *
-lnet_trimwhite(char *str)
-{
- char *end;
-
- while (lnet_iswhite(*str))
- str++;
-
- end = str + strlen(str);
- while (end > str) {
- if (!lnet_iswhite(end[-1]))
- break;
- end--;
- }
-
- *end = 0;
- return str;
-}
-
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
-{
- struct list_head *tmp;
- lnet_ni_t *ni;
-
- list_for_each (tmp, nilist) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (LNET_NIDNET(ni->ni_nid) == net)
- return 0;
- }
-
- return 1;
-}
-
-lnet_ni_t *
-lnet_new_ni(__u32 net, struct list_head *nilist)
-{
- lnet_ni_t *ni;
-
- if (!lnet_net_unique(net, nilist)) {
- LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
- libcfs_net2str(net));
- return NULL;
- }
-
- LIBCFS_ALLOC(ni, sizeof(*ni));
- if (ni == NULL) {
- CERROR("Out of memory creating network %s\n",
- libcfs_net2str(net));
- return NULL;
- }
-
- /* zero counters/flags, NULL pointers... */
- memset(ni, 0, sizeof(*ni));
-
- /* LND will fill in the address part of the NID */
- ni->ni_nid = LNET_MKNID(net, 0);
- CFS_INIT_LIST_HEAD(&ni->ni_txq);
-
- list_add_tail(&ni->ni_list, nilist);
- return ni;
-}
-
-int
-lnet_parse_networks(struct list_head *nilist, char *networks)
-{
- int tokensize = strlen(networks) + 1;
- char *tokens;
- char *str;
- lnet_ni_t *ni;
- __u32 net;
- int nnets = 0;
-
- if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) {
- /* _WAY_ conservative */
- LCONSOLE_ERROR_MSG(0x112, "Can't parse networks: string too "
- "long\n");
- return -EINVAL;
- }
-
- LIBCFS_ALLOC(tokens, tokensize);
- if (tokens == NULL) {
- CERROR("Can't allocate net tokens\n");
- return -ENOMEM;
- }
-
- the_lnet.ln_network_tokens = tokens;
- the_lnet.ln_network_tokens_nob = tokensize;
- memcpy (tokens, networks, tokensize);
- str = tokens;
-
- /* Add in the loopback network */
- ni = lnet_new_ni(LNET_MKNET(LOLND, 0), nilist);
- if (ni == NULL)
- goto failed;
-
- while (str != NULL && *str != 0) {
- char *comma = strchr(str, ',');
- char *bracket = strchr(str, '(');
- int niface;
- char *iface;
-
- /* NB we don't check interface conflicts here; it's the LNDs
- * responsibility (if it cares at all) */
-
- if (bracket == NULL ||
- (comma != NULL && comma < bracket)) {
-
- /* no interface list specified */
-
- if (comma != NULL)
- *comma++ = 0;
- net = libcfs_str2net(lnet_trimwhite(str));
-
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- lnet_syntax("networks", networks,
- str - tokens, strlen(str));
- LCONSOLE_ERROR_MSG(0x113, "Unrecognised network"
- " type\n");
- goto failed;
- }
-
- if (LNET_NETTYP(net) != LOLND && /* loopback is implicit */
- lnet_new_ni(net, nilist) == NULL)
- goto failed;
-
- str = comma;
- continue;
- }
-
- *bracket = 0;
- net = libcfs_str2net(lnet_trimwhite(str));
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- lnet_syntax("networks", networks,
- str - tokens, strlen(str));
- goto failed;
- }
-
- if (nnets > 0 &&
- the_lnet.ln_ptlcompat > 0) {
- LCONSOLE_ERROR_MSG(0x114, "Only 1 network supported when"
- " 'portals_compatible' is set\n");
- goto failed;
- }
-
- nnets++;
- ni = lnet_new_ni(net, nilist);
- if (ni == NULL)
- goto failed;
-
- niface = 0;
- iface = bracket + 1;
-
- bracket = strchr(iface, ')');
- if (bracket == NULL) {
- lnet_syntax("networks", networks,
- iface - tokens, strlen(iface));
- goto failed;
- }
-
- *bracket = 0;
- do {
- comma = strchr(iface, ',');
- if (comma != NULL)
- *comma++ = 0;
-
- iface = lnet_trimwhite(iface);
- if (*iface == 0) {
- lnet_syntax("networks", networks,
- iface - tokens, strlen(iface));
- goto failed;
- }
-
- if (niface == LNET_MAX_INTERFACES) {
- LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
- "for net %s\n",
- libcfs_net2str(net));
- goto failed;
- }
-
- ni->ni_interfaces[niface++] = iface;
- iface = comma;
- } while (iface != NULL);
-
- str = bracket + 1;
- comma = strchr(bracket + 1, ',');
- if (comma != NULL) {
- *comma = 0;
- str = lnet_trimwhite(str);
- if (*str != 0) {
- lnet_syntax("networks", networks,
- str - tokens, strlen(str));
- goto failed;
- }
- str = comma + 1;
- continue;
- }
-
- str = lnet_trimwhite(str);
- if (*str != 0) {
- lnet_syntax("networks", networks,
- str - tokens, strlen(str));
- goto failed;
- }
- }
-
- LASSERT (!list_empty(nilist));
- return 0;
-
- failed:
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, lnet_ni_t, ni_list);
-
- list_del(&ni->ni_list);
- LIBCFS_FREE(ni, sizeof(*ni));
- }
- LIBCFS_FREE(tokens, tokensize);
- the_lnet.ln_network_tokens = NULL;
-
- return -EINVAL;
-}
-
-lnet_text_buf_t *
-lnet_new_text_buf (int str_len)
-{
- lnet_text_buf_t *ltb;
- int nob;
-
- /* NB allocate space for the terminating 0 */
- nob = offsetof(lnet_text_buf_t, ltb_text[str_len + 1]);
- if (nob > LNET_SINGLE_TEXTBUF_NOB) {
- /* _way_ conservative for "route net gateway..." */
- CERROR("text buffer too big\n");
- return NULL;
- }
-
- if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) {
- CERROR("Too many text buffers\n");
- return NULL;
- }
-
- LIBCFS_ALLOC(ltb, nob);
- if (ltb == NULL)
- return NULL;
-
- ltb->ltb_size = nob;
- ltb->ltb_text[0] = 0;
- lnet_tbnob += nob;
- return ltb;
-}
-
-void
-lnet_free_text_buf (lnet_text_buf_t *ltb)
-{
- lnet_tbnob -= ltb->ltb_size;
- LIBCFS_FREE(ltb, ltb->ltb_size);
-}
-
-void
-lnet_free_text_bufs(struct list_head *tbs)
-{
- lnet_text_buf_t *ltb;
-
- while (!list_empty(tbs)) {
- ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list);
-
- list_del(<b->ltb_list);
- lnet_free_text_buf(ltb);
- }
-}
-
-void
-lnet_print_text_bufs(struct list_head *tbs)
-{
- struct list_head *tmp;
- lnet_text_buf_t *ltb;
-
- list_for_each (tmp, tbs) {
- ltb = list_entry(tmp, lnet_text_buf_t, ltb_list);
-
- CDEBUG(D_WARNING, "%s\n", ltb->ltb_text);
- }
-
- CDEBUG(D_WARNING, "%d allocated\n", lnet_tbnob);
-}
-
-int
-lnet_str2tbs_sep (struct list_head *tbs, char *str)
-{
- struct list_head pending;
- char *sep;
- int nob;
- int i;
- lnet_text_buf_t *ltb;
-
- CFS_INIT_LIST_HEAD(&pending);
-
- /* Split 'str' into separate commands */
- for (;;) {
- /* skip leading whitespace */
- while (lnet_iswhite(*str))
- str++;
-
- /* scan for separator or comment */
- for (sep = str; *sep != 0; sep++)
- if (lnet_issep(*sep) || *sep == '#')
- break;
-
- nob = sep - str;
- if (nob > 0) {
- ltb = lnet_new_text_buf(nob);
- if (ltb == NULL) {
- lnet_free_text_bufs(&pending);
- return -1;
- }
-
- for (i = 0; i < nob; i++)
- if (lnet_iswhite(str[i]))
- ltb->ltb_text[i] = ' ';
- else
- ltb->ltb_text[i] = str[i];
-
- ltb->ltb_text[nob] = 0;
-
- list_add_tail(<b->ltb_list, &pending);
- }
-
- if (*sep == '#') {
- /* scan for separator */
- do {
- sep++;
- } while (*sep != 0 && !lnet_issep(*sep));
- }
-
- if (*sep == 0)
- break;
-
- str = sep + 1;
- }
-
- list_splice(&pending, tbs->prev);
- return 0;
-}
-
-int
-lnet_expand1tb (struct list_head *list,
- char *str, char *sep1, char *sep2,
- char *item, int itemlen)
-{
- int len1 = sep1 - str;
- int len2 = strlen(sep2 + 1);
- lnet_text_buf_t *ltb;
-
- LASSERT (*sep1 == '[');
- LASSERT (*sep2 == ']');
-
- ltb = lnet_new_text_buf(len1 + itemlen + len2);
- if (ltb == NULL)
- return -ENOMEM;
-
- memcpy(ltb->ltb_text, str, len1);
- memcpy(<b->ltb_text[len1], item, itemlen);
- memcpy(<b->ltb_text[len1+itemlen], sep2 + 1, len2);
- ltb->ltb_text[len1 + itemlen + len2] = 0;
-
- list_add_tail(<b->ltb_list, list);
- return 0;
-}
-
-int
-lnet_str2tbs_expand (struct list_head *tbs, char *str)
-{
- char num[16];
- struct list_head pending;
- char *sep;
- char *sep2;
- char *parsed;
- char *enditem;
- int lo;
- int hi;
- int stride;
- int i;
- int nob;
- int scanned;
-
- CFS_INIT_LIST_HEAD(&pending);
-
- sep = strchr(str, '[');
- if (sep == NULL) /* nothing to expand */
- return 0;
-
- sep2 = strchr(sep, ']');
- if (sep2 == NULL)
- goto failed;
-
- for (parsed = sep; parsed < sep2; parsed = enditem) {
-
- enditem = ++parsed;
- while (enditem < sep2 && *enditem != ',')
- enditem++;
-
- if (enditem == parsed) /* no empty items */
- goto failed;
-
- if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi, &stride, &scanned) < 3) {
-
- if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) {
-
- /* simple string enumeration */
- if (lnet_expand1tb(&pending, str, sep, sep2,
- parsed, enditem - parsed) != 0)
- goto failed;
-
- continue;
- }
-
- stride = 1;
- }
-
- /* range expansion */
-
- if (enditem != parsed + scanned) /* no trailing junk */
- goto failed;
-
- if (hi < 0 || lo < 0 || stride < 0 || hi < lo ||
- (hi - lo) % stride != 0)
- goto failed;
-
- for (i = lo; i <= hi; i += stride) {
-
- snprintf(num, sizeof(num), "%d", i);
- nob = strlen(num);
- if (nob + 1 == sizeof(num))
- goto failed;
-
- if (lnet_expand1tb(&pending, str, sep, sep2,
- num, nob) != 0)
- goto failed;
- }
- }
-
- list_splice(&pending, tbs->prev);
- return 1;
-
- failed:
- lnet_free_text_bufs(&pending);
- return -1;
-}
-
-int
-lnet_parse_hops (char *str, unsigned int *hops)
-{
- int len = strlen(str);
- int nob = len;
-
- return (sscanf(str, "%u%n", hops, &nob) >= 1 &&
- nob == len &&
- *hops > 0 && *hops < 256);
-}
-
-
-int
-lnet_parse_route (char *str, int *im_a_router)
-{
- /* static scratch buffer OK (single threaded) */
- static char cmd[LNET_SINGLE_TEXTBUF_NOB];
-
- struct list_head nets;
- struct list_head gateways;
- struct list_head *tmp1;
- struct list_head *tmp2;
- __u32 net;
- lnet_nid_t nid;
- lnet_text_buf_t *ltb;
- int rc;
- char *sep;
- char *token = str;
- int ntokens = 0;
- int myrc = -1;
- unsigned int hops;
- int got_hops = 0;
-
- CFS_INIT_LIST_HEAD(&gateways);
- CFS_INIT_LIST_HEAD(&nets);
-
- /* save a copy of the string for error messages */
- strncpy(cmd, str, sizeof(cmd) - 1);
- cmd[sizeof(cmd) - 1] = 0;
-
- sep = str;
- for (;;) {
- /* scan for token start */
- while (lnet_iswhite(*sep))
- sep++;
- if (*sep == 0) {
- if (ntokens < (got_hops ? 3 : 2))
- goto token_error;
- break;
- }
-
- ntokens++;
- token = sep++;
-
- /* scan for token end */
- while (*sep != 0 && !lnet_iswhite(*sep))
- sep++;
- if (*sep != 0)
- *sep++ = 0;
-
- if (ntokens == 1) {
- tmp2 = &nets; /* expanding nets */
- } else if (ntokens == 2 &&
- lnet_parse_hops(token, &hops)) {
- got_hops = 1; /* got a hop count */
- continue;
- } else {
- tmp2 = &gateways; /* expanding gateways */
- }
-
- ltb = lnet_new_text_buf(strlen(token));
- if (ltb == NULL)
- goto out;
-
- strcpy(ltb->ltb_text, token);
- tmp1 = <b->ltb_list;
- list_add_tail(tmp1, tmp2);
-
- while (tmp1 != tmp2) {
- ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list);
-
- rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text);
- if (rc < 0)
- goto token_error;
-
- tmp1 = tmp1->next;
-
- if (rc > 0) { /* expanded! */
- list_del(<b->ltb_list);
- lnet_free_text_buf(ltb);
- continue;
- }
-
- if (ntokens == 1) {
- net = libcfs_str2net(ltb->ltb_text);
- if (net == LNET_NIDNET(LNET_NID_ANY) ||
- LNET_NETTYP(net) == LOLND)
- goto token_error;
- } else {
- nid = libcfs_str2nid(ltb->ltb_text);
- if (nid == LNET_NID_ANY ||
- LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
- goto token_error;
- }
- }
- }
-
- if (!got_hops)
- hops = 1;
-
- LASSERT (!list_empty(&nets));
- LASSERT (!list_empty(&gateways));
-
- list_for_each (tmp1, &nets) {
- ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list);
- net = libcfs_str2net(ltb->ltb_text);
- LASSERT (net != LNET_NIDNET(LNET_NID_ANY));
-
- list_for_each (tmp2, &gateways) {
- ltb = list_entry(tmp2, lnet_text_buf_t, ltb_list);
- nid = libcfs_str2nid(ltb->ltb_text);
- LASSERT (nid != LNET_NID_ANY);
-
- if (lnet_islocalnid(nid)) {
- *im_a_router = 1;
- continue;
- }
-
- rc = lnet_add_route (net, hops, nid);
- if (rc != 0) {
- CERROR("Can't create route "
- "to %s via %s\n",
- libcfs_net2str(net),
- libcfs_nid2str(nid));
- goto out;
- }
- }
- }
-
- myrc = 0;
- goto out;
-
- token_error:
- lnet_syntax("routes", cmd, token - str, strlen(token));
- out:
- lnet_free_text_bufs(&nets);
- lnet_free_text_bufs(&gateways);
- return myrc;
-}
-
-int
-lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router)
-{
- lnet_text_buf_t *ltb;
-
- while (!list_empty(tbs)) {
- ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list);
-
- if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) {
- lnet_free_text_bufs(tbs);
- return -EINVAL;
- }
-
- list_del(<b->ltb_list);
- lnet_free_text_buf(ltb);
- }
-
- return 0;
-}
-
-int
-lnet_parse_routes (char *routes, int *im_a_router)
-{
- struct list_head tbs;
- int rc = 0;
-
- *im_a_router = 0;
-
- if (the_lnet.ln_ptlcompat > 0 &&
- routes[0] != 0) {
- /* Can't route when running in compatibility mode */
- LCONSOLE_ERROR_MSG(0x116, "Route tables are not supported when "
- "'portals_compatible' is set\n");
- return -EINVAL;
- }
-
- CFS_INIT_LIST_HEAD(&tbs);
-
- if (lnet_str2tbs_sep(&tbs, routes) < 0) {
- CERROR("Error parsing routes\n");
- rc = -EINVAL;
- } else {
- rc = lnet_parse_route_tbs(&tbs, im_a_router);
- }
-
- LASSERT (lnet_tbnob == 0);
- return rc;
-}
-
-void
-lnet_print_range_exprs(struct list_head *exprs)
-{
- struct list_head *e;
- lnet_range_expr_t *lre;
-
- list_for_each(e, exprs) {
- lre = list_entry(exprs->next, lnet_range_expr_t, lre_list);
-
- CDEBUG(D_WARNING, "%d-%d/%d\n",
- lre->lre_min, lre->lre_max, lre->lre_stride);
- }
-
- CDEBUG(D_WARNING, "%d allocated\n", lnet_re_alloc);
-}
-
-int
-lnet_new_range_expr(struct list_head *exprs, int min, int max, int stride)
-{
- lnet_range_expr_t *lre;
-
- CDEBUG(D_NET, "%d-%d/%d\n", min, max, stride);
-
- if (min < 0 || min > 255 || min > max || stride < 0)
- return -EINVAL;
-
- LIBCFS_ALLOC(lre, sizeof(*lre));
- if (lre == NULL)
- return -ENOMEM;
-
- lnet_re_alloc++;
-
- lre->lre_min = min;
- lre->lre_max = max;
- lre->lre_stride = stride;
-
- list_add(&lre->lre_list, exprs);
- return 0;
-}
-
-void
-lnet_destroy_range_exprs(struct list_head *exprs)
-{
- lnet_range_expr_t *lre;
-
- while (!list_empty(exprs)) {
- lre = list_entry(exprs->next, lnet_range_expr_t, lre_list);
-
- list_del(&lre->lre_list);
- LIBCFS_FREE(lre, sizeof(*lre));
- lnet_re_alloc--;
- }
-}
-
-int
-lnet_parse_range_expr(struct list_head *exprs, char *str)
-{
- int nob = strlen(str);
- char *sep;
- int n;
- int x;
- int y;
- int z;
- int rc;
-
- if (nob == 0)
- return -EINVAL;
-
- if (!strcmp(str, "*")) /* match all */
- return lnet_new_range_expr(exprs, 0, 255, 1);
-
- n = nob;
- if (sscanf(str, "%u%n", &x, &n) >= 1 && n == nob) {
- /* simple number */
- return lnet_new_range_expr(exprs, x, x, 1);
- }
-
- /* Has to be an expansion */
- if (!(str[0] == '[' && nob > 2 && str[nob-1] == ']'))
- return -EINVAL;
-
- nob -= 2;
- str++;
- str[nob] = 0;
-
- do {
- /* Comma separated list of expressions... */
- sep = strchr(str, ',');
- if (sep != NULL)
- *sep++ = 0;
-
- nob = strlen(str);
- n = nob;
- if (sscanf(str, "%u%n", &x, &n) >= 1 && n == nob) {
- /* simple number */
- rc = lnet_new_range_expr(exprs, x, x, 1);
- if (rc != 0)
- return rc;
-
- continue;
- }
-
- n = nob;
- if (sscanf(str, "%u-%u%n", &x, &y, &n) >= 2 && n == nob) {
- /* simple range */
- rc = lnet_new_range_expr(exprs, x, y, 1);
- if (rc != 0)
- return rc;
- continue;
- }
-
- n = nob;
- if (sscanf(str, "%u-%u/%u%n", &x, &y, &z, &n) >= 3 && n == nob) {
- /* strided range */
- rc = lnet_new_range_expr(exprs, x, y, z);
- if (rc != 0)
- return rc;
- continue;
- }
-
- return -EINVAL;
-
- } while ((str = sep) != NULL);
-
- return 0;
-}
-
-int
-lnet_match_network_token(char *token, __u32 *ipaddrs, int nip)
-{
- struct list_head exprs[4];
- struct list_head *e;
- lnet_range_expr_t *re;
- char *str;
- int i;
- int j;
- __u32 ip;
- int n;
- int match;
- int rc;
-
- for (i = 0; i < 4; i++)
- CFS_INIT_LIST_HEAD(&exprs[i]);
-
- for (i = 0; i < 4; i++) {
- str = token;
- if (i != 3) {
- token = strchr(token, '.');
- if (token == NULL) {
- rc = -EINVAL;
- goto out;
- }
- *token++ = 0;
- }
-
- rc = lnet_parse_range_expr(&exprs[i], str);
- if (rc != 0) {
- LASSERT (rc < 0);
- goto out;
- }
- }
-
- for (match = i = 0; !match && i < nip; i++) {
- ip = ipaddrs[i];
-
- for (match = 1, j = 0; match && j < 4; j++) {
- n = (ip >> (8 * (3 - j))) & 0xff;
- match = 0;
-
- list_for_each(e, &exprs[j]) {
- re = list_entry(e, lnet_range_expr_t, lre_list);
-
- if (re->lre_min <= n &&
- re->lre_max >= n &&
- (n - re->lre_min) % re->lre_stride == 0) {
- match = 1;
- break;
- }
- }
- }
- }
-
- rc = match ? 1 : 0;
-
- out:
- for (i = 0; i < 4; i++)
- lnet_destroy_range_exprs(&exprs[i]);
- LASSERT (lnet_re_alloc == 0);
-
- return rc;
-}
-
-int
-lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip)
-{
- static char tokens[LNET_SINGLE_TEXTBUF_NOB];
-
- int matched = 0;
- int ntokens = 0;
- int len;
- char *net = NULL;
- char *sep;
- char *token;
- int rc;
-
- LASSERT (strlen(net_entry) < sizeof(tokens));
-
- /* work on a copy of the string */
- strcpy(tokens, net_entry);
- sep = tokens;
- for (;;) {
- /* scan for token start */
- while (lnet_iswhite(*sep))
- sep++;
- if (*sep == 0)
- break;
-
- token = sep++;
-
- /* scan for token end */
- while (*sep != 0 && !lnet_iswhite(*sep))
- sep++;
- if (*sep != 0)
- *sep++ = 0;
-
- if (ntokens++ == 0) {
- net = token;
- continue;
- }
-
- len = strlen(token);
-
- rc = lnet_match_network_token(token, ipaddrs, nip);
- if (rc < 0) {
- lnet_syntax("ip2nets", net_entry,
- token - tokens, len);
- return rc;
- }
-
- matched |= (rc != 0);
- }
-
- if (!matched)
- return 0;
-
- strcpy(net_entry, net); /* replace with matched net */
- return 1;
-}
-
-__u32
-lnet_netspec2net(char *netspec)
-{
- char *bracket = strchr(netspec, '(');
- __u32 net;
-
- if (bracket != NULL)
- *bracket = 0;
-
- net = libcfs_str2net(netspec);
-
- if (bracket != NULL)
- *bracket = '(';
-
- return net;
-}
-
-int
-lnet_splitnets(char *source, struct list_head *nets)
-{
- int offset = 0;
- int offset2;
- int len;
- lnet_text_buf_t *tb;
- lnet_text_buf_t *tb2;
- struct list_head *t;
- char *sep;
- char *bracket;
- __u32 net;
-
- LASSERT (!list_empty(nets));
- LASSERT (nets->next == nets->prev); /* single entry */
-
- tb = list_entry(nets->next, lnet_text_buf_t, ltb_list);
-
- for (;;) {
- sep = strchr(tb->ltb_text, ',');
- bracket = strchr(tb->ltb_text, '(');
-
- if (sep != NULL &&
- bracket != NULL &&
- bracket < sep) {
- /* netspec lists interfaces... */
-
- offset2 = offset + (bracket - tb->ltb_text);
- len = strlen(bracket);
-
- bracket = strchr(bracket + 1, ')');
-
- if (bracket == NULL ||
- !(bracket[1] == ',' || bracket[1] == 0)) {
- lnet_syntax("ip2nets", source, offset2, len);
- return -EINVAL;
- }
-
- sep = (bracket[1] == 0) ? NULL : bracket + 1;
- }
-
- if (sep != NULL)
- *sep++ = 0;
-
- net = lnet_netspec2net(tb->ltb_text);
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- lnet_syntax("ip2nets", source, offset,
- strlen(tb->ltb_text));
- return -EINVAL;
- }
-
- list_for_each(t, nets) {
- tb2 = list_entry(t, lnet_text_buf_t, ltb_list);
-
- if (tb2 == tb)
- continue;
-
- if (net == lnet_netspec2net(tb2->ltb_text)) {
- /* duplicate network */
- lnet_syntax("ip2nets", source, offset,
- strlen(tb->ltb_text));
- return -EINVAL;
- }
- }
-
- if (sep == NULL)
- return 0;
-
- offset += sep - tb->ltb_text;
- tb2 = lnet_new_text_buf(strlen(sep));
- if (tb2 == NULL)
- return -ENOMEM;
-
- strcpy(tb2->ltb_text, sep);
- list_add_tail(&tb2->ltb_list, nets);
-
- tb = tb2;
- }
-}
-
-int
-lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
-{
- static char networks[LNET_SINGLE_TEXTBUF_NOB];
- static char source[LNET_SINGLE_TEXTBUF_NOB];
-
- struct list_head raw_entries;
- struct list_head matched_nets;
- struct list_head current_nets;
- struct list_head *t;
- struct list_head *t2;
- lnet_text_buf_t *tb;
- lnet_text_buf_t *tb2;
- __u32 net1;
- __u32 net2;
- int len;
- int count;
- int dup;
- int rc;
-
- CFS_INIT_LIST_HEAD(&raw_entries);
- if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) {
- CERROR("Error parsing ip2nets\n");
- LASSERT (lnet_tbnob == 0);
- return -EINVAL;
- }
-
- CFS_INIT_LIST_HEAD(&matched_nets);
- CFS_INIT_LIST_HEAD(¤t_nets);
- networks[0] = 0;
- count = 0;
- len = 0;
- rc = 0;
-
- while (!list_empty(&raw_entries)) {
- tb = list_entry(raw_entries.next, lnet_text_buf_t, ltb_list);
-
- strncpy(source, tb->ltb_text, sizeof(source)-1);
- source[sizeof(source)-1] = 0;
-
- /* replace ltb_text with the network(s) add on match */
- rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip);
- if (rc < 0)
- break;
-
- list_del(&tb->ltb_list);
-
- if (rc == 0) { /* no match */
- lnet_free_text_buf(tb);
- continue;
- }
-
- /* split into separate networks */
- CFS_INIT_LIST_HEAD(¤t_nets);
- list_add(&tb->ltb_list, ¤t_nets);
- rc = lnet_splitnets(source, ¤t_nets);
- if (rc < 0)
- break;
-
- dup = 0;
- list_for_each (t, ¤t_nets) {
- tb = list_entry(t, lnet_text_buf_t, ltb_list);
- net1 = lnet_netspec2net(tb->ltb_text);
- LASSERT (net1 != LNET_NIDNET(LNET_NID_ANY));
-
- list_for_each(t2, &matched_nets) {
- tb2 = list_entry(t2, lnet_text_buf_t, ltb_list);
- net2 = lnet_netspec2net(tb2->ltb_text);
- LASSERT (net2 != LNET_NIDNET(LNET_NID_ANY));
-
- if (net1 == net2) {
- dup = 1;
- break;
- }
- }
-
- if (dup)
- break;
- }
-
- if (dup) {
- lnet_free_text_bufs(¤t_nets);
- continue;
- }
-
- list_for_each_safe(t, t2, ¤t_nets) {
- tb = list_entry(t, lnet_text_buf_t, ltb_list);
-
- list_del(&tb->ltb_list);
- list_add_tail(&tb->ltb_list, &matched_nets);
-
- len += snprintf(networks + len, sizeof(networks) - len,
- "%s%s", (len == 0) ? "" : ",",
- tb->ltb_text);
-
- if (len >= sizeof(networks)) {
- CERROR("Too many matched networks\n");
- rc = -E2BIG;
- goto out;
- }
- }
-
- count++;
- }
-
- out:
- lnet_free_text_bufs(&raw_entries);
- lnet_free_text_bufs(&matched_nets);
- lnet_free_text_bufs(¤t_nets);
- LASSERT (lnet_tbnob == 0);
-
- if (rc < 0)
- return rc;
-
- *networksp = networks;
- return count;
-}
-
-#ifdef __KERNEL__
-void
-lnet_ipaddr_free_enumeration(__u32 *ipaddrs, int nip)
-{
- LIBCFS_FREE(ipaddrs, nip * sizeof(*ipaddrs));
-}
-
-int
-lnet_ipaddr_enumerate (__u32 **ipaddrsp)
-{
- int up;
- __u32 netmask;
- __u32 *ipaddrs;
- __u32 *ipaddrs2;
- int nip;
- char **ifnames;
- int nif = libcfs_ipif_enumerate(&ifnames);
- int i;
- int rc;
-
- if (nif <= 0)
- return nif;
-
- LIBCFS_ALLOC(ipaddrs, nif * sizeof(*ipaddrs));
- if (ipaddrs == NULL) {
- CERROR("Can't allocate ipaddrs[%d]\n", nif);
- libcfs_ipif_free_enumeration(ifnames, nif);
- return -ENOMEM;
- }
-
- for (i = nip = 0; i < nif; i++) {
- if (!strcmp(ifnames[i], "lo"))
- continue;
-
- rc = libcfs_ipif_query(ifnames[i], &up,
- &ipaddrs[nip], &netmask);
- if (rc != 0) {
- CWARN("Can't query interface %s: %d\n",
- ifnames[i], rc);
- continue;
- }
-
- if (!up) {
- CWARN("Ignoring interface %s: it's down\n",
- ifnames[i]);
- continue;
- }
-
- nip++;
- }
-
- libcfs_ipif_free_enumeration(ifnames, nif);
-
- if (nip == nif) {
- *ipaddrsp = ipaddrs;
- } else {
- if (nip > 0) {
- LIBCFS_ALLOC(ipaddrs2, nip * sizeof(*ipaddrs2));
- if (ipaddrs2 == NULL) {
- CERROR("Can't allocate ipaddrs[%d]\n", nip);
- nip = -ENOMEM;
- } else {
- memcpy(ipaddrs2, ipaddrs,
- nip * sizeof(*ipaddrs));
- *ipaddrsp = ipaddrs2;
- rc = nip;
- }
- }
- lnet_ipaddr_free_enumeration(ipaddrs, nif);
- }
- return nip;
-}
-
-int
-lnet_parse_ip2nets (char **networksp, char *ip2nets)
-{
- __u32 *ipaddrs;
- int nip = lnet_ipaddr_enumerate(&ipaddrs);
- int rc;
-
- if (nip < 0) {
- LCONSOLE_ERROR_MSG(0x117, "Error %d enumerating local IP "
- "interfaces for ip2nets to match\n", nip);
- return nip;
- }
-
- if (nip == 0) {
- LCONSOLE_ERROR_MSG(0x118, "No local IP interfaces "
- "for ip2nets to match\n");
- return -ENOENT;
- }
-
- rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
- lnet_ipaddr_free_enumeration(ipaddrs, nip);
-
- if (rc < 0) {
- LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc);
- return rc;
- }
-
- if (rc == 0) {
- LCONSOLE_ERROR_MSG(0x11a, "ip2nets does not match "
- "any local IP interfaces\n");
- return -ENOENT;
- }
-
- return 0;
-}
-
-int
-lnet_set_ip_niaddr (lnet_ni_t *ni)
-{
- __u32 net = LNET_NIDNET(ni->ni_nid);
- char **names;
- int n;
- __u32 ip;
- __u32 netmask;
- int up;
- int i;
- int rc;
-
- /* Convenience for LNDs that use the IP address of a local interface as
- * the local address part of their NID */
-
- if (ni->ni_interfaces[0] != NULL) {
-
- CLASSERT (LNET_MAX_INTERFACES > 1);
-
- if (ni->ni_interfaces[1] != NULL) {
- CERROR("Net %s doesn't support multiple interfaces\n",
- libcfs_net2str(net));
- return -EPERM;
- }
-
- rc = libcfs_ipif_query(ni->ni_interfaces[0],
- &up, &ip, &netmask);
- if (rc != 0) {
- CERROR("Net %s can't query interface %s: %d\n",
- libcfs_net2str(net), ni->ni_interfaces[0], rc);
- return -EPERM;
- }
-
- if (!up) {
- CERROR("Net %s can't use interface %s: it's down\n",
- libcfs_net2str(net), ni->ni_interfaces[0]);
- return -ENETDOWN;
- }
-
- ni->ni_nid = LNET_MKNID(net, ip);
- return 0;
- }
-
- n = libcfs_ipif_enumerate(&names);
- if (n <= 0) {
- CERROR("Net %s can't enumerate interfaces: %d\n",
- libcfs_net2str(net), n);
- return 0;
- }
-
- for (i = 0; i < n; i++) {
- if (!strcmp(names[i], "lo")) /* skip the loopback IF */
- continue;
-
- rc = libcfs_ipif_query(names[i], &up, &ip, &netmask);
-
- if (rc != 0) {
- CWARN("Net %s can't query interface %s: %d\n",
- libcfs_net2str(net), names[i], rc);
- continue;
- }
-
- if (!up) {
- CWARN("Net %s ignoring interface %s (down)\n",
- libcfs_net2str(net), names[i]);
- continue;
- }
-
- libcfs_ipif_free_enumeration(names, n);
- ni->ni_nid = LNET_MKNID(net, ip);
- return 0;
- }
-
- CERROR("Net %s can't find any interfaces\n", libcfs_net2str(net));
- libcfs_ipif_free_enumeration(names, n);
- return -ENOENT;
-}
-EXPORT_SYMBOL(lnet_set_ip_niaddr);
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-eq.c
- * Library level Event queue management routines
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/lib-lnet.h>
-
-int
-LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback,
- lnet_handle_eq_t *handle)
-{
- lnet_eq_t *eq;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- /* We need count to be a power of 2 so that when eq_{enq,deq}_seq
- * overflow, they don't skip entries, so the queue has the same
- * apparant capacity at all times */
-
- if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */
- do { /* knock off all but the top bit... */
- count &= ~LOWEST_BIT_SET (count);
- } while (count != LOWEST_BIT_SET(count));
-
- count <<= 1; /* ...and round up */
- }
-
- if (count == 0) /* catch bad parameter / overflow on roundup */
- return (-EINVAL);
-
- eq = lnet_eq_alloc();
- if (eq == NULL)
- return (-ENOMEM);
-
- LIBCFS_ALLOC(eq->eq_events, count * sizeof(lnet_event_t));
- if (eq->eq_events == NULL) {
- LNET_LOCK();
- lnet_eq_free (eq);
- LNET_UNLOCK();
-
- return -ENOMEM;
- }
-
- /* NB this resets all event sequence numbers to 0, to be earlier
- * than eq_deq_seq */
- memset(eq->eq_events, 0, count * sizeof(lnet_event_t));
-
- eq->eq_deq_seq = 1;
- eq->eq_enq_seq = 1;
- eq->eq_size = count;
- eq->eq_refcount = 0;
- eq->eq_callback = callback;
-
- LNET_LOCK();
-
- lnet_initialise_handle (&eq->eq_lh, LNET_COOKIE_TYPE_EQ);
- list_add (&eq->eq_list, &the_lnet.ln_active_eqs);
-
- LNET_UNLOCK();
-
- lnet_eq2handle(handle, eq);
- return (0);
-}
-
-int
-LNetEQFree(lnet_handle_eq_t eqh)
-{
- lnet_eq_t *eq;
- int size;
- lnet_event_t *events;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- LNET_LOCK();
-
- eq = lnet_handle2eq(&eqh);
- if (eq == NULL) {
- LNET_UNLOCK();
- return (-ENOENT);
- }
-
- if (eq->eq_refcount != 0) {
- LNET_UNLOCK();
- return (-EBUSY);
- }
-
- /* stash for free after lock dropped */
- events = eq->eq_events;
- size = eq->eq_size;
-
- lnet_invalidate_handle (&eq->eq_lh);
- list_del (&eq->eq_list);
- lnet_eq_free (eq);
-
- LNET_UNLOCK();
-
- LIBCFS_FREE(events, size * sizeof (lnet_event_t));
-
- return 0;
-}
-
-int
-lib_get_event (lnet_eq_t *eq, lnet_event_t *ev)
-{
- int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
- lnet_event_t *new_event = &eq->eq_events[new_index];
- int rc;
- ENTRY;
-
- CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
- new_event, eq->eq_deq_seq, eq->eq_size);
-
- if (LNET_SEQ_GT (eq->eq_deq_seq, new_event->sequence)) {
- RETURN(0);
- }
-
- /* We've got a new event... */
- *ev = *new_event;
-
- /* ...but did it overwrite an event we've not seen yet? */
- if (eq->eq_deq_seq == new_event->sequence) {
- rc = 1;
- } else {
- /* don't complain with CERROR: some EQs are sized small
- * anyway; if it's important, the caller should complain */
- CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n",
- eq->eq_deq_seq, new_event->sequence);
- rc = -EOVERFLOW;
- }
-
- eq->eq_deq_seq = new_event->sequence + 1;
- RETURN(rc);
-}
-
-
-int
-LNetEQGet (lnet_handle_eq_t eventq, lnet_event_t *event)
-{
- int which;
-
- return LNetEQPoll(&eventq, 1, 0,
- event, &which);
-}
-
-int
-LNetEQWait (lnet_handle_eq_t eventq, lnet_event_t *event)
-{
- int which;
-
- return LNetEQPoll(&eventq, 1, LNET_TIME_FOREVER,
- event, &which);
-}
-
-int
-LNetEQPoll (lnet_handle_eq_t *eventqs, int neq, int timeout_ms,
- lnet_event_t *event, int *which)
-{
- int i;
- int rc;
-#ifdef __KERNEL__
- cfs_waitlink_t wl;
- cfs_time_t now;
-#else
- struct timeval then;
- struct timeval now;
-# ifdef HAVE_LIBPTHREAD
- struct timespec ts;
-# endif
- lnet_ni_t *eqwaitni = the_lnet.ln_eqwaitni;
-#endif
- ENTRY;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- if (neq < 1)
- RETURN(-ENOENT);
-
- LNET_LOCK();
-
- for (;;) {
- for (i = 0; i < neq; i++) {
- lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]);
-
- if (eq == NULL) {
- LNET_UNLOCK();
- RETURN(-ENOENT);
- }
-
- rc = lib_get_event (eq, event);
- if (rc != 0) {
- LNET_UNLOCK();
- *which = i;
- RETURN(rc);
- }
- }
-
-#ifdef __KERNEL__
- if (timeout_ms == 0) {
- LNET_UNLOCK();
- RETURN (0);
- }
-
- cfs_waitlink_init(&wl);
- set_current_state(TASK_INTERRUPTIBLE);
- cfs_waitq_add(&the_lnet.ln_waitq, &wl);
-
- LNET_UNLOCK();
-
- if (timeout_ms < 0) {
- cfs_waitq_wait (&wl, CFS_TASK_INTERRUPTIBLE);
- } else {
- struct timeval tv;
-
- now = cfs_time_current();
- cfs_waitq_timedwait(&wl, CFS_TASK_INTERRUPTIBLE,
- cfs_time_seconds(timeout_ms)/1000);
- cfs_duration_usec(cfs_time_sub(cfs_time_current(), now),
- &tv);
- timeout_ms -= tv.tv_sec * 1000 + tv.tv_usec / 1000;
- if (timeout_ms < 0)
- timeout_ms = 0;
- }
-
- LNET_LOCK();
- cfs_waitq_del(&the_lnet.ln_waitq, &wl);
-#else
- if (eqwaitni != NULL) {
- /* I have a single NI that I have to call into, to get
- * events queued, or to block. */
- lnet_ni_addref_locked(eqwaitni);
- LNET_UNLOCK();
-
- if (timeout_ms <= 0) {
- (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms);
- } else {
- gettimeofday(&then, NULL);
-
- (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms);
-
- gettimeofday(&now, NULL);
- timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
- (now.tv_usec - then.tv_usec) / 1000;
- if (timeout_ms < 0)
- timeout_ms = 0;
- }
-
- LNET_LOCK();
- lnet_ni_decref_locked(eqwaitni);
-
- /* don't call into eqwaitni again if timeout has
- * expired */
- if (timeout_ms == 0)
- eqwaitni = NULL;
-
- continue; /* go back and check for events */
- }
-
- if (timeout_ms == 0) {
- LNET_UNLOCK();
- RETURN (0);
- }
-
-# ifndef HAVE_LIBPTHREAD
- /* If I'm single-threaded, LNET fails at startup if it can't
- * set the_lnet.ln_eqwaitni correctly. */
- LBUG();
-# else
- if (timeout_ms < 0) {
- pthread_cond_wait(&the_lnet.ln_cond,
- &the_lnet.ln_lock);
- } else {
- gettimeofday(&then, NULL);
-
- ts.tv_sec = then.tv_sec + timeout_ms/1000;
- ts.tv_nsec = then.tv_usec * 1000 +
- (timeout_ms%1000) * 1000000;
- if (ts.tv_nsec >= 1000000000) {
- ts.tv_sec++;
- ts.tv_nsec -= 1000000000;
- }
-
- pthread_cond_timedwait(&the_lnet.ln_cond,
- &the_lnet.ln_lock, &ts);
-
- gettimeofday(&now, NULL);
- timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
- (now.tv_usec - then.tv_usec) / 1000;
-
- if (timeout_ms < 0)
- timeout_ms = 0;
- }
-# endif
-#endif
- }
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-md.c
- * Memory Descriptor management routines
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <lnet/lib-lnet.h>
-
-/* must be called with LNET_LOCK held */
-void
-lnet_md_unlink(lnet_libmd_t *md)
-{
- if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) == 0) {
- /* first unlink attempt... */
- lnet_me_t *me = md->md_me;
-
- md->md_flags |= LNET_MD_FLAG_ZOMBIE;
-
- /* Disassociate from ME (if any), and unlink it if it was created
- * with LNET_UNLINK */
- if (me != NULL) {
- md->md_me = NULL;
- me->me_md = NULL;
- if (me->me_unlink == LNET_UNLINK)
- lnet_me_unlink(me);
- }
-
- /* ensure all future handle lookups fail */
- lnet_invalidate_handle(&md->md_lh);
- }
-
- if (md->md_refcount != 0) {
- CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
- return;
- }
-
- CDEBUG(D_NET, "Unlinking md %p\n", md);
-
- if (md->md_eq != NULL) {
- md->md_eq->eq_refcount--;
- LASSERT (md->md_eq->eq_refcount >= 0);
- }
-
- list_del (&md->md_list);
- lnet_md_free(md);
-}
-
-/* must be called with LNET_LOCK held */
-static int
-lib_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink)
-{
- lnet_eq_t *eq = NULL;
- int i;
- unsigned int niov;
- int total_length = 0;
-
- /* NB we are passed an allocated, but uninitialised/active md.
- * if we return success, caller may lnet_md_unlink() it.
- * otherwise caller may only lnet_md_free() it.
- */
-
- if (!LNetHandleIsEqual (umd->eq_handle, LNET_EQ_NONE)) {
- eq = lnet_handle2eq(&umd->eq_handle);
- if (eq == NULL)
- return -ENOENT;
- }
-
- /* This implementation doesn't know how to create START events or
- * disable END events. Best to LASSERT our caller is compliant so
- * we find out quickly... */
- /* TODO - reevaluate what should be here in light of
- * the removal of the start and end events
- * maybe there we shouldn't even allow LNET_EQ_NONE!)
- LASSERT (eq == NULL);
- */
-
- lmd->md_me = NULL;
- lmd->md_start = umd->start;
- lmd->md_offset = 0;
- lmd->md_max_size = umd->max_size;
- lmd->md_options = umd->options;
- lmd->md_user_ptr = umd->user_ptr;
- lmd->md_eq = eq;
- lmd->md_threshold = umd->threshold;
- lmd->md_refcount = 0;
- lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
-
- if ((umd->options & LNET_MD_IOVEC) != 0) {
-
- if ((umd->options & LNET_MD_KIOV) != 0) /* Can't specify both */
- return -EINVAL;
-
- lmd->md_niov = niov = umd->length;
- memcpy(lmd->md_iov.iov, umd->start,
- niov * sizeof (lmd->md_iov.iov[0]));
-
- for (i = 0; i < niov; i++) {
- /* We take the base address on trust */
- if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
- return -EINVAL;
-
- total_length += lmd->md_iov.iov[i].iov_len;
- }
-
- lmd->md_length = total_length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) // illegal max_size
- return -EINVAL;
-
- } else if ((umd->options & LNET_MD_KIOV) != 0) {
-#ifndef __KERNEL__
- return -EINVAL;
-#else
- lmd->md_niov = niov = umd->length;
- memcpy(lmd->md_iov.kiov, umd->start,
- niov * sizeof (lmd->md_iov.kiov[0]));
-
- for (i = 0; i < niov; i++) {
- /* We take the page pointer on trust */
- if (lmd->md_iov.kiov[i].kiov_offset +
- lmd->md_iov.kiov[i].kiov_len > CFS_PAGE_SIZE )
- return -EINVAL; /* invalid length */
-
- total_length += lmd->md_iov.kiov[i].kiov_len;
- }
-
- lmd->md_length = total_length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) // illegal max_size
- return -EINVAL;
-#endif
- } else { /* contiguous */
- lmd->md_length = umd->length;
- lmd->md_niov = niov = 1;
- lmd->md_iov.iov[0].iov_base = umd->start;
- lmd->md_iov.iov[0].iov_len = umd->length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > umd->length)) // illegal max_size
- return -EINVAL;
- }
-
- if (eq != NULL)
- eq->eq_refcount++;
-
- /* It's good; let handle2md succeed and add to active mds */
- lnet_initialise_handle (&lmd->md_lh, LNET_COOKIE_TYPE_MD);
- list_add (&lmd->md_list, &the_lnet.ln_active_mds);
-
- return 0;
-}
-
-/* must be called with LNET_LOCK held */
-void
-lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd)
-{
- /* NB this doesn't copy out all the iov entries so when a
- * discontiguous MD is copied out, the target gets to know the
- * original iov pointer (in start) and the number of entries it had
- * and that's all.
- */
- umd->start = lmd->md_start;
- umd->length = ((lmd->md_options & (LNET_MD_IOVEC | LNET_MD_KIOV)) == 0) ?
- lmd->md_length : lmd->md_niov;
- umd->threshold = lmd->md_threshold;
- umd->max_size = lmd->md_max_size;
- umd->options = lmd->md_options;
- umd->user_ptr = lmd->md_user_ptr;
- lnet_eq2handle(&umd->eq_handle, lmd->md_eq);
-}
-
-int
-LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
- lnet_unlink_t unlink, lnet_handle_md_t *handle)
-{
- lnet_me_t *me;
- lnet_libmd_t *md;
- int rc;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 &&
- umd.length > LNET_MAX_IOV) /* too many fragments */
- return -EINVAL;
-
- md = lnet_md_alloc(&umd);
- if (md == NULL)
- return -ENOMEM;
-
- LNET_LOCK();
-
- me = lnet_handle2me(&meh);
- if (me == NULL) {
- rc = -ENOENT;
- } else if (me->me_md != NULL) {
- rc = -EBUSY;
- } else {
- rc = lib_md_build(md, &umd, unlink);
- if (rc == 0) {
- me->me_md = md;
- md->md_me = me;
-
- lnet_md2handle(handle, md);
-
- /* check if this MD matches any blocked msgs */
- lnet_match_blocked_msg(md); /* expects LNET_LOCK held */
-
- LNET_UNLOCK();
- return (0);
- }
- }
-
- lnet_md_free (md);
-
- LNET_UNLOCK();
- return (rc);
-}
-
-int
-LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle)
-{
- lnet_libmd_t *md;
- int rc;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 &&
- umd.length > LNET_MAX_IOV) /* too many fragments */
- return -EINVAL;
-
- md = lnet_md_alloc(&umd);
- if (md == NULL)
- return -ENOMEM;
-
- LNET_LOCK();
-
- rc = lib_md_build(md, &umd, unlink);
-
- if (rc == 0) {
- lnet_md2handle(handle, md);
-
- LNET_UNLOCK();
- return (0);
- }
-
- lnet_md_free (md);
-
- LNET_UNLOCK();
- return (rc);
-}
-
-int
-LNetMDUnlink (lnet_handle_md_t mdh)
-{
- lnet_event_t ev;
- lnet_libmd_t *md;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- LNET_LOCK();
-
- md = lnet_handle2md(&mdh);
- if (md == NULL) {
- LNET_UNLOCK();
- return -ENOENT;
- }
-
- /* If the MD is busy, lnet_md_unlink just marks it for deletion, and
- * when the NAL is done, the completion event flags that the MD was
- * unlinked. Otherwise, we enqueue an event now... */
-
- if (md->md_eq != NULL &&
- md->md_refcount == 0) {
- lnet_build_unlink_event(md, &ev);
- lnet_enq_event_locked(md->md_eq, &ev);
- }
-
- lnet_md_unlink(md);
-
- LNET_UNLOCK();
- return 0;
-}
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-me.c
- * Match Entry management routines
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <lnet/lib-lnet.h>
-
-int
-LNetMEAttach(unsigned int portal,
- lnet_process_id_t match_id,
- __u64 match_bits, __u64 ignore_bits,
- lnet_unlink_t unlink, lnet_ins_pos_t pos,
- lnet_handle_me_t *handle)
-{
- lnet_me_t *me;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- if (portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- me = lnet_me_alloc();
- if (me == NULL)
- return -ENOMEM;
-
- LNET_LOCK();
-
- me->me_portal = portal;
- me->me_match_id = match_id;
- me->me_match_bits = match_bits;
- me->me_ignore_bits = ignore_bits;
- me->me_unlink = unlink;
- me->me_md = NULL;
-
- lnet_initialise_handle (&me->me_lh, LNET_COOKIE_TYPE_ME);
-
- if (pos == LNET_INS_AFTER)
- list_add_tail(&me->me_list, &(the_lnet.ln_portals[portal].ptl_ml));
- else
- list_add(&me->me_list, &(the_lnet.ln_portals[portal].ptl_ml));
-
- lnet_me2handle(handle, me);
-
- LNET_UNLOCK();
-
- return 0;
-}
-
-int
-LNetMEInsert(lnet_handle_me_t current_meh,
- lnet_process_id_t match_id,
- __u64 match_bits, __u64 ignore_bits,
- lnet_unlink_t unlink, lnet_ins_pos_t pos,
- lnet_handle_me_t *handle)
-{
- lnet_me_t *current_me;
- lnet_me_t *new_me;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- new_me = lnet_me_alloc();
- if (new_me == NULL)
- return -ENOMEM;
-
- LNET_LOCK();
-
- current_me = lnet_handle2me(¤t_meh);
- if (current_me == NULL) {
- lnet_me_free (new_me);
-
- LNET_UNLOCK();
- return -ENOENT;
- }
-
- new_me->me_portal = current_me->me_portal;
- new_me->me_match_id = match_id;
- new_me->me_match_bits = match_bits;
- new_me->me_ignore_bits = ignore_bits;
- new_me->me_unlink = unlink;
- new_me->me_md = NULL;
-
- lnet_initialise_handle (&new_me->me_lh, LNET_COOKIE_TYPE_ME);
-
- if (pos == LNET_INS_AFTER)
- list_add_tail(&new_me->me_list, ¤t_me->me_list);
- else
- list_add(&new_me->me_list, ¤t_me->me_list);
-
- lnet_me2handle(handle, new_me);
-
- LNET_UNLOCK();
-
- return 0;
-}
-
-int
-LNetMEUnlink(lnet_handle_me_t meh)
-{
- lnet_me_t *me;
- lnet_libmd_t *md;
- lnet_event_t ev;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- LNET_LOCK();
-
- me = lnet_handle2me(&meh);
- if (me == NULL) {
- LNET_UNLOCK();
- return -ENOENT;
- }
-
- md = me->me_md;
- if (md != NULL &&
- md->md_eq != NULL &&
- md->md_refcount == 0) {
- lnet_build_unlink_event(md, &ev);
- lnet_enq_event_locked(md->md_eq, &ev);
- }
-
- lnet_me_unlink(me);
-
- LNET_UNLOCK();
- return 0;
-}
-
-/* call with LNET_LOCK please */
-void
-lnet_me_unlink(lnet_me_t *me)
-{
- list_del (&me->me_list);
-
- if (me->me_md != NULL) {
- me->me_md->md_me = NULL;
- lnet_md_unlink(me->me_md);
- }
-
- lnet_invalidate_handle (&me->me_lh);
- lnet_me_free(me);
-}
-
-#if 0
-static void
-lib_me_dump(lnet_me_t *me)
-{
- CWARN("Match Entry %p ("LPX64")\n", me,
- me->me_lh.lh_cookie);
-
- CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
- me->me_match_bits, me->me_ignore_bits);
-
- CWARN("\tMD\t= %p\n", me->md);
- CWARN("\tprev\t= %p\n",
- list_entry(me->me_list.prev, lnet_me_t, me_list));
- CWARN("\tnext\t= %p\n",
- list_entry(me->me_list.next, lnet_me_t, me_list));
-}
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-move.c
- * Data movement routines
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <lnet/lib-lnet.h>
-
-static int local_nid_dist_zero = 1;
-CFS_MODULE_PARM(local_nid_dist_zero, "i", int, 0444,
- "Reserved");
-
-/* forward ref */
-static void lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg);
-static void lnet_drop_delayed_put(lnet_msg_t *msg, char *reason);
-
-#define LNET_MATCHMD_NONE 0 /* Didn't match */
-#define LNET_MATCHMD_OK 1 /* Matched OK */
-#define LNET_MATCHMD_DROP 2 /* Must be discarded */
-
-static int
-lnet_try_match_md (int index, int op_mask, lnet_process_id_t src,
- unsigned int rlength, unsigned int roffset,
- __u64 match_bits, lnet_libmd_t *md, lnet_msg_t *msg,
- unsigned int *mlength_out, unsigned int *offset_out)
-{
- /* ALWAYS called holding the LNET_LOCK, and can't LNET_UNLOCK;
- * lnet_match_blocked_msg() relies on this to avoid races */
- unsigned int offset;
- unsigned int mlength;
- lnet_me_t *me = md->md_me;
-
- /* mismatched MD op */
- if ((md->md_options & op_mask) == 0)
- return LNET_MATCHMD_NONE;
-
- /* MD exhausted */
- if (lnet_md_exhausted(md))
- return LNET_MATCHMD_NONE;
-
- /* mismatched ME nid/pid? */
- if (me->me_match_id.nid != LNET_NID_ANY &&
- me->me_match_id.nid != src.nid)
- return LNET_MATCHMD_NONE;
-
- if (me->me_match_id.pid != LNET_PID_ANY &&
- me->me_match_id.pid != src.pid)
- return LNET_MATCHMD_NONE;
-
- /* mismatched ME matchbits? */
- if (((me->me_match_bits ^ match_bits) & ~me->me_ignore_bits) != 0)
- return LNET_MATCHMD_NONE;
-
- /* Hurrah! This _is_ a match; check it out... */
-
- if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0)
- offset = md->md_offset;
- else
- offset = roffset;
-
- if ((md->md_options & LNET_MD_MAX_SIZE) != 0) {
- mlength = md->md_max_size;
- LASSERT (md->md_offset + mlength <= md->md_length);
- } else {
- mlength = md->md_length - offset;
- }
-
- if (rlength <= mlength) { /* fits in allowed space */
- mlength = rlength;
- } else if ((md->md_options & LNET_MD_TRUNCATE) == 0) {
- /* this packet _really_ is too big */
- CERROR("Matching packet from %s, match "LPU64
- " length %d too big: %d left, %d allowed\n",
- libcfs_id2str(src), match_bits, rlength,
- md->md_length - offset, mlength);
-
- return LNET_MATCHMD_DROP;
- }
-
- /* Commit to this ME/MD */
- CDEBUG(D_NET, "Incoming %s index %x from %s of "
- "length %d/%d into md "LPX64" [%d] + %d\n",
- (op_mask == LNET_MD_OP_PUT) ? "put" : "get",
- index, libcfs_id2str(src), mlength, rlength,
- md->md_lh.lh_cookie, md->md_niov, offset);
-
- lnet_commit_md(md, msg);
- md->md_offset = offset + mlength;
-
- /* NB Caller will set ev.type and ev.hdr_data */
- msg->msg_ev.initiator = src;
- msg->msg_ev.pt_index = index;
- msg->msg_ev.match_bits = match_bits;
- msg->msg_ev.rlength = rlength;
- msg->msg_ev.mlength = mlength;
- msg->msg_ev.offset = offset;
-
- lnet_md_deconstruct(md, &msg->msg_ev.md);
- lnet_md2handle(&msg->msg_ev.md_handle, md);
-
- *offset_out = offset;
- *mlength_out = mlength;
-
- /* Auto-unlink NOW, so the ME gets unlinked if required.
- * We bumped md->md_refcount above so the MD just gets flagged
- * for unlink when it is finalized. */
- if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 &&
- lnet_md_exhausted(md)) {
- lnet_md_unlink(md);
- }
-
- return LNET_MATCHMD_OK;
-}
-
-static int
-lnet_match_md(int index, int op_mask, lnet_process_id_t src,
- unsigned int rlength, unsigned int roffset,
- __u64 match_bits, lnet_msg_t *msg,
- unsigned int *mlength_out, unsigned int *offset_out,
- lnet_libmd_t **md_out)
-{
- lnet_portal_t *ptl = &the_lnet.ln_portals[index];
- lnet_me_t *me;
- lnet_me_t *tmp;
- lnet_libmd_t *md;
- int rc;
-
- CDEBUG (D_NET, "Request from %s of length %d into portal %d "
- "MB="LPX64"\n", libcfs_id2str(src), rlength, index, match_bits);
-
- if (index < 0 || index >= the_lnet.ln_nportals) {
- CERROR("Invalid portal %d not in [0-%d]\n",
- index, the_lnet.ln_nportals);
- return LNET_MATCHMD_DROP;
- }
-
- list_for_each_entry_safe (me, tmp, &ptl->ptl_ml, me_list) {
- md = me->me_md;
-
- /* ME attached but MD not attached yet */
- if (md == NULL)
- continue;
-
- LASSERT (me == md->md_me);
-
- rc = lnet_try_match_md(index, op_mask, src, rlength,
- roffset, match_bits, md, msg,
- mlength_out, offset_out);
- switch (rc) {
- default:
- LBUG();
-
- case LNET_MATCHMD_NONE:
- continue;
-
- case LNET_MATCHMD_OK:
- *md_out = md;
- return LNET_MATCHMD_OK;
-
- case LNET_MATCHMD_DROP:
- return LNET_MATCHMD_DROP;
- }
- /* not reached */
- }
-
- if (op_mask == LNET_MD_OP_GET ||
- (ptl->ptl_options & LNET_PTL_LAZY) == 0)
- return LNET_MATCHMD_DROP;
-
- return LNET_MATCHMD_NONE;
-}
-
-int
-lnet_fail_nid (lnet_nid_t nid, unsigned int threshold)
-{
- lnet_test_peer_t *tp;
- struct list_head *el;
- struct list_head *next;
- struct list_head cull;
-
- LASSERT (the_lnet.ln_init);
-
- if (threshold != 0) {
- /* Adding a new entry */
- LIBCFS_ALLOC(tp, sizeof(*tp));
- if (tp == NULL)
- return -ENOMEM;
-
- tp->tp_nid = nid;
- tp->tp_threshold = threshold;
-
- LNET_LOCK();
- list_add_tail (&tp->tp_list, &the_lnet.ln_test_peers);
- LNET_UNLOCK();
- return 0;
- }
-
- /* removing entries */
- CFS_INIT_LIST_HEAD (&cull);
-
- LNET_LOCK();
-
- list_for_each_safe (el, next, &the_lnet.ln_test_peers) {
- tp = list_entry (el, lnet_test_peer_t, tp_list);
-
- if (tp->tp_threshold == 0 || /* needs culling anyway */
- nid == LNET_NID_ANY || /* removing all entries */
- tp->tp_nid == nid) /* matched this one */
- {
- list_del (&tp->tp_list);
- list_add (&tp->tp_list, &cull);
- }
- }
-
- LNET_UNLOCK();
-
- while (!list_empty (&cull)) {
- tp = list_entry (cull.next, lnet_test_peer_t, tp_list);
-
- list_del (&tp->tp_list);
- LIBCFS_FREE(tp, sizeof (*tp));
- }
- return 0;
-}
-
-static int
-fail_peer (lnet_nid_t nid, int outgoing)
-{
- lnet_test_peer_t *tp;
- struct list_head *el;
- struct list_head *next;
- struct list_head cull;
- int fail = 0;
-
- CFS_INIT_LIST_HEAD (&cull);
-
- LNET_LOCK();
-
- list_for_each_safe (el, next, &the_lnet.ln_test_peers) {
- tp = list_entry (el, lnet_test_peer_t, tp_list);
-
- if (tp->tp_threshold == 0) {
- /* zombie entry */
- if (outgoing) {
- /* only cull zombies on outgoing tests,
- * since we may be at interrupt priority on
- * incoming messages. */
- list_del (&tp->tp_list);
- list_add (&tp->tp_list, &cull);
- }
- continue;
- }
-
- if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */
- nid == tp->tp_nid) { /* fail this peer */
- fail = 1;
-
- if (tp->tp_threshold != LNET_MD_THRESH_INF) {
- tp->tp_threshold--;
- if (outgoing &&
- tp->tp_threshold == 0) {
- /* see above */
- list_del (&tp->tp_list);
- list_add (&tp->tp_list, &cull);
- }
- }
- break;
- }
- }
-
- LNET_UNLOCK ();
-
- while (!list_empty (&cull)) {
- tp = list_entry (cull.next, lnet_test_peer_t, tp_list);
- list_del (&tp->tp_list);
-
- LIBCFS_FREE(tp, sizeof (*tp));
- }
-
- return (fail);
-}
-
-unsigned int
-lnet_iov_nob (unsigned int niov, struct iovec *iov)
-{
- unsigned int nob = 0;
-
- while (niov-- > 0)
- nob += (iov++)->iov_len;
-
- return (nob);
-}
-
-void
-lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov, unsigned int doffset,
- unsigned int nsiov, struct iovec *siov, unsigned int soffset,
- unsigned int nob)
-{
- /* NB diov, siov are READ-ONLY */
- unsigned int this_nob;
-
- if (nob == 0)
- return;
-
- /* skip complete frags before 'doffset' */
- LASSERT (ndiov > 0);
- while (doffset >= diov->iov_len) {
- doffset -= diov->iov_len;
- diov++;
- ndiov--;
- LASSERT (ndiov > 0);
- }
-
- /* skip complete frags before 'soffset' */
- LASSERT (nsiov > 0);
- while (soffset >= siov->iov_len) {
- soffset -= siov->iov_len;
- siov++;
- nsiov--;
- LASSERT (nsiov > 0);
- }
-
- do {
- LASSERT (ndiov > 0);
- LASSERT (nsiov > 0);
- this_nob = MIN(diov->iov_len - doffset,
- siov->iov_len - soffset);
- this_nob = MIN(this_nob, nob);
-
- memcpy ((char *)diov->iov_base + doffset,
- (char *)siov->iov_base + soffset, this_nob);
- nob -= this_nob;
-
- if (diov->iov_len > doffset + this_nob) {
- doffset += this_nob;
- } else {
- diov++;
- ndiov--;
- doffset = 0;
- }
-
- if (siov->iov_len > soffset + this_nob) {
- soffset += this_nob;
- } else {
- siov++;
- nsiov--;
- soffset = 0;
- }
- } while (nob > 0);
-}
-
-int
-lnet_extract_iov (int dst_niov, struct iovec *dst,
- int src_niov, struct iovec *src,
- unsigned int offset, unsigned int len)
-{
- /* Initialise 'dst' to the subset of 'src' starting at 'offset',
- * for exactly 'len' bytes, and return the number of entries.
- * NB not destructive to 'src' */
- unsigned int frag_len;
- unsigned int niov;
-
- if (len == 0) /* no data => */
- return (0); /* no frags */
-
- LASSERT (src_niov > 0);
- while (offset >= src->iov_len) { /* skip initial frags */
- offset -= src->iov_len;
- src_niov--;
- src++;
- LASSERT (src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT (src_niov > 0);
- LASSERT (niov <= dst_niov);
-
- frag_len = src->iov_len - offset;
- dst->iov_base = ((char *)src->iov_base) + offset;
-
- if (len <= frag_len) {
- dst->iov_len = len;
- return (niov);
- }
-
- dst->iov_len = frag_len;
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-
-#ifndef __KERNEL__
-unsigned int
-lnet_kiov_nob (unsigned int niov, lnet_kiov_t *kiov)
-{
- LASSERT (0);
- return (0);
-}
-
-void
-lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov, unsigned int doffset,
- unsigned int nskiov, lnet_kiov_t *skiov, unsigned int soffset,
- unsigned int nob)
-{
- LASSERT (0);
-}
-
-void
-lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, unsigned int iovoffset,
- unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset,
- unsigned int nob)
-{
- LASSERT (0);
-}
-
-void
-lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset,
- unsigned int niov, struct iovec *iov, unsigned int iovoffset,
- unsigned int nob)
-{
- LASSERT (0);
-}
-
-int
-lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst,
- int src_niov, lnet_kiov_t *src,
- unsigned int offset, unsigned int len)
-{
- LASSERT (0);
-}
-
-#else /* __KERNEL__ */
-
-unsigned int
-lnet_kiov_nob (unsigned int niov, lnet_kiov_t *kiov)
-{
- unsigned int nob = 0;
-
- while (niov-- > 0)
- nob += (kiov++)->kiov_len;
-
- return (nob);
-}
-
-void
-lnet_copy_kiov2kiov (unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset,
- unsigned int nsiov, lnet_kiov_t *siov, unsigned int soffset,
- unsigned int nob)
-{
- /* NB diov, siov are READ-ONLY */
- unsigned int this_nob;
- char *daddr = NULL;
- char *saddr = NULL;
-
- if (nob == 0)
- return;
-
- LASSERT (!in_interrupt ());
-
- LASSERT (ndiov > 0);
- while (doffset >= diov->kiov_len) {
- doffset -= diov->kiov_len;
- diov++;
- ndiov--;
- LASSERT (ndiov > 0);
- }
-
- LASSERT (nsiov > 0);
- while (soffset >= siov->kiov_len) {
- soffset -= siov->kiov_len;
- siov++;
- nsiov--;
- LASSERT (nsiov > 0);
- }
-
- do {
- LASSERT (ndiov > 0);
- LASSERT (nsiov > 0);
- this_nob = MIN(diov->kiov_len - doffset,
- siov->kiov_len - soffset);
- this_nob = MIN(this_nob, nob);
-
- if (daddr == NULL)
- daddr = ((char *)cfs_kmap(diov->kiov_page)) +
- diov->kiov_offset + doffset;
- if (saddr == NULL)
- saddr = ((char *)cfs_kmap(siov->kiov_page)) +
- siov->kiov_offset + soffset;
-
- /* Vanishing risk of kmap deadlock when mapping 2 pages.
- * However in practice at least one of the kiovs will be mapped
- * kernel pages and the map/unmap will be NOOPs */
-
- memcpy (daddr, saddr, this_nob);
- nob -= this_nob;
-
- if (diov->kiov_len > doffset + this_nob) {
- daddr += this_nob;
- doffset += this_nob;
- } else {
- cfs_kunmap(diov->kiov_page);
- daddr = NULL;
- diov++;
- ndiov--;
- doffset = 0;
- }
-
- if (siov->kiov_len > soffset + this_nob) {
- saddr += this_nob;
- soffset += this_nob;
- } else {
- cfs_kunmap(siov->kiov_page);
- saddr = NULL;
- siov++;
- nsiov--;
- soffset = 0;
- }
- } while (nob > 0);
-
- if (daddr != NULL)
- cfs_kunmap(diov->kiov_page);
- if (saddr != NULL)
- cfs_kunmap(siov->kiov_page);
-}
-
-void
-lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, unsigned int iovoffset,
- unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset,
- unsigned int nob)
-{
- /* NB iov, kiov are READ-ONLY */
- unsigned int this_nob;
- char *addr = NULL;
-
- if (nob == 0)
- return;
-
- LASSERT (!in_interrupt ());
-
- LASSERT (niov > 0);
- while (iovoffset >= iov->iov_len) {
- iovoffset -= iov->iov_len;
- iov++;
- niov--;
- LASSERT (niov > 0);
- }
-
- LASSERT (nkiov > 0);
- while (kiovoffset >= kiov->kiov_len) {
- kiovoffset -= kiov->kiov_len;
- kiov++;
- nkiov--;
- LASSERT (nkiov > 0);
- }
-
- do {
- LASSERT (niov > 0);
- LASSERT (nkiov > 0);
- this_nob = MIN(iov->iov_len - iovoffset,
- kiov->kiov_len - kiovoffset);
- this_nob = MIN(this_nob, nob);
-
- if (addr == NULL)
- addr = ((char *)cfs_kmap(kiov->kiov_page)) +
- kiov->kiov_offset + kiovoffset;
-
- memcpy ((char *)iov->iov_base + iovoffset, addr, this_nob);
- nob -= this_nob;
-
- if (iov->iov_len > iovoffset + this_nob) {
- iovoffset += this_nob;
- } else {
- iov++;
- niov--;
- iovoffset = 0;
- }
-
- if (kiov->kiov_len > kiovoffset + this_nob) {
- addr += this_nob;
- kiovoffset += this_nob;
- } else {
- cfs_kunmap(kiov->kiov_page);
- addr = NULL;
- kiov++;
- nkiov--;
- kiovoffset = 0;
- }
-
- } while (nob > 0);
-
- if (addr != NULL)
- cfs_kunmap(kiov->kiov_page);
-}
-
-void
-lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset,
- unsigned int niov, struct iovec *iov, unsigned int iovoffset,
- unsigned int nob)
-{
- /* NB kiov, iov are READ-ONLY */
- unsigned int this_nob;
- char *addr = NULL;
-
- if (nob == 0)
- return;
-
- LASSERT (!in_interrupt ());
-
- LASSERT (nkiov > 0);
- while (kiovoffset >= kiov->kiov_len) {
- kiovoffset -= kiov->kiov_len;
- kiov++;
- nkiov--;
- LASSERT (nkiov > 0);
- }
-
- LASSERT (niov > 0);
- while (iovoffset >= iov->iov_len) {
- iovoffset -= iov->iov_len;
- iov++;
- niov--;
- LASSERT (niov > 0);
- }
-
- do {
- LASSERT (nkiov > 0);
- LASSERT (niov > 0);
- this_nob = MIN(kiov->kiov_len - kiovoffset,
- iov->iov_len - iovoffset);
- this_nob = MIN(this_nob, nob);
-
- if (addr == NULL)
- addr = ((char *)cfs_kmap(kiov->kiov_page)) +
- kiov->kiov_offset + kiovoffset;
-
- memcpy (addr, (char *)iov->iov_base + iovoffset, this_nob);
- nob -= this_nob;
-
- if (kiov->kiov_len > kiovoffset + this_nob) {
- addr += this_nob;
- kiovoffset += this_nob;
- } else {
- cfs_kunmap(kiov->kiov_page);
- addr = NULL;
- kiov++;
- nkiov--;
- kiovoffset = 0;
- }
-
- if (iov->iov_len > iovoffset + this_nob) {
- iovoffset += this_nob;
- } else {
- iov++;
- niov--;
- iovoffset = 0;
- }
- } while (nob > 0);
-
- if (addr != NULL)
- cfs_kunmap(kiov->kiov_page);
-}
-
-int
-lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst,
- int src_niov, lnet_kiov_t *src,
- unsigned int offset, unsigned int len)
-{
- /* Initialise 'dst' to the subset of 'src' starting at 'offset',
- * for exactly 'len' bytes, and return the number of entries.
- * NB not destructive to 'src' */
- unsigned int frag_len;
- unsigned int niov;
-
- if (len == 0) /* no data => */
- return (0); /* no frags */
-
- LASSERT (src_niov > 0);
- while (offset >= src->kiov_len) { /* skip initial frags */
- offset -= src->kiov_len;
- src_niov--;
- src++;
- LASSERT (src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT (src_niov > 0);
- LASSERT (niov <= dst_niov);
-
- frag_len = src->kiov_len - offset;
- dst->kiov_page = src->kiov_page;
- dst->kiov_offset = src->kiov_offset + offset;
-
- if (len <= frag_len) {
- dst->kiov_len = len;
- LASSERT (dst->kiov_offset + dst->kiov_len <= CFS_PAGE_SIZE);
- return (niov);
- }
-
- dst->kiov_len = frag_len;
- LASSERT (dst->kiov_offset + dst->kiov_len <= CFS_PAGE_SIZE);
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-#endif
-
-void
-lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- unsigned int niov = 0;
- struct iovec *iov = NULL;
- lnet_kiov_t *kiov = NULL;
- int rc;
-
- LASSERT (!in_interrupt ());
- LASSERT (mlen == 0 || msg != NULL);
-
- if (msg != NULL) {
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_sending);
- LASSERT(rlen == msg->msg_len);
- LASSERT(mlen <= msg->msg_len);
-
- msg->msg_wanted = mlen;
- msg->msg_offset = offset;
- msg->msg_receiving = 0;
-
- if (mlen != 0) {
- niov = msg->msg_niov;
- iov = msg->msg_iov;
- kiov = msg->msg_kiov;
-
- LASSERT (niov > 0);
- LASSERT ((iov == NULL) != (kiov == NULL));
- }
- }
-
- rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed,
- niov, iov, kiov, offset, mlen, rlen);
- if (rc < 0)
- lnet_finalize(ni, msg, rc);
-}
-
-int
-lnet_compare_routers(lnet_peer_t *p1, lnet_peer_t *p2)
-{
- if (p1->lp_txqnob < p2->lp_txqnob)
- return 1;
-
- if (p1->lp_txqnob > p2->lp_txqnob)
- return -1;
-
- if (p1->lp_txcredits > p2->lp_txcredits)
- return 1;
-
- if (p1->lp_txcredits < p2->lp_txcredits)
- return -1;
-
- return 0;
-}
-
-
-void
-lnet_setpayloadbuffer(lnet_msg_t *msg)
-{
- lnet_libmd_t *md = msg->msg_md;
-
- LASSERT (msg->msg_len > 0);
- LASSERT (!msg->msg_routing);
- LASSERT (md != NULL);
- LASSERT (msg->msg_niov == 0);
- LASSERT (msg->msg_iov == NULL);
- LASSERT (msg->msg_kiov == NULL);
-
- msg->msg_niov = md->md_niov;
- if ((md->md_options & LNET_MD_KIOV) != 0)
- msg->msg_kiov = md->md_iov.kiov;
- else
- msg->msg_iov = md->md_iov.iov;
-}
-
-void
-lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target,
- unsigned int offset, unsigned int len)
-{
- msg->msg_type = type;
- msg->msg_target = target;
- msg->msg_len = len;
- msg->msg_offset = offset;
-
- if (len != 0)
- lnet_setpayloadbuffer(msg);
-
- memset (&msg->msg_hdr, 0, sizeof (msg->msg_hdr));
- msg->msg_hdr.type = cpu_to_le32(type);
- msg->msg_hdr.dest_nid = cpu_to_le64(target.nid);
- msg->msg_hdr.dest_pid = cpu_to_le32(target.pid);
- /* src_nid will be set later */
- msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid);
- msg->msg_hdr.payload_length = cpu_to_le32(len);
-}
-
-void
-lnet_ni_send(lnet_ni_t *ni, lnet_msg_t *msg)
-{
- void *priv = msg->msg_private;
- int rc;
-
- LASSERT (!in_interrupt ());
- LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
- (msg->msg_txcredit && msg->msg_peertxcredit));
-
- rc = (ni->ni_lnd->lnd_send)(ni, priv, msg);
- if (rc < 0)
- lnet_finalize(ni, msg, rc);
-}
-
-int
-lnet_eager_recv_locked(lnet_msg_t *msg)
-{
- lnet_peer_t *peer;
- lnet_ni_t *ni;
- int rc = 0;
-
- LASSERT (!msg->msg_delayed);
- msg->msg_delayed = 1;
-
- LASSERT (msg->msg_receiving);
- LASSERT (!msg->msg_sending);
-
- peer = msg->msg_rxpeer;
- ni = peer->lp_ni;
-
- if (ni->ni_lnd->lnd_eager_recv != NULL) {
- LNET_UNLOCK();
-
- rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
- &msg->msg_private);
- if (rc != 0) {
- CERROR("recv from %s / send to %s aborted: "
- "eager_recv failed %d\n",
- libcfs_nid2str(peer->lp_nid),
- libcfs_id2str(msg->msg_target), rc);
- LASSERT (rc < 0); /* required by my callers */
- }
-
- LNET_LOCK();
- }
-
- return rc;
-}
-
-int
-lnet_post_send_locked (lnet_msg_t *msg, int do_send)
-{
- /* lnet_send is going to LNET_UNLOCK immediately after this, so it sets
- * do_send FALSE and I don't do the unlock/send/lock bit. I return
- * EAGAIN if msg blocked and 0 if sent or OK to send */
- lnet_peer_t *lp = msg->msg_txpeer;
- lnet_ni_t *ni = lp->lp_ni;
-
- /* non-lnet_send() callers have checked before */
- LASSERT (!do_send || msg->msg_delayed);
- LASSERT (!msg->msg_receiving);
-
- if (!msg->msg_peertxcredit) {
- LASSERT ((lp->lp_txcredits < 0) == !list_empty(&lp->lp_txq));
-
- msg->msg_peertxcredit = 1;
- lp->lp_txqnob += msg->msg_len + sizeof(lnet_hdr_t);
- lp->lp_txcredits--;
-
- if (lp->lp_txcredits < lp->lp_mintxcredits)
- lp->lp_mintxcredits = lp->lp_txcredits;
-
- if (lp->lp_txcredits < 0) {
- msg->msg_delayed = 1;
- list_add_tail (&msg->msg_list, &lp->lp_txq);
- return EAGAIN;
- }
- }
-
- if (!msg->msg_txcredit) {
- LASSERT ((ni->ni_txcredits < 0) == !list_empty(&ni->ni_txq));
-
- msg->msg_txcredit = 1;
- ni->ni_txcredits--;
-
- if (ni->ni_txcredits < ni->ni_mintxcredits)
- ni->ni_mintxcredits = ni->ni_txcredits;
-
- if (ni->ni_txcredits < 0) {
- msg->msg_delayed = 1;
- list_add_tail (&msg->msg_list, &ni->ni_txq);
- return EAGAIN;
- }
- }
-
- if (do_send) {
- LNET_UNLOCK();
- lnet_ni_send(ni, msg);
- LNET_LOCK();
- }
- return 0;
-}
-
-#ifdef __KERNEL__
-static void
-lnet_commit_routedmsg (lnet_msg_t *msg)
-{
- /* ALWAYS called holding the LNET_LOCK */
- LASSERT (msg->msg_routing);
-
- the_lnet.ln_counters.msgs_alloc++;
- if (the_lnet.ln_counters.msgs_alloc >
- the_lnet.ln_counters.msgs_max)
- the_lnet.ln_counters.msgs_max =
- the_lnet.ln_counters.msgs_alloc;
-
- the_lnet.ln_counters.route_count++;
- the_lnet.ln_counters.route_length += msg->msg_len;
-
- LASSERT (!msg->msg_onactivelist);
- msg->msg_onactivelist = 1;
- list_add (&msg->msg_activelist, &the_lnet.ln_active_msgs);
-}
-
-lnet_rtrbufpool_t *
-lnet_msg2bufpool(lnet_msg_t *msg)
-{
- lnet_rtrbufpool_t *rbp = &the_lnet.ln_rtrpools[0];
-
- LASSERT (msg->msg_len <= LNET_MTU);
- while (msg->msg_len > rbp->rbp_npages * CFS_PAGE_SIZE) {
- rbp++;
- LASSERT (rbp < &the_lnet.ln_rtrpools[LNET_NRBPOOLS]);
- }
-
- return rbp;
-}
-
-int
-lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv)
-{
- /* lnet_parse is going to LNET_UNLOCK immediately after this, so it
- * sets do_recv FALSE and I don't do the unlock/send/lock bit. I
- * return EAGAIN if msg blocked and 0 if sent or OK to send */
- lnet_peer_t *lp = msg->msg_rxpeer;
- lnet_rtrbufpool_t *rbp;
- lnet_rtrbuf_t *rb;
-
- LASSERT (msg->msg_iov == NULL);
- LASSERT (msg->msg_kiov == NULL);
- LASSERT (msg->msg_niov == 0);
- LASSERT (msg->msg_routing);
- LASSERT (msg->msg_receiving);
- LASSERT (!msg->msg_sending);
-
- /* non-lnet_parse callers only send delayed messages */
- LASSERT (!do_recv || msg->msg_delayed);
-
- if (!msg->msg_peerrtrcredit) {
- LASSERT ((lp->lp_rtrcredits < 0) == !list_empty(&lp->lp_rtrq));
-
- msg->msg_peerrtrcredit = 1;
- lp->lp_rtrcredits--;
- if (lp->lp_rtrcredits < lp->lp_minrtrcredits)
- lp->lp_minrtrcredits = lp->lp_rtrcredits;
-
- if (lp->lp_rtrcredits < 0) {
- /* must have checked eager_recv before here */
- LASSERT (msg->msg_delayed);
- list_add_tail(&msg->msg_list, &lp->lp_rtrq);
- return EAGAIN;
- }
- }
-
- rbp = lnet_msg2bufpool(msg);
-
- if (!msg->msg_rtrcredit) {
- LASSERT ((rbp->rbp_credits < 0) == !list_empty(&rbp->rbp_msgs));
-
- msg->msg_rtrcredit = 1;
- rbp->rbp_credits--;
- if (rbp->rbp_credits < rbp->rbp_mincredits)
- rbp->rbp_mincredits = rbp->rbp_credits;
-
- if (rbp->rbp_credits < 0) {
- /* must have checked eager_recv before here */
- LASSERT (msg->msg_delayed);
- list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
- return EAGAIN;
- }
- }
-
- LASSERT (!list_empty(&rbp->rbp_bufs));
- rb = list_entry(rbp->rbp_bufs.next, lnet_rtrbuf_t, rb_list);
- list_del(&rb->rb_list);
-
- msg->msg_niov = rbp->rbp_npages;
- msg->msg_kiov = &rb->rb_kiov[0];
-
- if (do_recv) {
- LNET_UNLOCK();
- lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
- 0, msg->msg_len, msg->msg_len);
- LNET_LOCK();
- }
- return 0;
-}
-#endif
-
-void
-lnet_return_credits_locked (lnet_msg_t *msg)
-{
- lnet_peer_t *txpeer = msg->msg_txpeer;
- lnet_peer_t *rxpeer = msg->msg_rxpeer;
- lnet_msg_t *msg2;
- lnet_ni_t *ni;
-
- if (msg->msg_txcredit) {
- /* give back NI txcredits */
- msg->msg_txcredit = 0;
- ni = txpeer->lp_ni;
-
- LASSERT((ni->ni_txcredits < 0) == !list_empty(&ni->ni_txq));
-
- ni->ni_txcredits++;
- if (ni->ni_txcredits <= 0) {
- msg2 = list_entry(ni->ni_txq.next, lnet_msg_t, msg_list);
- list_del(&msg2->msg_list);
-
- LASSERT(msg2->msg_txpeer->lp_ni == ni);
- LASSERT(msg2->msg_delayed);
-
- (void) lnet_post_send_locked(msg2, 1);
- }
- }
-
- if (msg->msg_peertxcredit) {
- /* give back peer txcredits */
- msg->msg_peertxcredit = 0;
-
- LASSERT((txpeer->lp_txcredits < 0) == !list_empty(&txpeer->lp_txq));
-
- txpeer->lp_txqnob -= msg->msg_len + sizeof(lnet_hdr_t);
- LASSERT (txpeer->lp_txqnob >= 0);
-
- txpeer->lp_txcredits++;
- if (txpeer->lp_txcredits <= 0) {
- msg2 = list_entry(txpeer->lp_txq.next,
- lnet_msg_t, msg_list);
- list_del(&msg2->msg_list);
-
- LASSERT (msg2->msg_txpeer == txpeer);
- LASSERT (msg2->msg_delayed);
-
- (void) lnet_post_send_locked(msg2, 1);
- }
- }
-
- if (txpeer != NULL) {
- msg->msg_txpeer = NULL;
- lnet_peer_decref_locked(txpeer);
- }
-
-#ifdef __KERNEL__
- if (msg->msg_rtrcredit) {
- /* give back global router credits */
- lnet_rtrbuf_t *rb;
- lnet_rtrbufpool_t *rbp;
-
- /* NB If a msg ever blocks for a buffer in rbp_msgs, it stays
- * there until it gets one allocated, or aborts the wait
- * itself */
- LASSERT (msg->msg_kiov != NULL);
-
- rb = list_entry(msg->msg_kiov, lnet_rtrbuf_t, rb_kiov[0]);
- rbp = rb->rb_pool;
- LASSERT (rbp == lnet_msg2bufpool(msg));
-
- msg->msg_kiov = NULL;
- msg->msg_rtrcredit = 0;
-
- LASSERT((rbp->rbp_credits < 0) == !list_empty(&rbp->rbp_msgs));
- LASSERT((rbp->rbp_credits > 0) == !list_empty(&rbp->rbp_bufs));
-
- list_add(&rb->rb_list, &rbp->rbp_bufs);
- rbp->rbp_credits++;
- if (rbp->rbp_credits <= 0) {
- msg2 = list_entry(rbp->rbp_msgs.next,
- lnet_msg_t, msg_list);
- list_del(&msg2->msg_list);
-
- (void) lnet_post_routed_recv_locked(msg2, 1);
- }
- }
-
- if (msg->msg_peerrtrcredit) {
- /* give back peer router credits */
- msg->msg_peerrtrcredit = 0;
-
- LASSERT((rxpeer->lp_rtrcredits < 0) == !list_empty(&rxpeer->lp_rtrq));
-
- rxpeer->lp_rtrcredits++;
- if (rxpeer->lp_rtrcredits <= 0) {
- msg2 = list_entry(rxpeer->lp_rtrq.next,
- lnet_msg_t, msg_list);
- list_del(&msg2->msg_list);
-
- (void) lnet_post_routed_recv_locked(msg2, 1);
- }
- }
-#else
- LASSERT (!msg->msg_rtrcredit);
- LASSERT (!msg->msg_peerrtrcredit);
-#endif
- if (rxpeer != NULL) {
- msg->msg_rxpeer = NULL;
- lnet_peer_decref_locked(rxpeer);
- }
-}
-
-int
-lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg)
-{
- lnet_nid_t dst_nid = msg->msg_target.nid;
- lnet_ni_t *src_ni;
- lnet_ni_t *local_ni;
- lnet_remotenet_t *rnet;
- lnet_route_t *route;
- lnet_route_t *best_route;
- struct list_head *tmp;
- lnet_peer_t *lp;
- lnet_peer_t *lp2;
- int rc;
-
- LASSERT (msg->msg_txpeer == NULL);
- LASSERT (!msg->msg_sending);
- LASSERT (!msg->msg_target_is_router);
- LASSERT (!msg->msg_receiving);
-
- msg->msg_sending = 1;
-
- /* NB! ni != NULL == interface pre-determined (ACK/REPLY) */
-
- LNET_LOCK();
-
- if (the_lnet.ln_shutdown) {
- LNET_UNLOCK();
- return -ESHUTDOWN;
- }
-
- if (src_nid == LNET_NID_ANY) {
- src_ni = NULL;
- } else {
- src_ni = lnet_nid2ni_locked(src_nid);
- if (src_ni == NULL) {
- LNET_UNLOCK();
- CERROR("Can't send to %s: src %s is not a local nid\n",
- libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid));
- return -EINVAL;
- }
- LASSERT (!msg->msg_routing);
- }
-
- /* Is this for someone on a local network? */
- local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid));
-
- if (local_ni != NULL) {
- if (src_ni == NULL) {
- src_ni = local_ni;
- src_nid = src_ni->ni_nid;
- } else if (src_ni == local_ni) {
- lnet_ni_decref_locked(local_ni);
- } else {
- lnet_ni_decref_locked(local_ni);
- lnet_ni_decref_locked(src_ni);
- LNET_UNLOCK();
- CERROR("no route to %s via from %s\n",
- libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid));
- return -EINVAL;
- }
-
- LASSERT (src_nid != LNET_NID_ANY);
-
- if (!msg->msg_routing) {
- src_nid = lnet_ptlcompat_srcnid(src_nid, dst_nid);
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
- }
-
- if (src_ni == the_lnet.ln_loni) {
- /* No send credit hassles with LOLND */
- LNET_UNLOCK();
- lnet_ni_send(src_ni, msg);
- lnet_ni_decref(src_ni);
- return 0;
- }
-
- rc = lnet_nid2peer_locked(&lp, dst_nid);
- lnet_ni_decref_locked(src_ni); /* lp has ref on src_ni; lose mine */
- if (rc != 0) {
- LNET_UNLOCK();
- CERROR("Error %d finding peer %s\n", rc,
- libcfs_nid2str(dst_nid));
- /* ENOMEM or shutting down */
- return rc;
- }
- LASSERT (lp->lp_ni == src_ni);
- } else {
- /* sending to a remote network */
- rnet = lnet_find_net_locked(LNET_NIDNET(dst_nid));
- if (rnet == NULL) {
- if (src_ni != NULL)
- lnet_ni_decref_locked(src_ni);
- LNET_UNLOCK();
- CERROR("No route to %s\n", libcfs_id2str(msg->msg_target));
- return -EHOSTUNREACH;
- }
-
- /* Find the best gateway I can use */
- lp = NULL;
- best_route = NULL;
- list_for_each(tmp, &rnet->lrn_routes) {
- route = list_entry(tmp, lnet_route_t, lr_list);
- lp2 = route->lr_gateway;
-
- if (lp2->lp_alive &&
- (src_ni == NULL || lp2->lp_ni == src_ni) &&
- (lp == NULL || lnet_compare_routers(lp2, lp) > 0)) {
- best_route = route;
- lp = lp2;
- }
- }
-
- if (lp == NULL) {
- if (src_ni != NULL)
- lnet_ni_decref_locked(src_ni);
- LNET_UNLOCK();
- CERROR("No route to %s (all routers down)\n",
- libcfs_id2str(msg->msg_target));
- return -EHOSTUNREACH;
- }
-
- /* Place selected route at the end of the route list to ensure
- * fairness; everything else being equal... */
- list_del(&best_route->lr_list);
- list_add_tail(&best_route->lr_list, &rnet->lrn_routes);
-
- if (src_ni == NULL) {
- src_ni = lp->lp_ni;
- src_nid = src_ni->ni_nid;
- } else {
- LASSERT (src_ni == lp->lp_ni);
- lnet_ni_decref_locked(src_ni);
- }
-
- lnet_peer_addref_locked(lp);
-
- LASSERT (src_nid != LNET_NID_ANY);
-
- if (!msg->msg_routing) {
- /* I'm the source and now I know which NI to send on */
- src_nid = lnet_ptlcompat_srcnid(src_nid, dst_nid);
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
- }
-
- msg->msg_target_is_router = 1;
- msg->msg_target.nid = lp->lp_nid;
- msg->msg_target.pid = LUSTRE_SRV_LNET_PID;
- }
-
- /* 'lp' is our best choice of peer */
-
- LASSERT (!msg->msg_peertxcredit);
- LASSERT (!msg->msg_txcredit);
- LASSERT (msg->msg_txpeer == NULL);
-
- msg->msg_txpeer = lp; /* msg takes my ref on lp */
-
- rc = lnet_post_send_locked(msg, 0);
- LNET_UNLOCK();
-
- if (rc == 0)
- lnet_ni_send(src_ni, msg);
-
- return 0;
-}
-
-static void
-lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg)
-{
- /* ALWAYS called holding the LNET_LOCK */
- /* Here, we commit the MD to a network OP by marking it busy and
- * decrementing its threshold. Come what may, the network "owns"
- * the MD until a call to lnet_finalize() signals completion. */
- LASSERT (!msg->msg_routing);
-
- msg->msg_md = md;
-
- md->md_refcount++;
- if (md->md_threshold != LNET_MD_THRESH_INF) {
- LASSERT (md->md_threshold > 0);
- md->md_threshold--;
- }
-
- the_lnet.ln_counters.msgs_alloc++;
- if (the_lnet.ln_counters.msgs_alloc >
- the_lnet.ln_counters.msgs_max)
- the_lnet.ln_counters.msgs_max =
- the_lnet.ln_counters.msgs_alloc;
-
- LASSERT (!msg->msg_onactivelist);
- msg->msg_onactivelist = 1;
- list_add (&msg->msg_activelist, &the_lnet.ln_active_msgs);
-}
-
-static void
-lnet_drop_message (lnet_ni_t *ni, void *private, unsigned int nob)
-{
- LNET_LOCK();
- the_lnet.ln_counters.drop_count++;
- the_lnet.ln_counters.drop_length += nob;
- LNET_UNLOCK();
-
- lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob);
-}
-
-static void
-lnet_drop_delayed_put(lnet_msg_t *msg, char *reason)
-{
- LASSERT (msg->msg_md == NULL);
- LASSERT (msg->msg_delayed);
- LASSERT (msg->msg_rxpeer != NULL);
- LASSERT (msg->msg_hdr.type == LNET_MSG_PUT);
-
- CWARN("Dropping delayed PUT from %s portal %d match "LPU64
- " offset %d length %d: %s\n",
- libcfs_id2str((lnet_process_id_t){
- .nid = msg->msg_hdr.src_nid,
- .pid = msg->msg_hdr.src_pid}),
- msg->msg_hdr.msg.put.ptl_index,
- msg->msg_hdr.msg.put.match_bits,
- msg->msg_hdr.msg.put.offset,
- msg->msg_hdr.payload_length,
- reason);
-
- /* NB I can't drop msg's ref on msg_rxpeer until after I've
- * called lnet_drop_message(), so I just hang onto msg as well
- * until that's done */
-
- lnet_drop_message(msg->msg_rxpeer->lp_ni,
- msg->msg_private, msg->msg_len);
-
- LNET_LOCK();
-
- lnet_peer_decref_locked(msg->msg_rxpeer);
- msg->msg_rxpeer = NULL;
-
- lnet_msg_free(msg);
-
- LNET_UNLOCK();
-}
-
-int
-LNetSetLazyPortal(int portal)
-{
- lnet_portal_t *ptl = &the_lnet.ln_portals[portal];
-
- if (portal < 0 || portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
-
- LNET_LOCK();
-
- ptl->ptl_options |= LNET_PTL_LAZY;
-
- LNET_UNLOCK();
-
- return 0;
-}
-
-int
-LNetClearLazyPortal(int portal)
-{
- struct list_head zombies;
- lnet_portal_t *ptl = &the_lnet.ln_portals[portal];
- lnet_msg_t *msg;
-
- if (portal < 0 || portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- LNET_LOCK();
-
- if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) {
- LNET_UNLOCK();
- return 0;
- }
-
- if (the_lnet.ln_shutdown)
- CWARN ("Active lazy portal %d on exit\n", portal);
- else
- CDEBUG (D_NET, "clearing portal %d lazy\n", portal);
-
- /* grab all the blocked messages atomically */
- list_add(&zombies, &ptl->ptl_msgq);
- list_del_init(&ptl->ptl_msgq);
-
- ptl->ptl_msgq_version++;
- ptl->ptl_options &= ~LNET_PTL_LAZY;
-
- LNET_UNLOCK();
-
- while (!list_empty(&zombies)) {
- msg = list_entry(zombies.next, lnet_msg_t, msg_list);
- list_del(&msg->msg_list);
-
- lnet_drop_delayed_put(msg, "Clearing lazy portal attr");
- }
-
- return 0;
-}
-
-static void
-lnet_recv_put(lnet_libmd_t *md, lnet_msg_t *msg, int delayed,
- unsigned int offset, unsigned int mlength)
-{
- lnet_hdr_t *hdr = &msg->msg_hdr;
-
- LNET_LOCK();
-
- the_lnet.ln_counters.recv_count++;
- the_lnet.ln_counters.recv_length += mlength;
-
- LNET_UNLOCK();
-
- if (mlength != 0)
- lnet_setpayloadbuffer(msg);
-
- msg->msg_ev.type = LNET_EVENT_PUT;
- msg->msg_ev.target.pid = hdr->dest_pid;
- msg->msg_ev.target.nid = hdr->dest_nid;
- msg->msg_ev.hdr_data = hdr->msg.put.hdr_data;
-
- /* Must I ACK? If so I'll grab the ack_wmd out of the header and put
- * it back into the ACK during lnet_finalize() */
- msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
- (md->md_options & LNET_MD_ACK_DISABLE) == 0);
-
- lnet_ni_recv(msg->msg_rxpeer->lp_ni,
- msg->msg_private,
- msg, delayed, offset, mlength,
- hdr->payload_length);
-}
-
-/* called with LNET_LOCK held */
-void
-lnet_match_blocked_msg(lnet_libmd_t *md)
-{
- CFS_LIST_HEAD (drops);
- CFS_LIST_HEAD (matches);
- struct list_head *tmp;
- struct list_head *entry;
- lnet_msg_t *msg;
- lnet_me_t *me = md->md_me;
- lnet_portal_t *ptl = &the_lnet.ln_portals[me->me_portal];
-
- LASSERT (me->me_portal < the_lnet.ln_nportals);
-
- if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) {
- LASSERT (list_empty(&ptl->ptl_msgq));
- return;
- }
-
- LASSERT (md->md_refcount == 0); /* a brand new MD */
-
- list_for_each_safe (entry, tmp, &ptl->ptl_msgq) {
- int rc;
- int index;
- unsigned int mlength;
- unsigned int offset;
- lnet_hdr_t *hdr;
- lnet_process_id_t src;
-
- msg = list_entry(entry, lnet_msg_t, msg_list);
-
- LASSERT (msg->msg_delayed);
-
- hdr = &msg->msg_hdr;
- index = hdr->msg.put.ptl_index;
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- rc = lnet_try_match_md(index, LNET_MD_OP_PUT, src,
- hdr->payload_length,
- hdr->msg.put.offset,
- hdr->msg.put.match_bits,
- md, msg, &mlength, &offset);
-
- if (rc == LNET_MATCHMD_NONE)
- continue;
-
- /* Hurrah! This _is_ a match */
- list_del(&msg->msg_list);
- ptl->ptl_msgq_version++;
-
- if (rc == LNET_MATCHMD_OK) {
- list_add_tail(&msg->msg_list, &matches);
-
- CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
- "match "LPU64" offset %d length %d.\n",
- libcfs_id2str(src),
- hdr->msg.put.ptl_index,
- hdr->msg.put.match_bits,
- hdr->msg.put.offset,
- hdr->payload_length);
- } else {
- LASSERT (rc == LNET_MATCHMD_DROP);
-
- list_add_tail(&msg->msg_list, &drops);
- }
-
- if (lnet_md_exhausted(md))
- break;
- }
-
- LNET_UNLOCK();
-
- list_for_each_safe (entry, tmp, &drops) {
- msg = list_entry(entry, lnet_msg_t, msg_list);
-
- list_del(&msg->msg_list);
-
- lnet_drop_delayed_put(msg, "Bad match");
- }
-
- list_for_each_safe (entry, tmp, &matches) {
- msg = list_entry(entry, lnet_msg_t, msg_list);
-
- list_del(&msg->msg_list);
-
- /* md won't disappear under me, since each msg
- * holds a ref on it */
- lnet_recv_put(md, msg, 1,
- msg->msg_ev.offset,
- msg->msg_ev.mlength);
- }
-
- LNET_LOCK();
-}
-
-static int
-lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
-{
- int rc;
- int index;
- lnet_hdr_t *hdr = &msg->msg_hdr;
- unsigned int rlength = hdr->payload_length;
- unsigned int mlength = 0;
- unsigned int offset = 0;
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
- lnet_libmd_t *md;
-
- /* Convert put fields to host byte order */
- hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits);
- hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index);
- hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset);
-
- index = hdr->msg.put.ptl_index;
-
- LNET_LOCK();
-
- rc = lnet_match_md(index, LNET_MD_OP_PUT, src,
- rlength, hdr->msg.put.offset,
- hdr->msg.put.match_bits, msg,
- &mlength, &offset, &md);
- switch (rc) {
- default:
- LBUG();
-
- case LNET_MATCHMD_OK:
- LNET_UNLOCK();
- lnet_recv_put(md, msg, 0, offset, mlength);
- return 0;
-
- case LNET_MATCHMD_NONE:
- rc = lnet_eager_recv_locked(msg);
- if (rc == 0 && !the_lnet.ln_shutdown) {
- list_add_tail(&msg->msg_list,
- &the_lnet.ln_portals[index].ptl_msgq);
-
- the_lnet.ln_portals[index].ptl_msgq_version++;
-
- CDEBUG(D_NET, "Delaying PUT from %s portal %d match "
- LPU64" offset %d length %d: no match \n",
- libcfs_id2str(src), index,
- hdr->msg.put.match_bits,
- hdr->msg.put.offset, rlength);
-
- LNET_UNLOCK();
- return 0;
- }
- /* fall through */
-
- case LNET_MATCHMD_DROP:
- CDEBUG(D_NETERROR,
- "Dropping PUT from %s portal %d match "LPU64
- " offset %d length %d: %d\n",
- libcfs_id2str(src), index,
- hdr->msg.put.match_bits,
- hdr->msg.put.offset, rlength, rc);
- LNET_UNLOCK();
-
- return ENOENT; /* +ve: OK but no match */
- }
-}
-
-static int
-lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get)
-{
- lnet_hdr_t *hdr = &msg->msg_hdr;
- unsigned int mlength = 0;
- unsigned int offset = 0;
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
- lnet_handle_wire_t reply_wmd;
- lnet_libmd_t *md;
- int rc;
-
- /* Convert get fields to host byte order */
- hdr->msg.get.match_bits = le64_to_cpu(hdr->msg.get.match_bits);
- hdr->msg.get.ptl_index = le32_to_cpu(hdr->msg.get.ptl_index);
- hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length);
- hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset);
-
- LNET_LOCK();
-
- rc = lnet_match_md(hdr->msg.get.ptl_index, LNET_MD_OP_GET, src,
- hdr->msg.get.sink_length, hdr->msg.get.src_offset,
- hdr->msg.get.match_bits, msg,
- &mlength, &offset, &md);
- if (rc == LNET_MATCHMD_DROP) {
- CDEBUG(D_NETERROR,
- "Dropping GET from %s portal %d match "LPU64
- " offset %d length %d\n",
- libcfs_id2str(src),
- hdr->msg.get.ptl_index,
- hdr->msg.get.match_bits,
- hdr->msg.get.src_offset,
- hdr->msg.get.sink_length);
- LNET_UNLOCK();
- return ENOENT; /* +ve: OK but no match */
- }
-
- LASSERT (rc == LNET_MATCHMD_OK);
-
- the_lnet.ln_counters.send_count++;
- the_lnet.ln_counters.send_length += mlength;
-
- LNET_UNLOCK();
-
- reply_wmd = hdr->msg.get.return_wmd;
-
- lnet_prep_send(msg, LNET_MSG_REPLY, src, offset, mlength);
-
- msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
-
- msg->msg_ev.type = LNET_EVENT_GET;
- msg->msg_ev.target.pid = hdr->dest_pid;
- msg->msg_ev.target.nid = hdr->dest_nid;
- msg->msg_ev.hdr_data = 0;
-
- if (rdma_get) {
- /* The LND completes the REPLY from her recv procedure */
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- msg->msg_offset, msg->msg_len, msg->msg_len);
- return 0;
- }
-
- lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0);
- msg->msg_receiving = 0;
-
- rc = lnet_send(ni->ni_nid, msg);
- if (rc < 0) {
- /* didn't get as far as lnet_ni_send() */
- CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), rc);
-
- lnet_finalize(ni, msg, rc);
- }
-
- return 0;
-}
-
-static int
-lnet_parse_reply(lnet_ni_t *ni, lnet_msg_t *msg)
-{
- void *private = msg->msg_private;
- lnet_hdr_t *hdr = &msg->msg_hdr;
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
- lnet_libmd_t *md;
- int rlength;
- int mlength;
-
- LNET_LOCK();
-
- /* NB handles only looked up by creator (no flips) */
- md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
- if (md == NULL || md->md_threshold == 0) {
- CDEBUG(D_NETERROR, "%s: Dropping REPLY from %s for %s "
- "MD "LPX64"."LPX64"\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- (md == NULL) ? "invalid" : "inactive",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie);
-
- LNET_UNLOCK();
- return ENOENT; /* +ve: OK but no match */
- }
-
- LASSERT (md->md_offset == 0);
-
- rlength = hdr->payload_length;
- mlength = MIN(rlength, md->md_length);
-
- if (mlength < rlength &&
- (md->md_options & LNET_MD_TRUNCATE) == 0) {
- CDEBUG(D_NETERROR, "%s: Dropping REPLY from %s length %d "
- "for MD "LPX64" would overflow (%d)\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- rlength, hdr->msg.reply.dst_wmd.wh_object_cookie,
- mlength);
- LNET_UNLOCK();
- return ENOENT; /* +ve: OK but no match */
- }
-
- CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md "LPX64"\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie);
-
- lnet_commit_md(md, msg);
-
- if (mlength != 0)
- lnet_setpayloadbuffer(msg);
-
- msg->msg_ev.type = LNET_EVENT_REPLY;
- msg->msg_ev.target.pid = hdr->dest_pid;
- msg->msg_ev.target.nid = hdr->dest_nid;
- msg->msg_ev.initiator = src;
- msg->msg_ev.rlength = rlength;
- msg->msg_ev.mlength = mlength;
- msg->msg_ev.offset = 0;
-
- lnet_md_deconstruct(md, &msg->msg_ev.md);
- lnet_md2handle(&msg->msg_ev.md_handle, md);
-
- the_lnet.ln_counters.recv_count++;
- the_lnet.ln_counters.recv_length += mlength;
-
- LNET_UNLOCK();
-
- lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength);
- return 0;
-}
-
-static int
-lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg)
-{
- lnet_hdr_t *hdr = &msg->msg_hdr;
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
- lnet_libmd_t *md;
-
- /* Convert ack fields to host byte order */
- hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits);
- hdr->msg.ack.mlength = le32_to_cpu(hdr->msg.ack.mlength);
-
- LNET_LOCK();
-
- /* NB handles only looked up by creator (no flips) */
- md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
- if (md == NULL || md->md_threshold == 0) {
- /* Don't moan; this is expected */
- CDEBUG(D_NET,
- "%s: Dropping ACK from %s to %s MD "LPX64"."LPX64"\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- (md == NULL) ? "invalid" : "inactive",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie);
- LNET_UNLOCK();
- return ENOENT; /* +ve! */
- }
-
- CDEBUG(D_NET, "%s: ACK from %s into md "LPX64"\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- hdr->msg.ack.dst_wmd.wh_object_cookie);
-
- lnet_commit_md(md, msg);
-
- msg->msg_ev.type = LNET_EVENT_ACK;
- msg->msg_ev.target.pid = hdr->dest_pid;
- msg->msg_ev.target.nid = hdr->dest_nid;
- msg->msg_ev.initiator = src;
- msg->msg_ev.mlength = hdr->msg.ack.mlength;
- msg->msg_ev.match_bits = hdr->msg.ack.match_bits;
-
- lnet_md_deconstruct(md, &msg->msg_ev.md);
- lnet_md2handle(&msg->msg_ev.md_handle, md);
-
- the_lnet.ln_counters.recv_count++;
-
- LNET_UNLOCK();
-
- lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len);
- return 0;
-}
-
-char *
-lnet_msgtyp2str (int type)
-{
- switch (type) {
- case LNET_MSG_ACK:
- return ("ACK");
- case LNET_MSG_PUT:
- return ("PUT");
- case LNET_MSG_GET:
- return ("GET");
- case LNET_MSG_REPLY:
- return ("REPLY");
- case LNET_MSG_HELLO:
- return ("HELLO");
- default:
- return ("<UNKNOWN>");
- }
-}
-
-void
-lnet_print_hdr(lnet_hdr_t * hdr)
-{
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
- lnet_process_id_t dst = {/* .nid = */ hdr->dest_nid,
- /* .pid = */ hdr->dest_pid};
- char *type_str = lnet_msgtyp2str (hdr->type);
-
- CWARN("P3 Header at %p of type %s\n", hdr, type_str);
- CWARN(" From %s\n", libcfs_id2str(src));
- CWARN(" To %s\n", libcfs_id2str(dst));
-
- switch (hdr->type) {
- default:
- break;
-
- case LNET_MSG_PUT:
- CWARN(" Ptl index %d, ack md "LPX64"."LPX64", "
- "match bits "LPU64"\n",
- hdr->msg.put.ptl_index,
- hdr->msg.put.ack_wmd.wh_interface_cookie,
- hdr->msg.put.ack_wmd.wh_object_cookie,
- hdr->msg.put.match_bits);
- CWARN(" Length %d, offset %d, hdr data "LPX64"\n",
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.hdr_data);
- break;
-
- case LNET_MSG_GET:
- CWARN(" Ptl index %d, return md "LPX64"."LPX64", "
- "match bits "LPU64"\n", hdr->msg.get.ptl_index,
- hdr->msg.get.return_wmd.wh_interface_cookie,
- hdr->msg.get.return_wmd.wh_object_cookie,
- hdr->msg.get.match_bits);
- CWARN(" Length %d, src offset %d\n",
- hdr->msg.get.sink_length,
- hdr->msg.get.src_offset);
- break;
-
- case LNET_MSG_ACK:
- CWARN(" dst md "LPX64"."LPX64", "
- "manipulated length %d\n",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie,
- hdr->msg.ack.mlength);
- break;
-
- case LNET_MSG_REPLY:
- CWARN(" dst md "LPX64"."LPX64", "
- "length %d\n",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie,
- hdr->payload_length);
- }
-
-}
-
-
-int
-lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
- void *private, int rdma_req)
-{
- int rc = 0;
- int for_me;
- lnet_msg_t *msg;
- lnet_nid_t dest_nid;
- lnet_nid_t src_nid;
- __u32 payload_length;
- __u32 type;
-
- LASSERT (!in_interrupt ());
-
- type = le32_to_cpu(hdr->type);
- src_nid = le64_to_cpu(hdr->src_nid);
- dest_nid = le64_to_cpu(hdr->dest_nid);
- payload_length = le32_to_cpu(hdr->payload_length);
-
- for_me = lnet_ptlcompat_matchnid(ni->ni_nid, dest_nid);
-
- switch (type) {
- case LNET_MSG_ACK:
- case LNET_MSG_GET:
- if (payload_length > 0) {
- CERROR("%s, src %s: bad %s payload %d (0 expected)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type), payload_length);
- return -EPROTO;
- }
- break;
-
- case LNET_MSG_PUT:
- case LNET_MSG_REPLY:
- if (payload_length > (for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
- CERROR("%s, src %s: bad %s payload %d "
- "(%d max expected)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type),
- payload_length,
- for_me ? LNET_MAX_PAYLOAD : LNET_MTU);
- return -EPROTO;
- }
- break;
-
- default:
- CERROR("%s, src %s: Bad message type 0x%x\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid), type);
- return -EPROTO;
- }
-
- /* Regard a bad destination NID as a protocol error. Senders should
- * know what they're doing; if they don't they're misconfigured, buggy
- * or malicious so we chop them off at the knees :) */
-
- if (!for_me) {
- if (the_lnet.ln_ptlcompat > 0) {
- /* portals compatibility is single-network */
- CERROR ("%s, src %s: Bad dest nid %s "
- "(routing not supported)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (the_lnet.ln_ptlcompat == 0 &&
- LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) {
- /* should have gone direct */
- CERROR ("%s, src %s: Bad dest nid %s "
- "(should have been sent direct)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (the_lnet.ln_ptlcompat == 0 &&
- lnet_islocalnid(dest_nid)) {
- /* dest is another local NI; sender should have used
- * this node's NID on its own network */
- CERROR ("%s, src %s: Bad dest nid %s "
- "(it's my nid but on a different network)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (rdma_req && type == LNET_MSG_GET) {
- CERROR ("%s, src %s: Bad optimized GET for %s "
- "(final destination must be me)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (!the_lnet.ln_routing) {
- CERROR ("%s, src %s: Dropping message for %s "
- "(routing not enabled)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- goto drop;
- }
- }
-
- /* Message looks OK; we're not going to return an error, so we MUST
- * call back lnd_recv() come what may... */
-
- if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer (src_nid, 0)) /* shall we now? */
- {
- CERROR("%s, src %s: Dropping %s to simulate failure\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type));
- goto drop;
- }
-
- msg = lnet_msg_alloc();
- if (msg == NULL) {
- CERROR("%s, src %s: Dropping %s (out of memory)\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type));
- goto drop;
- }
-
- /* msg zeroed in lnet_msg_alloc; i.e. flags all clear, pointers NULL etc */
-
- msg->msg_type = type;
- msg->msg_private = private;
- msg->msg_receiving = 1;
- msg->msg_len = msg->msg_wanted = payload_length;
- msg->msg_offset = 0;
- msg->msg_hdr = *hdr;
-
- LNET_LOCK();
- rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid);
- if (rc != 0) {
- LNET_UNLOCK();
- CERROR("%s, src %s: Dropping %s "
- "(error %d looking up sender)\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type), rc);
- goto free_drop;
- }
- LNET_UNLOCK();
-
-#ifndef __KERNEL__
- LASSERT (for_me);
-#else
- if (!for_me) {
- msg->msg_target.pid = le32_to_cpu(hdr->dest_pid);
- msg->msg_target.nid = dest_nid;
- msg->msg_routing = 1;
- msg->msg_offset = 0;
-
- LNET_LOCK();
- if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
- lnet_msg2bufpool(msg)->rbp_credits <= 0) {
- rc = lnet_eager_recv_locked(msg);
- if (rc != 0) {
- LNET_UNLOCK();
- goto free_drop;
- }
- }
-
- lnet_commit_routedmsg(msg);
- rc = lnet_post_routed_recv_locked(msg, 0);
- LNET_UNLOCK();
-
- if (rc == 0)
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- 0, payload_length, payload_length);
- return 0;
- }
-#endif
- /* convert common msg->hdr fields to host byteorder */
- msg->msg_hdr.type = type;
- msg->msg_hdr.src_nid = src_nid;
- msg->msg_hdr.src_pid = le32_to_cpu(msg->msg_hdr.src_pid);
- msg->msg_hdr.dest_nid = dest_nid;
- msg->msg_hdr.dest_pid = le32_to_cpu(msg->msg_hdr.dest_pid);
- msg->msg_hdr.payload_length = payload_length;
-
- msg->msg_ev.sender = from_nid;
-
- switch (type) {
- case LNET_MSG_ACK:
- rc = lnet_parse_ack(ni, msg);
- break;
- case LNET_MSG_PUT:
- rc = lnet_parse_put(ni, msg);
- break;
- case LNET_MSG_GET:
- rc = lnet_parse_get(ni, msg, rdma_req);
- break;
- case LNET_MSG_REPLY:
- rc = lnet_parse_reply(ni, msg);
- break;
- default:
- LASSERT(0);
- goto free_drop; /* prevent an unused label if !kernel */
- }
-
- if (rc == 0)
- return 0;
-
- LASSERT (rc == ENOENT);
-
- free_drop:
- LASSERT (msg->msg_md == NULL);
- LNET_LOCK();
- if (msg->msg_rxpeer != NULL) {
- lnet_peer_decref_locked(msg->msg_rxpeer);
- msg->msg_rxpeer = NULL;
- }
- lnet_msg_free(msg); /* expects LNET_LOCK held */
- LNET_UNLOCK();
-
- drop:
- lnet_drop_message(ni, private, payload_length);
- return 0;
-}
-
-int
-LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack,
- lnet_process_id_t target, unsigned int portal,
- __u64 match_bits, unsigned int offset,
- __u64 hdr_data)
-{
- lnet_msg_t *msg;
- lnet_libmd_t *md;
- int rc;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer (target.nid, 1)) /* shall we now? */
- {
- CERROR("Dropping PUT to %s: simulated failure\n",
- libcfs_id2str(target));
- return -EIO;
- }
-
- msg = lnet_msg_alloc();
- if (msg == NULL) {
- CERROR("Dropping PUT to %s: ENOMEM on lnet_msg_t\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
-
- LNET_LOCK();
-
- md = lnet_handle2md(&mdh);
- if (md == NULL || md->md_threshold == 0) {
- lnet_msg_free(msg);
- LNET_UNLOCK();
-
- CERROR("Dropping PUT to %s: MD invalid\n",
- libcfs_id2str(target));
- return -ENOENT;
- }
-
- CDEBUG(D_NET, "LNetPut -> %s\n", libcfs_id2str(target));
-
- lnet_commit_md(md, msg);
-
- lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length);
-
- msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits);
- msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal);
- msg->msg_hdr.msg.put.offset = cpu_to_le32(offset);
- msg->msg_hdr.msg.put.hdr_data = hdr_data;
-
- /* NB handles only looked up by creator (no flips) */
- if (ack == LNET_ACK_REQ) {
- msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
- the_lnet.ln_interface_cookie;
- msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
- md->md_lh.lh_cookie;
- } else {
- msg->msg_hdr.msg.put.ack_wmd = LNET_WIRE_HANDLE_NONE;
- }
-
- msg->msg_ev.type = LNET_EVENT_SEND;
- msg->msg_ev.initiator.nid = LNET_NID_ANY;
- msg->msg_ev.initiator.pid = the_lnet.ln_pid;
- msg->msg_ev.target = target;
- msg->msg_ev.sender = LNET_NID_ANY;
- msg->msg_ev.pt_index = portal;
- msg->msg_ev.match_bits = match_bits;
- msg->msg_ev.rlength = md->md_length;
- msg->msg_ev.mlength = md->md_length;
- msg->msg_ev.offset = offset;
- msg->msg_ev.hdr_data = hdr_data;
-
- lnet_md_deconstruct(md, &msg->msg_ev.md);
- lnet_md2handle(&msg->msg_ev.md_handle, md);
-
- the_lnet.ln_counters.send_count++;
- the_lnet.ln_counters.send_length += md->md_length;
-
- LNET_UNLOCK();
-
- rc = lnet_send(self, msg);
- if (rc != 0) {
- CERROR("Error sending PUT to %s: %d\n",
- libcfs_id2str(target), rc);
- lnet_finalize (NULL, msg, rc);
- }
-
- /* completion will be signalled by an event */
- return 0;
-}
-
-lnet_msg_t *
-lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *getmsg)
-{
- /* The LND can DMA direct to the GET md (i.e. no REPLY msg). This
- * returns a msg for the LND to pass to lnet_finalize() when the sink
- * data has been received.
- *
- * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
- * lnet_finalize() is called on it, so the LND must call this first */
-
- lnet_msg_t *msg = lnet_msg_alloc();
- lnet_libmd_t *getmd = getmsg->msg_md;
- lnet_process_id_t peer_id = getmsg->msg_target;
-
- LASSERT (!getmsg->msg_target_is_router);
- LASSERT (!getmsg->msg_routing);
-
- LNET_LOCK();
-
- LASSERT (getmd->md_refcount > 0);
-
- if (msg == NULL) {
- CERROR ("%s: Dropping REPLY from %s: can't allocate msg\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id));
- goto drop;
- }
-
- if (getmd->md_threshold == 0) {
- CERROR ("%s: Dropping REPLY from %s for inactive MD %p\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id),
- getmd);
- goto drop_msg;
- }
-
- LASSERT (getmd->md_offset == 0);
-
- CDEBUG(D_NET, "%s: Reply from %s md %p\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd);
-
- lnet_commit_md (getmd, msg);
-
- msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */
-
- msg->msg_ev.type = LNET_EVENT_REPLY;
- msg->msg_ev.initiator = peer_id;
- msg->msg_ev.sender = peer_id.nid; /* optimized GETs can't be routed */
- msg->msg_ev.rlength = msg->msg_ev.mlength = getmd->md_length;
- msg->msg_ev.offset = 0;
-
- lnet_md_deconstruct(getmd, &msg->msg_ev.md);
- lnet_md2handle(&msg->msg_ev.md_handle, getmd);
-
- the_lnet.ln_counters.recv_count++;
- the_lnet.ln_counters.recv_length += getmd->md_length;
-
- LNET_UNLOCK();
-
- return msg;
-
- drop_msg:
- lnet_msg_free(msg);
- drop:
- the_lnet.ln_counters.drop_count++;
- the_lnet.ln_counters.drop_length += getmd->md_length;
-
- LNET_UNLOCK ();
-
- return NULL;
-}
-
-void
-lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *reply, unsigned int len)
-{
- /* Set the REPLY length, now the RDMA that elides the REPLY message has
- * completed and I know it. */
- LASSERT (reply != NULL);
- LASSERT (reply->msg_type == LNET_MSG_GET);
- LASSERT (reply->msg_ev.type == LNET_EVENT_REPLY);
-
- /* NB I trusted my peer to RDMA. If she tells me she's written beyond
- * the end of my buffer, I might as well be dead. */
- LASSERT (len <= reply->msg_ev.mlength);
-
- reply->msg_ev.mlength = len;
-}
-
-int
-LNetGet(lnet_nid_t self, lnet_handle_md_t mdh,
- lnet_process_id_t target, unsigned int portal,
- __u64 match_bits, unsigned int offset)
-{
- lnet_msg_t *msg;
- lnet_libmd_t *md;
- int rc;
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer (target.nid, 1)) /* shall we now? */
- {
- CERROR("Dropping GET to %s: simulated failure\n",
- libcfs_id2str(target));
- return -EIO;
- }
-
- msg = lnet_msg_alloc();
- if (msg == NULL) {
- CERROR("Dropping GET to %s: ENOMEM on lnet_msg_t\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
-
- LNET_LOCK();
-
- md = lnet_handle2md(&mdh);
- if (md == NULL || md->md_threshold == 0) {
- lnet_msg_free(msg);
- LNET_UNLOCK();
-
- CERROR("Dropping GET to %s: MD invalid\n",
- libcfs_id2str(target));
- return -ENOENT;
- }
-
- CDEBUG(D_NET, "LNetGet -> %s\n", libcfs_id2str(target));
-
- lnet_commit_md(md, msg);
-
- lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0);
-
- msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits);
- msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal);
- msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset);
- msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length);
-
- /* NB handles only looked up by creator (no flips) */
- msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie =
- the_lnet.ln_interface_cookie;
- msg->msg_hdr.msg.get.return_wmd.wh_object_cookie =
- md->md_lh.lh_cookie;
-
- msg->msg_ev.type = LNET_EVENT_SEND;
- msg->msg_ev.initiator.nid = LNET_NID_ANY;
- msg->msg_ev.initiator.pid = the_lnet.ln_pid;
- msg->msg_ev.target = target;
- msg->msg_ev.sender = LNET_NID_ANY;
- msg->msg_ev.pt_index = portal;
- msg->msg_ev.match_bits = match_bits;
- msg->msg_ev.rlength = md->md_length;
- msg->msg_ev.mlength = md->md_length;
- msg->msg_ev.offset = offset;
- msg->msg_ev.hdr_data = 0;
-
- lnet_md_deconstruct(md, &msg->msg_ev.md);
- lnet_md2handle(&msg->msg_ev.md_handle, md);
-
- the_lnet.ln_counters.send_count++;
-
- LNET_UNLOCK();
-
- rc = lnet_send(self, msg);
- if (rc < 0) {
- CERROR("error sending GET to %s: %d\n",
- libcfs_id2str(target), rc);
- lnet_finalize (NULL, msg, rc);
- }
-
- /* completion will be signalled by an event */
- return 0;
-}
-
-int
-LNetDist (lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
-{
- struct list_head *e;
- lnet_ni_t *ni;
- lnet_route_t *route;
- lnet_remotenet_t *rnet;
- __u32 dstnet = LNET_NIDNET(dstnid);
- int hops;
- __u32 order = 2;
-
- /* if !local_nid_dist_zero, I don't return a distance of 0 ever
- * (when lustre sees a distance of 0, it substitutes 0@lo), so I
- * keep order 0 free for 0@lo and order 1 free for a local NID
- * match */
-
- LASSERT (the_lnet.ln_init);
- LASSERT (the_lnet.ln_refcount > 0);
-
- LNET_LOCK();
-
- list_for_each (e, &the_lnet.ln_nis) {
- ni = list_entry(e, lnet_ni_t, ni_list);
-
- if (ni->ni_nid == dstnid ||
- (the_lnet.ln_ptlcompat > 0 &&
- LNET_NIDNET(dstnid) == 0 &&
- LNET_NIDADDR(dstnid) == LNET_NIDADDR(ni->ni_nid) &&
- LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) != LOLND)) {
- if (srcnidp != NULL)
- *srcnidp = dstnid;
- if (orderp != NULL) {
- if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND)
- *orderp = 0;
- else
- *orderp = 1;
- }
- LNET_UNLOCK();
-
- return local_nid_dist_zero ? 0 : 1;
- }
-
- if (LNET_NIDNET(ni->ni_nid) == dstnet ||
- (the_lnet.ln_ptlcompat > 0 &&
- dstnet == 0 &&
- LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) != LOLND)) {
- if (srcnidp != NULL)
- *srcnidp = ni->ni_nid;
- if (orderp != NULL)
- *orderp = order;
- LNET_UNLOCK();
- return 1;
- }
-
- order++;
- }
-
- list_for_each (e, &the_lnet.ln_remote_nets) {
- rnet = list_entry(e, lnet_remotenet_t, lrn_list);
-
- if (rnet->lrn_net == dstnet) {
- LASSERT (!list_empty(&rnet->lrn_routes));
- route = list_entry(rnet->lrn_routes.next,
- lnet_route_t, lr_list);
- hops = rnet->lrn_hops;
- if (srcnidp != NULL)
- *srcnidp = route->lr_gateway->lp_ni->ni_nid;
- if (orderp != NULL)
- *orderp = order;
- LNET_UNLOCK();
- return hops + 1;
- }
- order++;
- }
-
- LNET_UNLOCK();
- return -EHOSTUNREACH;
-}
-
-int
-LNetSetAsync(lnet_process_id_t id, int nasync)
-{
-#ifdef __KERNEL__
- return 0;
-#else
- lnet_ni_t *ni;
- lnet_remotenet_t *rnet;
- struct list_head *tmp;
- lnet_route_t *route;
- lnet_nid_t *nids;
- int nnids;
- int maxnids = 256;
- int rc = 0;
- int rc2;
-
- /* Target on a local network? */
-
- ni = lnet_net2ni(LNET_NIDNET(id.nid));
- if (ni != NULL) {
- if (ni->ni_lnd->lnd_setasync != NULL)
- rc = (ni->ni_lnd->lnd_setasync)(ni, id, nasync);
- lnet_ni_decref(ni);
- return rc;
- }
-
- /* Target on a remote network: apply to routers */
- again:
- LIBCFS_ALLOC(nids, maxnids * sizeof(*nids));
- if (nids == NULL)
- return -ENOMEM;
- nnids = 0;
-
- /* Snapshot all the router NIDs */
- LNET_LOCK();
- rnet = lnet_find_net_locked(LNET_NIDNET(id.nid));
- if (rnet != NULL) {
- list_for_each(tmp, &rnet->lrn_routes) {
- if (nnids == maxnids) {
- LNET_UNLOCK();
- LIBCFS_FREE(nids, maxnids * sizeof(*nids));
- maxnids *= 2;
- goto again;
- }
-
- route = list_entry(tmp, lnet_route_t, lr_list);
- nids[nnids++] = route->lr_gateway->lp_nid;
- }
- }
- LNET_UNLOCK();
-
- /* set async on all the routers */
- while (nnids-- > 0) {
- id.pid = LUSTRE_SRV_LNET_PID;
- id.nid = nids[nnids];
-
- ni = lnet_net2ni(LNET_NIDNET(id.nid));
- if (ni == NULL)
- continue;
-
- if (ni->ni_lnd->lnd_setasync != NULL) {
- rc2 = (ni->ni_lnd->lnd_setasync)(ni, id, nasync);
- if (rc2 != 0)
- rc = rc2;
- }
- lnet_ni_decref(ni);
- }
-
- LIBCFS_FREE(nids, maxnids * sizeof(*nids));
- return rc;
-#endif
-}
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-msg.c
- * Message decoding, parsing and finalizing routines
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <lnet/lib-lnet.h>
-
-void
-lnet_build_unlink_event (lnet_libmd_t *md, lnet_event_t *ev)
-{
- memset(ev, 0, sizeof(*ev));
-
- ev->status = 0;
- ev->unlinked = 1;
- ev->type = LNET_EVENT_UNLINK;
- lnet_md_deconstruct(md, &ev->md);
- lnet_md2handle(&ev->md_handle, md);
-}
-
-void
-lnet_enq_event_locked (lnet_eq_t *eq, lnet_event_t *ev)
-{
- lnet_event_t *eq_slot;
-
- /* Allocate the next queue slot */
- ev->sequence = eq->eq_enq_seq++;
-
- /* size must be a power of 2 to handle sequence # overflow */
- LASSERT (eq->eq_size != 0 &&
- eq->eq_size == LOWEST_BIT_SET (eq->eq_size));
- eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1));
-
- /* There is no race since both event consumers and event producers
- * take the LNET_LOCK, so we don't screw around with memory
- * barriers, setting the sequence number last or wierd structure
- * layout assertions. */
- *eq_slot = *ev;
-
- /* Call the callback handler (if any) */
- if (eq->eq_callback != NULL)
- eq->eq_callback (eq_slot);
-
-#ifdef __KERNEL__
- /* Wake anyone waiting in LNetEQPoll() */
- if (cfs_waitq_active(&the_lnet.ln_waitq))
- cfs_waitq_broadcast(&the_lnet.ln_waitq);
-#else
-# ifndef HAVE_LIBPTHREAD
- /* LNetEQPoll() calls into _the_ LND to wait for action */
-# else
- /* Wake anyone waiting in LNetEQPoll() */
- pthread_cond_broadcast(&the_lnet.ln_cond);
-# endif
-#endif
-}
-
-void
-lnet_complete_msg_locked(lnet_msg_t *msg)
-{
- lnet_handle_wire_t ack_wmd;
- int rc;
- int status = msg->msg_ev.status;
-
- LASSERT (msg->msg_onactivelist);
-
- if (status == 0 && msg->msg_ack) {
- /* Only send an ACK if the PUT completed successfully */
-
- lnet_return_credits_locked(msg);
-
- msg->msg_ack = 0;
- LNET_UNLOCK();
-
- LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
- LASSERT(!msg->msg_routing);
-
- ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
-
- lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);
-
- msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
- msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
- msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);
-
- rc = lnet_send(msg->msg_ev.target.nid, msg);
-
- LNET_LOCK();
-
- if (rc == 0)
- return;
- } else if (status == 0 && /* OK so far */
- (msg->msg_routing && !msg->msg_sending)) { /* not forwarded */
-
- LASSERT (!msg->msg_receiving); /* called back recv already */
-
- LNET_UNLOCK();
-
- rc = lnet_send(LNET_NID_ANY, msg);
-
- LNET_LOCK();
-
- if (rc == 0)
- return;
- }
-
- lnet_return_credits_locked(msg);
-
- LASSERT (msg->msg_onactivelist);
- msg->msg_onactivelist = 0;
- list_del (&msg->msg_activelist);
- the_lnet.ln_counters.msgs_alloc--;
- lnet_msg_free(msg);
-}
-
-
-void
-lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status)
-{
-#ifdef __KERNEL__
- int i;
- int my_slot;
-#endif
- lnet_libmd_t *md;
-
- LASSERT (!in_interrupt ());
-
- if (msg == NULL)
- return;
-#if 0
- CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n",
- lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target),
- msg->msg_target_is_router ? "t" : "",
- msg->msg_routing ? "X" : "",
- msg->msg_ack ? "A" : "",
- msg->msg_sending ? "S" : "",
- msg->msg_receiving ? "R" : "",
- msg->msg_delayed ? "d" : "",
- msg->msg_txcredit ? "C" : "",
- msg->msg_peertxcredit ? "c" : "",
- msg->msg_rtrcredit ? "F" : "",
- msg->msg_peerrtrcredit ? "f" : "",
- msg->msg_onactivelist ? "!" : "",
- msg->msg_txpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid),
- msg->msg_rxpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid));
-#endif
- LNET_LOCK();
-
- LASSERT (msg->msg_onactivelist);
-
- msg->msg_ev.status = status;
-
- md = msg->msg_md;
- if (md != NULL) {
- int unlink;
-
- /* Now it's safe to drop my caller's ref */
- md->md_refcount--;
- LASSERT (md->md_refcount >= 0);
-
- unlink = lnet_md_unlinkable(md);
-
- msg->msg_ev.unlinked = unlink;
-
- if (md->md_eq != NULL)
- lnet_enq_event_locked(md->md_eq, &msg->msg_ev);
-
- if (unlink)
- lnet_md_unlink(md);
-
- msg->msg_md = NULL;
- }
-
- list_add_tail (&msg->msg_list, &the_lnet.ln_finalizeq);
-
- /* Recursion breaker. Don't complete the message here if I am (or
- * enough other threads are) already completing messages */
-
-#ifdef __KERNEL__
- my_slot = -1;
- for (i = 0; i < the_lnet.ln_nfinalizers; i++) {
- if (the_lnet.ln_finalizers[i] == cfs_current())
- goto out;
- if (my_slot < 0 && the_lnet.ln_finalizers[i] == NULL)
- my_slot = i;
- }
- if (my_slot < 0)
- goto out;
-
- the_lnet.ln_finalizers[my_slot] = cfs_current();
-#else
- if (the_lnet.ln_finalizing)
- goto out;
-
- the_lnet.ln_finalizing = 1;
-#endif
-
- while (!list_empty(&the_lnet.ln_finalizeq)) {
- msg = list_entry(the_lnet.ln_finalizeq.next,
- lnet_msg_t, msg_list);
-
- list_del(&msg->msg_list);
-
- /* NB drops and regains the lnet lock if it actually does
- * anything, so my finalizing friends can chomp along too */
- lnet_complete_msg_locked(msg);
- }
-
-#ifdef __KERNEL__
- the_lnet.ln_finalizers[my_slot] = NULL;
-#else
- the_lnet.ln_finalizing = 0;
-#endif
-
- out:
- LNET_UNLOCK();
-}
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/lib-lnet.h>
-
-int
-lolnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- LASSERT (!lntmsg->msg_routing);
- LASSERT (!lntmsg->msg_target_is_router);
-
- return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0);
-}
-
-int
-lolnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- lnet_msg_t *sendmsg = private;
-
- if (lntmsg != NULL) { /* not discarding */
- if (sendmsg->msg_iov != NULL) {
- if (iov != NULL)
- lnet_copy_iov2iov(niov, iov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_iov,
- sendmsg->msg_offset, mlen);
- else
- lnet_copy_iov2kiov(niov, kiov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_iov,
- sendmsg->msg_offset, mlen);
- } else {
- if (iov != NULL)
- lnet_copy_kiov2iov(niov, iov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_kiov,
- sendmsg->msg_offset, mlen);
- else
- lnet_copy_kiov2kiov(niov, kiov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_kiov,
- sendmsg->msg_offset, mlen);
- }
-
- lnet_finalize(ni, lntmsg, 0);
- }
-
- lnet_finalize(ni, sendmsg, 0);
- return 0;
-}
-
-static int lolnd_instanced;
-
-void
-lolnd_shutdown(lnet_ni_t *ni)
-{
- CDEBUG (D_NET, "shutdown\n");
- LASSERT (lolnd_instanced);
-
- lolnd_instanced = 0;
-}
-
-int
-lolnd_startup (lnet_ni_t *ni)
-{
- LASSERT (ni->ni_lnd == &the_lolnd);
- LASSERT (!lolnd_instanced);
- lolnd_instanced = 1;
-
- return (0);
-}
-
-lnd_t the_lolnd = {
- /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
- /* .lnd_refcount = */ 0,
- /* .lnd_type = */ LOLND,
- /* .lnd_startup = */ lolnd_startup,
- /* .lnd_shutdown = */ lolnd_shutdown,
- /* .lnt_ctl = */ NULL,
- /* .lnd_send = */ lolnd_send,
- /* .lnd_recv = */ lolnd_recv,
- /* .lnd_eager_recv = */ NULL,
- /* .lnd_notify = */ NULL,
-#ifdef __KERNEL__
- /* .lnd_accept = */ NULL
-#else
- /* .lnd_wait = */ NULL
-#endif
-};
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/lib-lnet.h>
-
-static int config_on_load = 0;
-CFS_MODULE_PARM(config_on_load, "i", int, 0444,
- "configure network at module load");
-
-static struct semaphore lnet_config_mutex;
-
-int
-lnet_configure (void *arg)
-{
- /* 'arg' only there so I can be passed to cfs_kernel_thread() */
- int rc = 0;
-
- LNET_MUTEX_DOWN(&lnet_config_mutex);
-
- if (!the_lnet.ln_niinit_self) {
- rc = LNetNIInit(LUSTRE_SRV_LNET_PID);
- if (rc >= 0) {
- the_lnet.ln_niinit_self = 1;
- rc = 0;
- }
- }
-
- LNET_MUTEX_UP(&lnet_config_mutex);
- return rc;
-}
-
-int
-lnet_unconfigure (void)
-{
- int refcount;
-
- LNET_MUTEX_DOWN(&lnet_config_mutex);
-
- if (the_lnet.ln_niinit_self) {
- the_lnet.ln_niinit_self = 0;
- LNetNIFini();
- }
-
- LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex);
- refcount = the_lnet.ln_refcount;
- LNET_MUTEX_UP(&the_lnet.ln_api_mutex);
-
- LNET_MUTEX_UP(&lnet_config_mutex);
- return (refcount == 0) ? 0 : -EBUSY;
-}
-
-int
-lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data)
-{
- int rc;
-
- switch (cmd) {
- case IOC_LIBCFS_CONFIGURE:
- return lnet_configure(NULL);
-
- case IOC_LIBCFS_UNCONFIGURE:
- return lnet_unconfigure();
-
- default:
- /* Passing LNET_PID_ANY only gives me a ref if the net is up
- * already; I'll need it to ensure the net can't go down while
- * I'm called into it */
- rc = LNetNIInit(LNET_PID_ANY);
- if (rc >= 0) {
- rc = LNetCtl(cmd, data);
- LNetNIFini();
- }
- return rc;
- }
-}
-
-DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl);
-
-int
-init_lnet(void)
-{
- int rc;
- ENTRY;
-
- init_mutex(&lnet_config_mutex);
-
- rc = LNetInit();
- if (rc != 0) {
- CERROR("LNetInit: error %d\n", rc);
- RETURN(rc);
- }
-
- rc = libcfs_register_ioctl(&lnet_ioctl_handler);
- LASSERT (rc == 0);
-
- if (config_on_load) {
- /* Have to schedule a separate thread to avoid deadlocking
- * in modload */
- (void) cfs_kernel_thread(lnet_configure, NULL, 0);
- }
-
- RETURN(0);
-}
-
-void
-fini_lnet(void)
-{
- int rc;
-
- rc = libcfs_deregister_ioctl(&lnet_ioctl_handler);
- LASSERT (rc == 0);
-
- LNetFini();
-}
-
-EXPORT_SYMBOL(lnet_register_lnd);
-EXPORT_SYMBOL(lnet_unregister_lnd);
-
-EXPORT_SYMBOL(LNetMEAttach);
-EXPORT_SYMBOL(LNetMEInsert);
-EXPORT_SYMBOL(LNetMEUnlink);
-EXPORT_SYMBOL(LNetEQAlloc);
-EXPORT_SYMBOL(LNetMDAttach);
-EXPORT_SYMBOL(LNetMDUnlink);
-EXPORT_SYMBOL(LNetNIInit);
-EXPORT_SYMBOL(LNetNIFini);
-EXPORT_SYMBOL(LNetInit);
-EXPORT_SYMBOL(LNetFini);
-EXPORT_SYMBOL(LNetSnprintHandle);
-EXPORT_SYMBOL(LNetPut);
-EXPORT_SYMBOL(LNetGet);
-EXPORT_SYMBOL(LNetEQWait);
-EXPORT_SYMBOL(LNetEQFree);
-EXPORT_SYMBOL(LNetEQGet);
-EXPORT_SYMBOL(LNetGetId);
-EXPORT_SYMBOL(LNetMDBind);
-EXPORT_SYMBOL(LNetDist);
-EXPORT_SYMBOL(LNetSetAsync);
-EXPORT_SYMBOL(LNetCtl);
-EXPORT_SYMBOL(LNetSetLazyPortal);
-EXPORT_SYMBOL(LNetClearLazyPortal);
-EXPORT_SYMBOL(the_lnet);
-EXPORT_SYMBOL(lnet_iov_nob);
-EXPORT_SYMBOL(lnet_extract_iov);
-EXPORT_SYMBOL(lnet_kiov_nob);
-EXPORT_SYMBOL(lnet_extract_kiov);
-EXPORT_SYMBOL(lnet_copy_iov2iov);
-EXPORT_SYMBOL(lnet_copy_iov2kiov);
-EXPORT_SYMBOL(lnet_copy_kiov2iov);
-EXPORT_SYMBOL(lnet_copy_kiov2kiov);
-EXPORT_SYMBOL(lnet_finalize);
-EXPORT_SYMBOL(lnet_parse);
-EXPORT_SYMBOL(lnet_create_reply_msg);
-EXPORT_SYMBOL(lnet_set_reply_msg_len);
-EXPORT_SYMBOL(lnet_msgtyp2str);
-EXPORT_SYMBOL(lnet_net2ni_locked);
-
-MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
-MODULE_DESCRIPTION("Portals v3.1");
-MODULE_LICENSE("GPL");
-
-cfs_module(lnet, "1.0.0", init_lnet, fini_lnet);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-move.c
- * Data movement routines
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <lnet/lib-lnet.h>
-
-int
-lnet_create_peer_table(void)
-{
- struct list_head *hash;
- int i;
-
- LASSERT (the_lnet.ln_peer_hash == NULL);
- LIBCFS_ALLOC(hash, LNET_PEER_HASHSIZE * sizeof(struct list_head));
-
- if (hash == NULL) {
- CERROR("Can't allocate peer hash table\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < LNET_PEER_HASHSIZE; i++)
- CFS_INIT_LIST_HEAD(&hash[i]);
-
- the_lnet.ln_peer_hash = hash;
- return 0;
-}
-
-void
-lnet_destroy_peer_table(void)
-{
- int i;
-
- if (the_lnet.ln_peer_hash == NULL)
- return;
-
- for (i = 0; i < LNET_PEER_HASHSIZE; i++)
- LASSERT (list_empty(&the_lnet.ln_peer_hash[i]));
-
- LIBCFS_FREE(the_lnet.ln_peer_hash,
- LNET_PEER_HASHSIZE * sizeof (struct list_head));
- the_lnet.ln_peer_hash = NULL;
-}
-
-void
-lnet_clear_peer_table(void)
-{
- int i;
-
- LASSERT (the_lnet.ln_shutdown); /* i.e. no new peers */
-
- for (i = 0; i < LNET_PEER_HASHSIZE; i++) {
- struct list_head *peers = &the_lnet.ln_peer_hash[i];
-
- LNET_LOCK();
- while (!list_empty(peers)) {
- lnet_peer_t *lp = list_entry(peers->next,
- lnet_peer_t, lp_hashlist);
-
- list_del(&lp->lp_hashlist);
- lnet_peer_decref_locked(lp); /* lose hash table's ref */
- }
- LNET_UNLOCK();
- }
-
- LNET_LOCK();
- for (i = 3; the_lnet.ln_npeers != 0;i++) {
- LNET_UNLOCK();
-
- if ((i & (i-1)) == 0)
- CDEBUG(D_WARNING,"Waiting for %d peers\n",
- the_lnet.ln_npeers);
- cfs_pause(cfs_time_seconds(1));
-
- LNET_LOCK();
- }
- LNET_UNLOCK();
-}
-
-void
-lnet_destroy_peer_locked (lnet_peer_t *lp)
-{
- lnet_ni_decref_locked(lp->lp_ni);
- LNET_UNLOCK();
-
- LASSERT (lp->lp_refcount == 0);
- LASSERT (lp->lp_rtr_refcount == 0);
- LASSERT (list_empty(&lp->lp_txq));
- LASSERT (lp->lp_txqnob == 0);
-
- LIBCFS_FREE(lp, sizeof(*lp));
-
- LNET_LOCK();
-
- LASSERT(the_lnet.ln_npeers > 0);
- the_lnet.ln_npeers--;
-}
-
-lnet_peer_t *
-lnet_find_peer_locked (lnet_nid_t nid)
-{
- unsigned int idx = LNET_NIDADDR(nid) % LNET_PEER_HASHSIZE;
- struct list_head *peers = &the_lnet.ln_peer_hash[idx];
- struct list_head *tmp;
- lnet_peer_t *lp;
-
- if (the_lnet.ln_shutdown)
- return NULL;
-
- list_for_each (tmp, peers) {
- lp = list_entry(tmp, lnet_peer_t, lp_hashlist);
-
- if (lp->lp_nid == nid) {
- lnet_peer_addref_locked(lp);
- return lp;
- }
- }
-
- return NULL;
-}
-
-int
-lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid)
-{
- lnet_peer_t *lp;
- lnet_peer_t *lp2;
-
- lp = lnet_find_peer_locked(nid);
- if (lp != NULL) {
- *lpp = lp;
- return 0;
- }
-
- LNET_UNLOCK();
-
- LIBCFS_ALLOC(lp, sizeof(*lp));
- if (lp == NULL) {
- *lpp = NULL;
- LNET_LOCK();
- return -ENOMEM;
- }
-
- memset(lp, 0, sizeof(*lp)); /* zero counters etc */
-
- CFS_INIT_LIST_HEAD(&lp->lp_txq);
- CFS_INIT_LIST_HEAD(&lp->lp_rtrq);
-
- lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
- lp->lp_notify = 0;
- lp->lp_notifylnd = 0;
- lp->lp_notifying = 0;
- lp->lp_alive_count = 0;
- lp->lp_timestamp = 0;
- lp->lp_ping_timestamp = 0;
- lp->lp_nid = nid;
- lp->lp_refcount = 2; /* 1 for caller; 1 for hash */
- lp->lp_rtr_refcount = 0;
-
- LNET_LOCK();
-
- lp2 = lnet_find_peer_locked(nid);
- if (lp2 != NULL) {
- LNET_UNLOCK();
- LIBCFS_FREE(lp, sizeof(*lp));
- LNET_LOCK();
-
- if (the_lnet.ln_shutdown) {
- lnet_peer_decref_locked(lp2);
- *lpp = NULL;
- return -ESHUTDOWN;
- }
-
- *lpp = lp2;
- return 0;
- }
-
- lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid));
- if (lp->lp_ni == NULL) {
- LNET_UNLOCK();
- LIBCFS_FREE(lp, sizeof(*lp));
- LNET_LOCK();
-
- *lpp = NULL;
- return the_lnet.ln_shutdown ? -ESHUTDOWN : -EHOSTUNREACH;
- }
-
- lp->lp_txcredits =
- lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
-
- /* As a first approximation; allow this peer the same number of router
- * buffers as it is allowed outstanding sends */
- lp->lp_rtrcredits = lp->lp_minrtrcredits = lp->lp_txcredits;
-
- LASSERT (!the_lnet.ln_shutdown);
- /* can't add peers after shutdown starts */
-
- list_add_tail(&lp->lp_hashlist, lnet_nid2peerhash(nid));
- the_lnet.ln_npeers++;
- the_lnet.ln_peertable_version++;
- *lpp = lp;
- return 0;
-}
-
-void
-lnet_debug_peer(lnet_nid_t nid)
-{
- int rc;
- lnet_peer_t *lp;
-
- LNET_LOCK();
-
- rc = lnet_nid2peer_locked(&lp, nid);
- if (rc != 0) {
- LNET_UNLOCK();
- CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
- return;
- }
-
- CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
- libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
- !lnet_isrouter(lp) ? "~rtr" : (lp->lp_alive ? "up" : "down"),
- lp->lp_ni->ni_peertxcredits,
- lp->lp_rtrcredits, lp->lp_minrtrcredits,
- lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
-
- lnet_peer_decref_locked(lp);
-
- LNET_UNLOCK();
-}
+++ /dev/null
-// !$*UTF8*$!
-{
- archiveVersion = 1;
- classes = {
- };
- objectVersion = 39;
- objects = {
- 06AA1262FFB20DD611CA28AA = {
- buildRules = (
- );
- buildSettings = {
- COPY_PHASE_STRIP = NO;
- GCC_DYNAMIC_NO_PIC = NO;
- GCC_ENABLE_FIX_AND_CONTINUE = YES;
- GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
- GCC_OPTIMIZATION_LEVEL = 0;
- OPTIMIZATION_CFLAGS = "-O0";
- ZERO_LINK = YES;
- };
- isa = PBXBuildStyle;
- name = Development;
- };
- 06AA1263FFB20DD611CA28AA = {
- buildRules = (
- );
- buildSettings = {
- COPY_PHASE_STRIP = YES;
- GCC_ENABLE_FIX_AND_CONTINUE = NO;
- ZERO_LINK = NO;
- };
- isa = PBXBuildStyle;
- name = Deployment;
- };
-//060
-//061
-//062
-//063
-//064
-//080
-//081
-//082
-//083
-//084
- 089C1669FE841209C02AAC07 = {
- buildSettings = {
- };
- buildStyles = (
- 06AA1262FFB20DD611CA28AA,
- 06AA1263FFB20DD611CA28AA,
- );
- hasScannedForEncodings = 1;
- isa = PBXProject;
- mainGroup = 089C166AFE841209C02AAC07;
- projectDirPath = "";
- targets = (
- 32A4FEB80562C75700D090E7,
- );
- };
- 089C166AFE841209C02AAC07 = {
- children = (
- 247142CAFF3F8F9811CA285C,
- 089C167CFE841241C02AAC07,
- 19C28FB6FE9D52B211CA2CBB,
- );
- isa = PBXGroup;
- name = portals;
- refType = 4;
- sourceTree = "<group>";
- };
- 089C167CFE841241C02AAC07 = {
- children = (
- 32A4FEC30562C75700D090E7,
- );
- isa = PBXGroup;
- name = Resources;
- refType = 4;
- sourceTree = "<group>";
- };
-//080
-//081
-//082
-//083
-//084
-//190
-//191
-//192
-//193
-//194
- 19A778270730EACD00846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = module.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778280730EACD00846375 = {
- fileRef = 19A778270730EACD00846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7782B0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "api-errno.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A7782C0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "api-ni.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A7782D0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "api-wrap.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A7782E0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-eq.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A7782F0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-init.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778300730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-md.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778310730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-me.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778320730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-move.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778330730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-msg.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778340730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-ni.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778350730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-pid.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778360730EB8400846375 = {
- fileRef = 19A7782B0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A778370730EB8400846375 = {
- fileRef = 19A7782C0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A778380730EB8400846375 = {
- fileRef = 19A7782D0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A778390730EB8400846375 = {
- fileRef = 19A7782E0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783A0730EB8400846375 = {
- fileRef = 19A7782F0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783B0730EB8400846375 = {
- fileRef = 19A778300730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783C0730EB8400846375 = {
- fileRef = 19A778310730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783D0730EB8400846375 = {
- fileRef = 19A778320730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783E0730EB8400846375 = {
- fileRef = 19A778330730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783F0730EB8400846375 = {
- fileRef = 19A778340730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A778400730EB8400846375 = {
- fileRef = 19A778350730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19C28FB6FE9D52B211CA2CBB = {
- children = (
- 32A4FEC40562C75800D090E7,
- );
- isa = PBXGroup;
- name = Products;
- refType = 4;
- sourceTree = "<group>";
- };
-//190
-//191
-//192
-//193
-//194
-//240
-//241
-//242
-//243
-//244
- 247142CAFF3F8F9811CA285C = {
- children = (
- 19A7782B0730EB8400846375,
- 19A7782C0730EB8400846375,
- 19A7782D0730EB8400846375,
- 19A7782E0730EB8400846375,
- 19A7782F0730EB8400846375,
- 19A778300730EB8400846375,
- 19A778310730EB8400846375,
- 19A778320730EB8400846375,
- 19A778330730EB8400846375,
- 19A778340730EB8400846375,
- 19A778350730EB8400846375,
- 19A778270730EACD00846375,
- );
- isa = PBXGroup;
- name = Source;
- path = "";
- refType = 4;
- sourceTree = "<group>";
- };
-//240
-//241
-//242
-//243
-//244
-//320
-//321
-//322
-//323
-//324
- 32A4FEB80562C75700D090E7 = {
- buildPhases = (
- 32A4FEB90562C75700D090E7,
- 32A4FEBA0562C75700D090E7,
- 32A4FEBB0562C75700D090E7,
- 32A4FEBD0562C75700D090E7,
- 32A4FEBF0562C75700D090E7,
- 32A4FEC00562C75700D090E7,
- 32A4FEC10562C75700D090E7,
- );
- buildRules = (
- );
- buildSettings = {
- FRAMEWORK_SEARCH_PATHS = "";
- GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
- GCC_WARN_UNKNOWN_PRAGMAS = NO;
- HEADER_SEARCH_PATHS = ../include;
- INFOPLIST_FILE = Info.plist;
- INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
- LIBRARY_SEARCH_PATHS = "";
- MODULE_NAME = com.clusterfs.lustre.portals.portals.portals;
- MODULE_START = portals_start;
- MODULE_STOP = portals_stop;
- MODULE_VERSION = 1.0.1;
- OTHER_CFLAGS = "-D__KERNEL__";
- OTHER_LDFLAGS = "";
- OTHER_REZFLAGS = "";
- PRODUCT_NAME = portals;
- SECTORDER_FLAGS = "";
- WARNING_CFLAGS = "-Wmost";
- WRAPPER_EXTENSION = kext;
- };
- dependencies = (
- );
- isa = PBXNativeTarget;
- name = portals;
- productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
- productName = portals;
- productReference = 32A4FEC40562C75800D090E7;
- productType = "com.apple.product-type.kernel-extension";
- };
- 32A4FEB90562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXShellScriptBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- shellPath = /bin/sh;
- shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
- };
- 32A4FEBA0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXHeadersBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBB0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXResourcesBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBD0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- 19A778280730EACD00846375,
- 19A778360730EB8400846375,
- 19A778370730EB8400846375,
- 19A778380730EB8400846375,
- 19A778390730EB8400846375,
- 19A7783A0730EB8400846375,
- 19A7783B0730EB8400846375,
- 19A7783C0730EB8400846375,
- 19A7783D0730EB8400846375,
- 19A7783E0730EB8400846375,
- 19A7783F0730EB8400846375,
- 19A778400730EB8400846375,
- );
- isa = PBXSourcesBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBF0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXFrameworksBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEC00562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXRezBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEC10562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXShellScriptBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- shellPath = /bin/sh;
- shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
- };
- 32A4FEC30562C75700D090E7 = {
- isa = PBXFileReference;
- lastKnownFileType = text.plist.xml;
- path = Info.plist;
- refType = 4;
- sourceTree = "<group>";
- };
- 32A4FEC40562C75800D090E7 = {
- explicitFileType = wrapper.cfbundle;
- includeInIndex = 0;
- isa = PBXFileReference;
- path = portals.kext;
- refType = 3;
- sourceTree = BUILT_PRODUCTS_DIR;
- };
- };
- rootObject = 089C1669FE841209C02AAC07;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- *
- * This file is part of Portals
- * http://sourceforge.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/lib-lnet.h>
-
-#if defined(__KERNEL__) && defined(LNET_ROUTER)
-
-static char *forwarding = "";
-CFS_MODULE_PARM(forwarding, "s", charp, 0444,
- "Explicitly enable/disable forwarding between networks");
-
-static int tiny_router_buffers = 1024;
-CFS_MODULE_PARM(tiny_router_buffers, "i", int, 0444,
- "# of 0 payload messages to buffer in the router");
-static int small_router_buffers = 8192;
-CFS_MODULE_PARM(small_router_buffers, "i", int, 0444,
- "# of small (1 page) messages to buffer in the router");
-static int large_router_buffers = 512;
-CFS_MODULE_PARM(large_router_buffers, "i", int, 0444,
- "# of large messages to buffer in the router");
-
-static int auto_down = 1;
-CFS_MODULE_PARM(auto_down, "i", int, 0444,
- "Automatically mark peers down on comms error");
-
-static int check_routers_before_use = 0;
-CFS_MODULE_PARM(check_routers_before_use, "i", int, 0444,
- "Assume routers are down and ping them before use");
-
-static int dead_router_check_interval = 0;
-CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0444,
- "Seconds between dead router health checks (<= 0 to disable)");
-
-static int live_router_check_interval = 0;
-CFS_MODULE_PARM(live_router_check_interval, "i", int, 0444,
- "Seconds between live router health checks (<= 0 to disable)");
-
-static int router_ping_timeout = 50;
-CFS_MODULE_PARM(router_ping_timeout, "i", int, 0444,
- "Seconds to wait for the reply to a router health query");
-
-int
-lnet_peers_start_down(void)
-{
- return check_routers_before_use;
-}
-
-void
-lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, time_t when)
-{
- if (when < lp->lp_timestamp) { /* out of date information */
- CDEBUG(D_NET, "Out of date\n");
- return;
- }
-
- lp->lp_timestamp = when; /* update timestamp */
- lp->lp_ping_deadline = 0; /* disable ping timeout */
-
- if (lp->lp_alive_count != 0 && /* got old news */
- (!lp->lp_alive) == (!alive)) { /* new date for old news */
- CDEBUG(D_NET, "Old news\n");
- return;
- }
-
- /* Flag that notification is outstanding */
-
- lp->lp_alive_count++;
- lp->lp_alive = !(!alive); /* 1 bit! */
- lp->lp_notify = 1;
- lp->lp_notifylnd |= notifylnd;
-
- CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
-}
-
-void
-lnet_do_notify (lnet_peer_t *lp)
-{
- lnet_ni_t *ni = lp->lp_ni;
- int alive;
- int notifylnd;
-
- LNET_LOCK();
-
- /* Notify only in 1 thread at any time to ensure ordered notification.
- * NB individual events can be missed; the only guarantee is that you
- * always get the most recent news */
-
- if (lp->lp_notifying) {
- LNET_UNLOCK();
- return;
- }
-
- lp->lp_notifying = 1;
-
- while (lp->lp_notify) {
- alive = lp->lp_alive;
- notifylnd = lp->lp_notifylnd;
-
- lp->lp_notifylnd = 0;
- lp->lp_notify = 0;
-
- if (notifylnd && ni->ni_lnd->lnd_notify != NULL) {
- LNET_UNLOCK();
-
- /* A new notification could happen now; I'll handle it
- * when control returns to me */
-
- (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive);
-
- LNET_LOCK();
- }
- }
-
- lp->lp_notifying = 0;
-
- LNET_UNLOCK();
-}
-
-int
-lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, time_t when)
-{
- lnet_peer_t *lp = NULL;
- time_t now = cfs_time_current_sec();
-
- LASSERT (!in_interrupt ());
-
- CDEBUG (D_NET, "%s notifying %s: %s\n",
- (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
- libcfs_nid2str(nid),
- alive ? "up" : "down");
-
- if (ni != NULL &&
- LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
- CWARN ("Ignoring notification of %s %s by %s (different net)\n",
- libcfs_nid2str(nid), alive ? "birth" : "death",
- libcfs_nid2str(ni->ni_nid));
- return -EINVAL;
- }
-
- /* can't do predictions... */
- if (when > now) {
- CWARN ("Ignoring prediction from %s of %s %s "
- "%ld seconds in the future\n",
- (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
- libcfs_nid2str(nid), alive ? "up" : "down",
- when - now);
- return -EINVAL;
- }
-
- if (ni != NULL && !alive && /* LND telling me she's down */
- !auto_down) { /* auto-down disabled */
- CDEBUG(D_NET, "Auto-down disabled\n");
- return 0;
- }
-
- LNET_LOCK();
-
- lp = lnet_find_peer_locked(nid);
- if (lp == NULL) {
- /* nid not found */
- LNET_UNLOCK();
- CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
- return 0;
- }
-
- lnet_notify_locked(lp, ni == NULL, alive, when);
-
- LNET_UNLOCK();
-
- lnet_do_notify(lp);
-
- LNET_LOCK();
-
- lnet_peer_decref_locked(lp);
-
- LNET_UNLOCK();
- return 0;
-}
-EXPORT_SYMBOL(lnet_notify);
-
-#else
-
-int
-lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, time_t when)
-{
- return -EOPNOTSUPP;
-}
-
-#endif
-
-static void
-lnet_rtr_addref_locked(lnet_peer_t *lp)
-{
- LASSERT (lp->lp_refcount > 0);
- LASSERT (lp->lp_rtr_refcount >= 0);
-
- lp->lp_rtr_refcount++;
- if (lp->lp_rtr_refcount == 1) {
- struct list_head *pos;
-
- /* a simple insertion sort */
- list_for_each_prev(pos, &the_lnet.ln_routers) {
- lnet_peer_t *rtr = list_entry(pos, lnet_peer_t,
- lp_rtr_list);
-
- if (rtr->lp_nid < lp->lp_nid)
- break;
- }
-
- list_add(&lp->lp_rtr_list, pos);
- /* addref for the_lnet.ln_routers */
- lnet_peer_addref_locked(lp);
- the_lnet.ln_routers_version++;
- }
-}
-
-static void
-lnet_rtr_decref_locked(lnet_peer_t *lp)
-{
- LASSERT (lp->lp_refcount > 0);
- LASSERT (lp->lp_rtr_refcount > 0);
-
- lp->lp_rtr_refcount--;
- if (lp->lp_rtr_refcount == 0) {
- list_del(&lp->lp_rtr_list);
- /* decref for the_lnet.ln_routers */
- lnet_peer_decref_locked(lp);
- the_lnet.ln_routers_version++;
- }
-}
-
-lnet_remotenet_t *
-lnet_find_net_locked (__u32 net)
-{
- lnet_remotenet_t *rnet;
- struct list_head *tmp;
-
- LASSERT (!the_lnet.ln_shutdown);
-
- list_for_each (tmp, &the_lnet.ln_remote_nets) {
- rnet = list_entry(tmp, lnet_remotenet_t, lrn_list);
-
- if (rnet->lrn_net == net)
- return rnet;
- }
- return NULL;
-}
-
-int
-lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
-{
- struct list_head zombies;
- struct list_head *e;
- lnet_remotenet_t *rnet;
- lnet_remotenet_t *rnet2;
- lnet_route_t *route;
- lnet_route_t *route2;
- lnet_ni_t *ni;
- int add_route;
- int rc;
-
- CDEBUG(D_NET, "Add route: net %s hops %u gw %s\n",
- libcfs_net2str(net), hops, libcfs_nid2str(gateway));
-
- if (gateway == LNET_NID_ANY ||
- LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
- net == LNET_NIDNET(LNET_NID_ANY) ||
- LNET_NETTYP(net) == LOLND ||
- LNET_NIDNET(gateway) == net ||
- hops < 1 || hops > 255)
- return (-EINVAL);
-
- if (lnet_islocalnet(net)) /* it's a local network */
- return 0; /* ignore the route entry */
-
- /* Assume net, route, all new */
- LIBCFS_ALLOC(route, sizeof(*route));
- LIBCFS_ALLOC(rnet, sizeof(*rnet));
- if (route == NULL || rnet == NULL) {
- CERROR("Out of memory creating route %s %d %s\n",
- libcfs_net2str(net), hops, libcfs_nid2str(gateway));
- if (route != NULL)
- LIBCFS_FREE(route, sizeof(*route));
- if (rnet != NULL)
- LIBCFS_FREE(rnet, sizeof(*rnet));
- return -ENOMEM;
- }
-
- CFS_INIT_LIST_HEAD(&rnet->lrn_routes);
- rnet->lrn_net = net;
- rnet->lrn_hops = hops;
-
- LNET_LOCK();
-
- rc = lnet_nid2peer_locked(&route->lr_gateway, gateway);
- if (rc != 0) {
- LNET_UNLOCK();
-
- LIBCFS_FREE(route, sizeof(*route));
- LIBCFS_FREE(rnet, sizeof(*rnet));
-
- if (rc == -EHOSTUNREACH) /* gateway is not on a local net */
- return 0; /* ignore the route entry */
-
- CERROR("Error %d creating route %s %d %s\n", rc,
- libcfs_net2str(net), hops, libcfs_nid2str(gateway));
- return rc;
- }
-
- LASSERT (!the_lnet.ln_shutdown);
- CFS_INIT_LIST_HEAD(&zombies);
-
- rnet2 = lnet_find_net_locked(net);
- if (rnet2 == NULL) {
- /* new network */
- list_add_tail(&rnet->lrn_list, &the_lnet.ln_remote_nets);
- rnet2 = rnet;
- }
-
- if (hops > rnet2->lrn_hops) {
- /* New route is longer; ignore it */
- add_route = 0;
- } else if (hops < rnet2->lrn_hops) {
- /* new route supercedes all currently known routes to this
- * net */
- list_add(&zombies, &rnet2->lrn_routes);
- list_del_init(&rnet2->lrn_routes);
- add_route = 1;
- } else {
- add_route = 1;
- /* New route has the same hopcount as existing routes; search
- * for a duplicate route (it's a NOOP if it is) */
- list_for_each (e, &rnet2->lrn_routes) {
- route2 = list_entry(e, lnet_route_t, lr_list);
-
- if (route2->lr_gateway == route->lr_gateway) {
- add_route = 0;
- break;
- }
-
- /* our loopups must be true */
- LASSERT (route2->lr_gateway->lp_nid != gateway);
- }
- }
-
- if (add_route) {
- ni = route->lr_gateway->lp_ni;
- lnet_ni_addref_locked(ni);
-
- LASSERT (rc == 0);
- list_add_tail(&route->lr_list, &rnet2->lrn_routes);
- the_lnet.ln_remote_nets_version++;
-
- lnet_rtr_addref_locked(route->lr_gateway);
-
- LNET_UNLOCK();
-
- /* XXX Assume alive */
- if (ni->ni_lnd->lnd_notify != NULL)
- (ni->ni_lnd->lnd_notify)(ni, gateway, 1);
-
- lnet_ni_decref(ni);
- } else {
- lnet_peer_decref_locked(route->lr_gateway);
- LNET_UNLOCK();
- LIBCFS_FREE(route, sizeof(*route));
- }
-
- if (rnet != rnet2)
- LIBCFS_FREE(rnet, sizeof(*rnet));
-
- while (!list_empty(&zombies)) {
- route = list_entry(zombies.next, lnet_route_t, lr_list);
- list_del(&route->lr_list);
-
- LNET_LOCK();
- lnet_rtr_decref_locked(route->lr_gateway);
- lnet_peer_decref_locked(route->lr_gateway);
- LNET_UNLOCK();
- LIBCFS_FREE(route, sizeof(*route));
- }
-
- return rc;
-}
-
-int
-lnet_check_routes (void)
-{
- lnet_remotenet_t *rnet;
- lnet_route_t *route;
- lnet_route_t *route2;
- struct list_head *e1;
- struct list_head *e2;
-
- LNET_LOCK();
-
- list_for_each (e1, &the_lnet.ln_remote_nets) {
- rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
-
- route2 = NULL;
- list_for_each (e2, &rnet->lrn_routes) {
- route = list_entry(e2, lnet_route_t, lr_list);
-
- if (route2 == NULL)
- route2 = route;
- else if (route->lr_gateway->lp_ni !=
- route2->lr_gateway->lp_ni) {
- LNET_UNLOCK();
-
- CERROR("Routes to %s via %s and %s not supported\n",
- libcfs_net2str(rnet->lrn_net),
- libcfs_nid2str(route->lr_gateway->lp_nid),
- libcfs_nid2str(route2->lr_gateway->lp_nid));
- return -EINVAL;
- }
- }
- }
-
- LNET_UNLOCK();
- return 0;
-}
-
-int
-lnet_del_route (__u32 net, lnet_nid_t gw_nid)
-{
- lnet_remotenet_t *rnet;
- lnet_route_t *route;
- struct list_head *e1;
- struct list_head *e2;
- int rc = -ENOENT;
-
- CDEBUG(D_NET, "Del route: net %s : gw %s\n",
- libcfs_net2str(net), libcfs_nid2str(gw_nid));
-
- /* NB Caller may specify either all routes via the given gateway
- * or a specific route entry actual NIDs) */
-
- again:
- LNET_LOCK();
-
- list_for_each (e1, &the_lnet.ln_remote_nets) {
- rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
-
- if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
- net == rnet->lrn_net))
- continue;
-
- list_for_each (e2, &rnet->lrn_routes) {
- route = list_entry(e2, lnet_route_t, lr_list);
-
- if (!(gw_nid == LNET_NID_ANY ||
- gw_nid == route->lr_gateway->lp_nid))
- continue;
-
- list_del(&route->lr_list);
- the_lnet.ln_remote_nets_version++;
-
- if (list_empty(&rnet->lrn_routes))
- list_del(&rnet->lrn_list);
- else
- rnet = NULL;
-
- lnet_rtr_decref_locked(route->lr_gateway);
- lnet_peer_decref_locked(route->lr_gateway);
- LNET_UNLOCK();
-
- LIBCFS_FREE(route, sizeof (*route));
-
- if (rnet != NULL)
- LIBCFS_FREE(rnet, sizeof(*rnet));
-
- rc = 0;
- goto again;
- }
- }
-
- LNET_UNLOCK();
- return rc;
-}
-
-void
-lnet_destroy_routes (void)
-{
- lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
-}
-
-int
-lnet_get_route (int idx, __u32 *net, __u32 *hops,
- lnet_nid_t *gateway, __u32 *alive)
-{
- struct list_head *e1;
- struct list_head *e2;
- lnet_remotenet_t *rnet;
- lnet_route_t *route;
-
- LNET_LOCK();
-
- list_for_each (e1, &the_lnet.ln_remote_nets) {
- rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
-
- list_for_each (e2, &rnet->lrn_routes) {
- route = list_entry(e2, lnet_route_t, lr_list);
-
- if (idx-- == 0) {
- *net = rnet->lrn_net;
- *hops = rnet->lrn_hops;
- *gateway = route->lr_gateway->lp_nid;
- *alive = route->lr_gateway->lp_alive;
- LNET_UNLOCK();
- return 0;
- }
- }
- }
-
- LNET_UNLOCK();
- return -ENOENT;
-}
-
-#if defined(__KERNEL__) && defined(LNET_ROUTER)
-static void
-lnet_router_checker_event (lnet_event_t *event)
-{
- /* CAVEAT EMPTOR: I'm called with LNET_LOCKed and I'm not allowed to
- * drop it (that's how come I see _every_ event, even ones that would
- * overflow my EQ) */
- lnet_peer_t *lp;
- lnet_nid_t nid;
-
- if (event->unlinked) {
- /* The router checker thread has unlinked the rc_md
- * and exited. */
- LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_UNLINKING);
- the_lnet.ln_rc_state = LNET_RC_STATE_UNLINKED;
- mutex_up(&the_lnet.ln_rc_signal);
- return;
- }
-
- LASSERT (event->type == LNET_EVENT_SEND ||
- event->type == LNET_EVENT_REPLY);
-
- nid = (event->type == LNET_EVENT_SEND) ?
- event->target.nid : event->initiator.nid;
-
- lp = lnet_find_peer_locked(nid);
- if (lp == NULL) {
- /* router may have been removed */
- CDEBUG(D_NET, "Router %s not found\n", libcfs_nid2str(nid));
- return;
- }
-
- if (event->type == LNET_EVENT_SEND) /* re-enable another ping */
- lp->lp_ping_notsent = 0;
-
- if (lnet_isrouter(lp) && /* ignore if no longer a router */
- (event->status != 0 ||
- event->type == LNET_EVENT_REPLY)) {
-
- /* A successful REPLY means the router is up. If _any_ comms
- * to the router fail I assume it's down (this will happen if
- * we ping alive routers to try to detect router death before
- * apps get burned). */
-
- lnet_notify_locked(lp, 1, (event->status == 0),
- cfs_time_current_sec());
-
- /* The router checker will wake up very shortly and do the
- * actual notification.
- * XXX If 'lp' stops being a router before then, it will still
- * have the notification pending!!! */
- }
-
- /* This decref will NOT drop LNET_LOCK (it had to have 1 ref when it
- * was in the peer table and I've not dropped the lock, so no-one else
- * can have reduced the refcount) */
- LASSERT(lp->lp_refcount > 1);
-
- lnet_peer_decref_locked(lp);
-}
-
-static int
-lnet_router_checker(void *arg)
-{
- static lnet_ping_info_t pinginfo;
-
- int rc;
- lnet_handle_md_t mdh;
- lnet_peer_t *rtr;
- struct list_head *entry;
- time_t now;
- lnet_process_id_t rtr_id;
- int secs;
-
- cfs_daemonize("router_checker");
- cfs_block_allsigs();
-
- rtr_id.pid = LUSTRE_SRV_LNET_PID;
-
- LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- rc = LNetMDBind((lnet_md_t){.start = &pinginfo,
- .length = sizeof(pinginfo),
- .threshold = LNET_MD_THRESH_INF,
- .options = LNET_MD_TRUNCATE,
- .eq_handle = the_lnet.ln_rc_eqh},
- LNET_UNLINK,
- &mdh);
-
- if (rc < 0) {
- CERROR("Can't bind MD: %d\n", rc);
- the_lnet.ln_rc_state = rc;
- mutex_up(&the_lnet.ln_rc_signal);
- return rc;
- }
-
- LASSERT (rc == 0);
-
- the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
- mutex_up(&the_lnet.ln_rc_signal); /* let my parent go */
-
- while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
- __u64 version;
-
- LNET_LOCK();
-rescan:
- version = the_lnet.ln_routers_version;
-
- list_for_each (entry, &the_lnet.ln_routers) {
- rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
-
- lnet_peer_addref_locked(rtr);
-
- now = cfs_time_current_sec();
-
- if (rtr->lp_ping_deadline != 0 && /* ping timed out? */
- now > rtr->lp_ping_deadline)
- lnet_notify_locked(rtr, 1, 0, now);
-
- LNET_UNLOCK();
-
- /* Run any outstanding notificiations */
- lnet_do_notify(rtr);
-
- if (rtr->lp_alive) {
- secs = live_router_check_interval;
- } else {
- secs = dead_router_check_interval;
- }
- if (secs <= 0)
- secs = 0;
-
- if (secs != 0 &&
- !rtr->lp_ping_notsent &&
- now > rtr->lp_ping_timestamp + secs) {
- CDEBUG(D_NET, "Check: %s\n",
- libcfs_nid2str(rtr->lp_nid));
-
- LNET_LOCK();
- rtr_id.nid = rtr->lp_nid;
- rtr->lp_ping_notsent = 1;
- rtr->lp_ping_timestamp = now;
-
- if (rtr->lp_ping_deadline == 0)
- rtr->lp_ping_deadline =
- now + router_ping_timeout;
-
- LNET_UNLOCK();
-
- LNetGet(LNET_NID_ANY, mdh, rtr_id,
- LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
- }
-
- LNET_LOCK();
- lnet_peer_decref_locked(rtr);
-
- if (version != the_lnet.ln_routers_version) {
- /* the routers list has changed */
- goto rescan;
- }
- }
-
- LNET_UNLOCK();
-
- /* Call cfs_pause() here always adds 1 to load average
- * because kernel counts # active tasks as nr_running
- * + nr_uninterruptible. */
- set_current_state(CFS_TASK_INTERRUPTIBLE);
- cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE,
- cfs_time_seconds(1));
- }
-
- LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_STOPTHREAD);
- the_lnet.ln_rc_state = LNET_RC_STATE_UNLINKING;
-
- rc = LNetMDUnlink(mdh);
- LASSERT (rc == 0);
-
- /* The unlink event callback will signal final completion */
- return 0;
-}
-
-
-void
-lnet_wait_known_routerstate(void)
-{
- lnet_peer_t *rtr;
- struct list_head *entry;
- int all_known;
-
- for (;;) {
- LNET_LOCK();
-
- all_known = 1;
- list_for_each (entry, &the_lnet.ln_routers) {
- rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
-
- if (rtr->lp_alive_count == 0) {
- all_known = 0;
- break;
- }
- }
-
- LNET_UNLOCK();
-
- if (all_known)
- return;
-
- cfs_pause(cfs_time_seconds(1));
- }
-}
-
-void
-lnet_router_checker_stop(void)
-{
- int rc;
-
- LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING ||
- the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
- return;
-
- the_lnet.ln_rc_state = LNET_RC_STATE_STOPTHREAD;
- /* block until event callback signals exit */
- mutex_down(&the_lnet.ln_rc_signal);
-
- LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_UNLINKED);
-
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT (rc == 0);
-
- the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
-}
-
-int
-lnet_router_checker_start(void)
-{
- int rc;
-
- LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- if (check_routers_before_use &&
- dead_router_check_interval <= 0) {
- LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be"
- " set if 'check_routers_before_use' is set"
- "\n");
- return -EINVAL;
- }
-
- if (live_router_check_interval <= 0 &&
- dead_router_check_interval <= 0)
- return 0;
-
- init_mutex_locked(&the_lnet.ln_rc_signal);
-
- /* EQ size doesn't matter; the callback is guaranteed to get every
- * event */
- rc = LNetEQAlloc(1, lnet_router_checker_event,
- &the_lnet.ln_rc_eqh);
- if (rc != 0) {
- CERROR("Can't allocate EQ: %d\n", rc);
- return -ENOMEM;
- }
-
- rc = (int)cfs_kernel_thread(lnet_router_checker, NULL, 0);
- if (rc < 0) {
- CERROR("Can't start router checker thread: %d\n", rc);
- goto failed;
- }
-
- mutex_down(&the_lnet.ln_rc_signal); /* wait for checker to startup */
-
- rc = the_lnet.ln_rc_state;
- if (rc < 0) {
- the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
- goto failed;
- }
-
- LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
-
- if (check_routers_before_use) {
- /* Note that a helpful side-effect of pinging all known routers
- * at startup is that it makes them drop stale connections they
- * may have to a previous instance of me. */
- lnet_wait_known_routerstate();
- }
-
- return 0;
-
- failed:
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT (rc == 0);
- return rc;
-}
-
-void
-lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages)
-{
- int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
-
- while (--npages >= 0)
- cfs_free_page(rb->rb_kiov[npages].kiov_page);
-
- LIBCFS_FREE(rb, sz);
-}
-
-lnet_rtrbuf_t *
-lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp)
-{
- int npages = rbp->rbp_npages;
- int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
- struct page *page;
- lnet_rtrbuf_t *rb;
- int i;
-
- LIBCFS_ALLOC(rb, sz);
- if (rb == NULL)
- return NULL;
-
- rb->rb_pool = rbp;
-
- for (i = 0; i < npages; i++) {
- page = cfs_alloc_page(CFS_ALLOC_ZERO | CFS_ALLOC_STD);
- if (page == NULL) {
- while (--i >= 0)
- cfs_free_page(rb->rb_kiov[i].kiov_page);
-
- LIBCFS_FREE(rb, sz);
- return NULL;
- }
-
- rb->rb_kiov[i].kiov_len = CFS_PAGE_SIZE;
- rb->rb_kiov[i].kiov_offset = 0;
- rb->rb_kiov[i].kiov_page = page;
- }
-
- return rb;
-}
-
-void
-lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp)
-{
- int npages = rbp->rbp_npages;
- int nbuffers = 0;
- lnet_rtrbuf_t *rb;
-
- LASSERT (list_empty(&rbp->rbp_msgs));
- LASSERT (rbp->rbp_credits == rbp->rbp_nbuffers);
-
- while (!list_empty(&rbp->rbp_bufs)) {
- LASSERT (rbp->rbp_credits > 0);
-
- rb = list_entry(rbp->rbp_bufs.next,
- lnet_rtrbuf_t, rb_list);
- list_del(&rb->rb_list);
- lnet_destroy_rtrbuf(rb, npages);
- nbuffers++;
- }
-
- LASSERT (rbp->rbp_nbuffers == nbuffers);
- LASSERT (rbp->rbp_credits == nbuffers);
-
- rbp->rbp_nbuffers = rbp->rbp_credits = 0;
-}
-
-int
-lnet_rtrpool_alloc_bufs(lnet_rtrbufpool_t *rbp, int nbufs)
-{
- lnet_rtrbuf_t *rb;
- int i;
-
- if (rbp->rbp_nbuffers != 0) {
- LASSERT (rbp->rbp_nbuffers == nbufs);
- return 0;
- }
-
- for (i = 0; i < nbufs; i++) {
- rb = lnet_new_rtrbuf(rbp);
-
- if (rb == NULL) {
- CERROR("Failed to allocate %d router bufs of %d pages\n",
- nbufs, rbp->rbp_npages);
- return -ENOMEM;
- }
-
- rbp->rbp_nbuffers++;
- rbp->rbp_credits++;
- rbp->rbp_mincredits++;
- list_add(&rb->rb_list, &rbp->rbp_bufs);
-
- /* No allocation "under fire" */
- /* Otherwise we'd need code to schedule blocked msgs etc */
- LASSERT (!the_lnet.ln_routing);
- }
-
- LASSERT (rbp->rbp_credits == nbufs);
- return 0;
-}
-
-void
-lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages)
-{
- CFS_INIT_LIST_HEAD(&rbp->rbp_msgs);
- CFS_INIT_LIST_HEAD(&rbp->rbp_bufs);
-
- rbp->rbp_npages = npages;
- rbp->rbp_credits = 0;
- rbp->rbp_mincredits = 0;
-}
-
-void
-lnet_free_rtrpools(void)
-{
- lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[0]);
- lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[1]);
- lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[2]);
-}
-
-void
-lnet_init_rtrpools(void)
-{
- int small_pages = 1;
- int large_pages = (LNET_MTU + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-
- lnet_rtrpool_init(&the_lnet.ln_rtrpools[0], 0);
- lnet_rtrpool_init(&the_lnet.ln_rtrpools[1], small_pages);
- lnet_rtrpool_init(&the_lnet.ln_rtrpools[2], large_pages);
-}
-
-
-int
-lnet_alloc_rtrpools(int im_a_router)
-{
- int rc;
-
- if (!strcmp(forwarding, "")) {
- /* not set either way */
- if (!im_a_router)
- return 0;
- } else if (!strcmp(forwarding, "disabled")) {
- /* explicitly disabled */
- return 0;
- } else if (!strcmp(forwarding, "enabled")) {
- /* explicitly enabled */
- } else {
- LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either "
- "'enabled' or 'disabled'\n");
- return -EINVAL;
- }
-
- if (tiny_router_buffers <= 0) {
- LCONSOLE_ERROR_MSG(0x10c, "tiny_router_buffers=%d invalid when "
- "routing enabled\n", tiny_router_buffers);
- rc = -EINVAL;
- goto failed;
- }
-
- rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[0],
- tiny_router_buffers);
- if (rc != 0)
- goto failed;
-
- if (small_router_buffers <= 0) {
- LCONSOLE_ERROR_MSG(0x10d, "small_router_buffers=%d invalid when"
- " routing enabled\n", small_router_buffers);
- rc = -EINVAL;
- goto failed;
- }
-
- rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[1],
- small_router_buffers);
- if (rc != 0)
- goto failed;
-
- if (large_router_buffers <= 0) {
- LCONSOLE_ERROR_MSG(0x10e, "large_router_buffers=%d invalid when"
- " routing enabled\n", large_router_buffers);
- rc = -EINVAL;
- goto failed;
- }
-
- rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[2],
- large_router_buffers);
- if (rc != 0)
- goto failed;
-
- LNET_LOCK();
- the_lnet.ln_routing = 1;
- LNET_UNLOCK();
-
- return 0;
-
- failed:
- lnet_free_rtrpools();
- return rc;
-}
-
-#else
-
-int
-lnet_peers_start_down(void)
-{
- return 0;
-}
-
-void
-lnet_router_checker_stop(void)
-{
- return;
-}
-
-int
-lnet_router_checker_start(void)
-{
- return 0;
-}
-
-void
-lnet_free_rtrpools (void)
-{
-}
-
-void
-lnet_init_rtrpools (void)
-{
-}
-
-int
-lnet_alloc_rtrpools (int im_a_arouter)
-{
- return 0;
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- *
- * This file is part of Portals
- * http://sourceforge.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <libcfs/libcfs.h>
-#include <lnet/lib-lnet.h>
-
-#if defined(__KERNEL__) && defined(LNET_ROUTER)
-
-#include <linux/seq_file.h>
-#include <linux/lustre_compat25.h>
-
-/* this is really lnet_proc.c */
-
-#define LNET_PROC_STATS "sys/lnet/stats"
-#define LNET_PROC_ROUTES "sys/lnet/routes"
-#define LNET_PROC_ROUTERS "sys/lnet/routers"
-#define LNET_PROC_PEERS "sys/lnet/peers"
-#define LNET_PROC_BUFFERS "sys/lnet/buffers"
-#define LNET_PROC_NIS "sys/lnet/nis"
-
-static int
-lnet_router_proc_stats_read (char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- lnet_counters_t *ctrs;
- int rc;
-
- *start = page;
- *eof = 1;
- if (off != 0)
- return 0;
-
- LIBCFS_ALLOC(ctrs, sizeof(*ctrs));
- if (ctrs == NULL)
- return -ENOMEM;
-
- LNET_LOCK();
- *ctrs = the_lnet.ln_counters;
- LNET_UNLOCK();
-
- rc = sprintf(page,
- "%u %u %u %u %u %u %u "LPU64" "LPU64" "LPU64" "LPU64"\n",
- ctrs->msgs_alloc, ctrs->msgs_max,
- ctrs->errors,
- ctrs->send_count, ctrs->recv_count,
- ctrs->route_count, ctrs->drop_count,
- ctrs->send_length, ctrs->recv_length,
- ctrs->route_length, ctrs->drop_length);
-
- LIBCFS_FREE(ctrs, sizeof(*ctrs));
- return rc;
-}
-
-static int
-lnet_router_proc_stats_write(struct file *file, const char *ubuffer,
- unsigned long count, void *data)
-{
- LNET_LOCK();
- memset(&the_lnet.ln_counters, 0, sizeof(the_lnet.ln_counters));
- LNET_UNLOCK();
-
- return (count);
-}
-
-typedef struct {
- __u64 lrsi_version;
- lnet_remotenet_t *lrsi_net;
- lnet_route_t *lrsi_route;
- loff_t lrsi_off;
-} lnet_route_seq_iterator_t;
-
-int
-lnet_route_seq_seek (lnet_route_seq_iterator_t *lrsi, loff_t off)
-{
- struct list_head *n;
- struct list_head *r;
- int rc;
- loff_t here;
-
- if (off == 0) {
- lrsi->lrsi_net = NULL;
- lrsi->lrsi_route = NULL;
- lrsi->lrsi_off = 0;
- return 0;
- }
-
- LNET_LOCK();
-
- if (lrsi->lrsi_net != NULL &&
- lrsi->lrsi_version != the_lnet.ln_remote_nets_version) {
- /* tables have changed */
- rc = -ESTALE;
- goto out;
- }
-
- if (lrsi->lrsi_net == NULL || lrsi->lrsi_off > off) {
- /* search from start */
- n = the_lnet.ln_remote_nets.next;
- r = NULL;
- here = 1;
- } else {
- /* continue search */
- n = &lrsi->lrsi_net->lrn_list;
- r = &lrsi->lrsi_route->lr_list;
- here = lrsi->lrsi_off;
- }
-
- lrsi->lrsi_version = the_lnet.ln_remote_nets_version;
- lrsi->lrsi_off = off;
-
- while (n != &the_lnet.ln_remote_nets) {
- lnet_remotenet_t *rnet =
- list_entry(n, lnet_remotenet_t, lrn_list);
-
- if (r == NULL)
- r = rnet->lrn_routes.next;
-
- while (r != &rnet->lrn_routes) {
- lnet_route_t *re =
- list_entry(r, lnet_route_t,
- lr_list);
-
- if (here == off) {
- lrsi->lrsi_net = rnet;
- lrsi->lrsi_route = re;
- rc = 0;
- goto out;
- }
-
- r = r->next;
- here++;
- }
-
- r = NULL;
- n = n->next;
- }
-
- lrsi->lrsi_net = NULL;
- lrsi->lrsi_route = NULL;
- rc = -ENOENT;
- out:
- LNET_UNLOCK();
- return rc;
-}
-
-static void *
-lnet_route_seq_start (struct seq_file *s, loff_t *pos)
-{
- lnet_route_seq_iterator_t *lrsi;
- int rc;
-
- LIBCFS_ALLOC(lrsi, sizeof(*lrsi));
- if (lrsi == NULL)
- return NULL;
-
- lrsi->lrsi_net = NULL;
- rc = lnet_route_seq_seek(lrsi, *pos);
- if (rc == 0)
- return lrsi;
-
- LIBCFS_FREE(lrsi, sizeof(*lrsi));
- return NULL;
-}
-
-static void
-lnet_route_seq_stop (struct seq_file *s, void *iter)
-{
- lnet_route_seq_iterator_t *lrsi = iter;
-
- if (lrsi != NULL)
- LIBCFS_FREE(lrsi, sizeof(*lrsi));
-}
-
-static void *
-lnet_route_seq_next (struct seq_file *s, void *iter, loff_t *pos)
-{
- lnet_route_seq_iterator_t *lrsi = iter;
- int rc;
- loff_t next = *pos + 1;
-
- rc = lnet_route_seq_seek(lrsi, next);
- if (rc != 0) {
- LIBCFS_FREE(lrsi, sizeof(*lrsi));
- return NULL;
- }
-
- *pos = next;
- return lrsi;
-}
-
-static int
-lnet_route_seq_show (struct seq_file *s, void *iter)
-{
- lnet_route_seq_iterator_t *lrsi = iter;
- __u32 net;
- unsigned int hops;
- lnet_nid_t nid;
- int alive;
-
- if (lrsi->lrsi_off == 0) {
- seq_printf(s, "Routing %s\n",
- the_lnet.ln_routing ? "enabled" : "disabled");
- seq_printf(s, "%-8s %4s %7s %s\n",
- "net", "hops", "state", "router");
- return 0;
- }
-
- LASSERT (lrsi->lrsi_net != NULL);
- LASSERT (lrsi->lrsi_route != NULL);
-
- LNET_LOCK();
-
- if (lrsi->lrsi_version != the_lnet.ln_remote_nets_version) {
- LNET_UNLOCK();
- return -ESTALE;
- }
-
- net = lrsi->lrsi_net->lrn_net;
- hops = lrsi->lrsi_net->lrn_hops;
- nid = lrsi->lrsi_route->lr_gateway->lp_nid;
- alive = lrsi->lrsi_route->lr_gateway->lp_alive;
-
- LNET_UNLOCK();
-
- seq_printf(s, "%-8s %4u %7s %s\n", libcfs_net2str(net), hops,
- alive ? "up" : "down", libcfs_nid2str(nid));
- return 0;
-}
-
-static struct seq_operations lnet_routes_sops = {
- .start = lnet_route_seq_start,
- .stop = lnet_route_seq_stop,
- .next = lnet_route_seq_next,
- .show = lnet_route_seq_show,
-};
-
-static int
-lnet_route_seq_open(struct inode *inode, struct file *file)
-{
- struct proc_dir_entry *dp = PDE(inode);
- struct seq_file *sf;
- int rc;
-
- rc = seq_open(file, &lnet_routes_sops);
- if (rc == 0) {
- sf = file->private_data;
- sf->private = dp->data;
- }
-
- return rc;
-}
-
-static struct file_operations lnet_routes_fops = {
- .owner = THIS_MODULE,
- .open = lnet_route_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-typedef struct {
- __u64 lrtrsi_version;
- lnet_peer_t *lrtrsi_router;
- loff_t lrtrsi_off;
-} lnet_router_seq_iterator_t;
-
-int
-lnet_router_seq_seek (lnet_router_seq_iterator_t *lrtrsi, loff_t off)
-{
- struct list_head *r;
- lnet_peer_t *lp;
- int rc;
- loff_t here;
-
- if (off == 0) {
- lrtrsi->lrtrsi_router = NULL;
- lrtrsi->lrtrsi_off = 0;
- return 0;
- }
-
- LNET_LOCK();
-
- lp = lrtrsi->lrtrsi_router;
-
- if (lp != NULL &&
- lrtrsi->lrtrsi_version != the_lnet.ln_routers_version) {
- /* tables have changed */
- rc = -ESTALE;
- goto out;
- }
-
- if (lp == NULL || lrtrsi->lrtrsi_off > off) {
- /* search from start */
- r = the_lnet.ln_routers.next;
- here = 1;
- } else {
- /* continue search */
- r = &lp->lp_rtr_list;
- here = lrtrsi->lrtrsi_off;
- }
-
- lrtrsi->lrtrsi_version = the_lnet.ln_routers_version;
- lrtrsi->lrtrsi_off = off;
-
- while (r != &the_lnet.ln_routers) {
- lnet_peer_t *rtr = list_entry(r,
- lnet_peer_t,
- lp_rtr_list);
-
- if (here == off) {
- lrtrsi->lrtrsi_router = rtr;
- rc = 0;
- goto out;
- }
-
- r = r->next;
- here++;
- }
-
- lrtrsi->lrtrsi_router = NULL;
- rc = -ENOENT;
- out:
- LNET_UNLOCK();
- return rc;
-}
-
-static void *
-lnet_router_seq_start (struct seq_file *s, loff_t *pos)
-{
- lnet_router_seq_iterator_t *lrtrsi;
- int rc;
-
- LIBCFS_ALLOC(lrtrsi, sizeof(*lrtrsi));
- if (lrtrsi == NULL)
- return NULL;
-
- lrtrsi->lrtrsi_router = NULL;
- rc = lnet_router_seq_seek(lrtrsi, *pos);
- if (rc == 0)
- return lrtrsi;
-
- LIBCFS_FREE(lrtrsi, sizeof(*lrtrsi));
- return NULL;
-}
-
-static void
-lnet_router_seq_stop (struct seq_file *s, void *iter)
-{
- lnet_router_seq_iterator_t *lrtrsi = iter;
-
- if (lrtrsi != NULL)
- LIBCFS_FREE(lrtrsi, sizeof(*lrtrsi));
-}
-
-static void *
-lnet_router_seq_next (struct seq_file *s, void *iter, loff_t *pos)
-{
- lnet_router_seq_iterator_t *lrtrsi = iter;
- int rc;
- loff_t next = *pos + 1;
-
- rc = lnet_router_seq_seek(lrtrsi, next);
- if (rc != 0) {
- LIBCFS_FREE(lrtrsi, sizeof(*lrtrsi));
- return NULL;
- }
-
- *pos = next;
- return lrtrsi;
-}
-
-static int
-lnet_router_seq_show (struct seq_file *s, void *iter)
-{
- lnet_router_seq_iterator_t *lrtrsi = iter;
- lnet_peer_t *lp;
- lnet_nid_t nid;
- int alive;
- int alive_cnt;
- int nrefs;
- int nrtrrefs;
- time_t last_ping;
-
- if (lrtrsi->lrtrsi_off == 0) {
- seq_printf(s, "%-4s %7s %9s %6s %12s %s\n",
- "ref", "rtr_ref", "alive_cnt", "state", "last_ping", "router");
- return 0;
- }
-
- lp = lrtrsi->lrtrsi_router;
- LASSERT (lp != NULL);
-
- LNET_LOCK();
-
- if (lrtrsi->lrtrsi_version != the_lnet.ln_routers_version) {
- LNET_UNLOCK();
- return -ESTALE;
- }
-
- nid = lp->lp_nid;
- alive = lp->lp_alive;
- alive_cnt = lp->lp_alive_count;
- nrefs = lp->lp_refcount;
- nrtrrefs = lp->lp_rtr_refcount;
- last_ping = lp->lp_ping_timestamp;
-
- LNET_UNLOCK();
-
- seq_printf(s,
- "%-4d %7d %9d %6s %12lu %s\n", nrefs, nrtrrefs,
- alive_cnt, alive ? "up" : "down",
- last_ping, libcfs_nid2str(nid));
- return 0;
-}
-
-static struct seq_operations lnet_routers_sops = {
- .start = lnet_router_seq_start,
- .stop = lnet_router_seq_stop,
- .next = lnet_router_seq_next,
- .show = lnet_router_seq_show,
-};
-
-static int
-lnet_router_seq_open(struct inode *inode, struct file *file)
-{
- struct proc_dir_entry *dp = PDE(inode);
- struct seq_file *sf;
- int rc;
-
- rc = seq_open(file, &lnet_routers_sops);
- if (rc == 0) {
- sf = file->private_data;
- sf->private = dp->data;
- }
-
- return rc;
-}
-
-static struct file_operations lnet_routers_fops = {
- .owner = THIS_MODULE,
- .open = lnet_router_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-typedef struct {
- unsigned long long lpsi_version;
- int lpsi_idx;
- lnet_peer_t *lpsi_peer;
- loff_t lpsi_off;
-} lnet_peer_seq_iterator_t;
-
-int
-lnet_peer_seq_seek (lnet_peer_seq_iterator_t *lpsi, loff_t off)
-{
- int idx;
- struct list_head *p;
- loff_t here;
- int rc;
-
- if (off == 0) {
- lpsi->lpsi_idx = 0;
- lpsi->lpsi_peer = NULL;
- lpsi->lpsi_off = 0;
- return 0;
- }
-
- LNET_LOCK();
-
- if (lpsi->lpsi_peer != NULL &&
- lpsi->lpsi_version != the_lnet.ln_peertable_version) {
- /* tables have changed */
- rc = -ESTALE;
- goto out;
- }
-
- if (lpsi->lpsi_peer == NULL ||
- lpsi->lpsi_off > off) {
- /* search from start */
- idx = 0;
- p = NULL;
- here = 1;
- } else {
- /* continue search */
- idx = lpsi->lpsi_idx;
- p = &lpsi->lpsi_peer->lp_hashlist;
- here = lpsi->lpsi_off;
- }
-
- lpsi->lpsi_version = the_lnet.ln_peertable_version;
- lpsi->lpsi_off = off;
-
- while (idx < LNET_PEER_HASHSIZE) {
- if (p == NULL)
- p = the_lnet.ln_peer_hash[idx].next;
-
- while (p != &the_lnet.ln_peer_hash[idx]) {
- lnet_peer_t *lp = list_entry(p, lnet_peer_t,
- lp_hashlist);
-
- if (here == off) {
- lpsi->lpsi_idx = idx;
- lpsi->lpsi_peer = lp;
- rc = 0;
- goto out;
- }
-
- here++;
- p = lp->lp_hashlist.next;
- }
-
- p = NULL;
- idx++;
- }
-
- lpsi->lpsi_idx = 0;
- lpsi->lpsi_peer = NULL;
- rc = -ENOENT;
- out:
- LNET_UNLOCK();
- return rc;
-}
-
-static void *
-lnet_peer_seq_start (struct seq_file *s, loff_t *pos)
-{
- lnet_peer_seq_iterator_t *lpsi;
- int rc;
-
- LIBCFS_ALLOC(lpsi, sizeof(*lpsi));
- if (lpsi == NULL)
- return NULL;
-
- lpsi->lpsi_idx = 0;
- lpsi->lpsi_peer = NULL;
- rc = lnet_peer_seq_seek(lpsi, *pos);
- if (rc == 0)
- return lpsi;
-
- LIBCFS_FREE(lpsi, sizeof(*lpsi));
- return NULL;
-}
-
-static void
-lnet_peer_seq_stop (struct seq_file *s, void *iter)
-{
- lnet_peer_seq_iterator_t *lpsi = iter;
-
- if (lpsi != NULL)
- LIBCFS_FREE(lpsi, sizeof(*lpsi));
-}
-
-static void *
-lnet_peer_seq_next (struct seq_file *s, void *iter, loff_t *pos)
-{
- lnet_peer_seq_iterator_t *lpsi = iter;
- int rc;
- loff_t next = *pos + 1;
-
- rc = lnet_peer_seq_seek(lpsi, next);
- if (rc != 0) {
- LIBCFS_FREE(lpsi, sizeof(*lpsi));
- return NULL;
- }
-
- *pos = next;
- return lpsi;
-}
-
-static int
-lnet_peer_seq_show (struct seq_file *s, void *iter)
-{
- lnet_peer_seq_iterator_t *lpsi = iter;
- lnet_peer_t *lp;
- lnet_nid_t nid;
- int maxcr;
- int mintxcr;
- int txcr;
- int minrtrcr;
- int rtrcr;
- int alive;
- int rtr;
- int txqnob;
- int nrefs;
-
- if (lpsi->lpsi_off == 0) {
- seq_printf(s, "%-24s %4s %5s %5s %5s %5s %5s %5s %s\n",
- "nid", "refs", "state", "max",
- "rtr", "min", "tx", "min", "queue");
- return 0;
- }
-
- LASSERT (lpsi->lpsi_peer != NULL);
-
- LNET_LOCK();
-
- if (lpsi->lpsi_version != the_lnet.ln_peertable_version) {
- LNET_UNLOCK();
- return -ESTALE;
- }
-
- lp = lpsi->lpsi_peer;
-
- nid = lp->lp_nid;
- maxcr = lp->lp_ni->ni_peertxcredits;
- txcr = lp->lp_txcredits;
- mintxcr = lp->lp_mintxcredits;
- rtrcr = lp->lp_rtrcredits;
- minrtrcr = lp->lp_minrtrcredits;
- rtr = lnet_isrouter(lp);
- alive = lp->lp_alive;
- txqnob = lp->lp_txqnob;
- nrefs = lp->lp_refcount;
-
- LNET_UNLOCK();
-
- seq_printf(s, "%-24s %4d %5s %5d %5d %5d %5d %5d %d\n",
- libcfs_nid2str(nid), nrefs,
- !rtr ? "~rtr" : (alive ? "up" : "down"),
- maxcr, rtrcr, minrtrcr, txcr, mintxcr, txqnob);
- return 0;
-}
-
-static struct seq_operations lnet_peer_sops = {
- .start = lnet_peer_seq_start,
- .stop = lnet_peer_seq_stop,
- .next = lnet_peer_seq_next,
- .show = lnet_peer_seq_show,
-};
-
-static int
-lnet_peer_seq_open(struct inode *inode, struct file *file)
-{
- struct proc_dir_entry *dp = PDE(inode);
- struct seq_file *sf;
- int rc;
-
- rc = seq_open(file, &lnet_peer_sops);
- if (rc == 0) {
- sf = file->private_data;
- sf->private = dp->data;
- }
-
- return rc;
-}
-
-static struct file_operations lnet_peer_fops = {
- .owner = THIS_MODULE,
- .open = lnet_peer_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-typedef struct {
- int lbsi_idx;
- loff_t lbsi_off;
-} lnet_buffer_seq_iterator_t;
-
-int
-lnet_buffer_seq_seek (lnet_buffer_seq_iterator_t *lbsi, loff_t off)
-{
- int idx;
- loff_t here;
- int rc;
-
- if (off == 0) {
- lbsi->lbsi_idx = -1;
- lbsi->lbsi_off = 0;
- return 0;
- }
-
- LNET_LOCK();
-
- if (lbsi->lbsi_idx < 0 ||
- lbsi->lbsi_off > off) {
- /* search from start */
- idx = 0;
- here = 1;
- } else {
- /* continue search */
- idx = lbsi->lbsi_idx;
- here = lbsi->lbsi_off;
- }
-
- lbsi->lbsi_off = off;
-
- while (idx < LNET_NRBPOOLS) {
- if (here == off) {
- lbsi->lbsi_idx = idx;
- rc = 0;
- goto out;
- }
- here++;
- idx++;
- }
-
- lbsi->lbsi_idx = -1;
- rc = -ENOENT;
- out:
- LNET_UNLOCK();
- return rc;
-}
-
-static void *
-lnet_buffer_seq_start (struct seq_file *s, loff_t *pos)
-{
- lnet_buffer_seq_iterator_t *lbsi;
- int rc;
-
- LIBCFS_ALLOC(lbsi, sizeof(*lbsi));
- if (lbsi == NULL)
- return NULL;
-
- lbsi->lbsi_idx = -1;
- rc = lnet_buffer_seq_seek(lbsi, *pos);
- if (rc == 0)
- return lbsi;
-
- LIBCFS_FREE(lbsi, sizeof(*lbsi));
- return NULL;
-}
-
-static void
-lnet_buffer_seq_stop (struct seq_file *s, void *iter)
-{
- lnet_buffer_seq_iterator_t *lbsi = iter;
-
- if (lbsi != NULL)
- LIBCFS_FREE(lbsi, sizeof(*lbsi));
-}
-
-static void *
-lnet_buffer_seq_next (struct seq_file *s, void *iter, loff_t *pos)
-{
- lnet_buffer_seq_iterator_t *lbsi = iter;
- int rc;
- loff_t next = *pos + 1;
-
- rc = lnet_buffer_seq_seek(lbsi, next);
- if (rc != 0) {
- LIBCFS_FREE(lbsi, sizeof(*lbsi));
- return NULL;
- }
-
- *pos = next;
- return lbsi;
-}
-
-static int
-lnet_buffer_seq_show (struct seq_file *s, void *iter)
-{
- lnet_buffer_seq_iterator_t *lbsi = iter;
- lnet_rtrbufpool_t *rbp;
- int npages;
- int nbuf;
- int cr;
- int mincr;
-
- if (lbsi->lbsi_off == 0) {
- seq_printf(s, "%5s %5s %7s %7s\n",
- "pages", "count", "credits", "min");
- return 0;
- }
-
- LASSERT (lbsi->lbsi_idx >= 0 && lbsi->lbsi_idx < LNET_NRBPOOLS);
-
- LNET_LOCK();
-
- rbp = &the_lnet.ln_rtrpools[lbsi->lbsi_idx];
-
- npages = rbp->rbp_npages;
- nbuf = rbp->rbp_nbuffers;
- cr = rbp->rbp_credits;
- mincr = rbp->rbp_mincredits;
-
- LNET_UNLOCK();
-
- seq_printf(s, "%5d %5d %7d %7d\n",
- npages, nbuf, cr, mincr);
- return 0;
-}
-
-static struct seq_operations lnet_buffer_sops = {
- .start = lnet_buffer_seq_start,
- .stop = lnet_buffer_seq_stop,
- .next = lnet_buffer_seq_next,
- .show = lnet_buffer_seq_show,
-};
-
-static int
-lnet_buffer_seq_open(struct inode *inode, struct file *file)
-{
- struct proc_dir_entry *dp = PDE(inode);
- struct seq_file *sf;
- int rc;
-
- rc = seq_open(file, &lnet_buffer_sops);
- if (rc == 0) {
- sf = file->private_data;
- sf->private = dp->data;
- }
-
- return rc;
-}
-
-static struct file_operations lnet_buffers_fops = {
- .owner = THIS_MODULE,
- .open = lnet_buffer_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-typedef struct {
- lnet_ni_t *lnsi_ni;
- loff_t lnsi_off;
-} lnet_ni_seq_iterator_t;
-
-int
-lnet_ni_seq_seek (lnet_ni_seq_iterator_t *lnsi, loff_t off)
-{
- struct list_head *n;
- loff_t here;
- int rc;
-
- if (off == 0) {
- lnsi->lnsi_ni = NULL;
- lnsi->lnsi_off = 0;
- return 0;
- }
-
- LNET_LOCK();
-
- if (lnsi->lnsi_ni == NULL ||
- lnsi->lnsi_off > off) {
- /* search from start */
- n = NULL;
- here = 1;
- } else {
- /* continue search */
- n = &lnsi->lnsi_ni->ni_list;
- here = lnsi->lnsi_off;
- }
-
- lnsi->lnsi_off = off;
-
- if (n == NULL)
- n = the_lnet.ln_nis.next;
-
- while (n != &the_lnet.ln_nis) {
- if (here == off) {
- lnsi->lnsi_ni = list_entry(n, lnet_ni_t, ni_list);
- rc = 0;
- goto out;
- }
- here++;
- n = n->next;
- }
-
- lnsi->lnsi_ni = NULL;
- rc = -ENOENT;
- out:
- LNET_UNLOCK();
- return rc;
-}
-
-static void *
-lnet_ni_seq_start (struct seq_file *s, loff_t *pos)
-{
- lnet_ni_seq_iterator_t *lnsi;
- int rc;
-
- LIBCFS_ALLOC(lnsi, sizeof(*lnsi));
- if (lnsi == NULL)
- return NULL;
-
- lnsi->lnsi_ni = NULL;
- rc = lnet_ni_seq_seek(lnsi, *pos);
- if (rc == 0)
- return lnsi;
-
- LIBCFS_FREE(lnsi, sizeof(*lnsi));
- return NULL;
-}
-
-static void
-lnet_ni_seq_stop (struct seq_file *s, void *iter)
-{
- lnet_ni_seq_iterator_t *lnsi = iter;
-
- if (lnsi != NULL)
- LIBCFS_FREE(lnsi, sizeof(*lnsi));
-}
-
-static void *
-lnet_ni_seq_next (struct seq_file *s, void *iter, loff_t *pos)
-{
- lnet_ni_seq_iterator_t *lnsi = iter;
- int rc;
- loff_t next = *pos + 1;
-
- rc = lnet_ni_seq_seek(lnsi, next);
- if (rc != 0) {
- LIBCFS_FREE(lnsi, sizeof(*lnsi));
- return NULL;
- }
-
- *pos = next;
- return lnsi;
-}
-
-static int
-lnet_ni_seq_show (struct seq_file *s, void *iter)
-{
- lnet_ni_seq_iterator_t *lnsi = iter;
- lnet_ni_t *ni;
- int maxtxcr;
- int txcr;
- int mintxcr;
- int npeertxcr;
- lnet_nid_t nid;
- int nref;
-
- if (lnsi->lnsi_off == 0) {
- seq_printf(s, "%-24s %4s %4s %5s %5s %5s\n",
- "nid", "refs", "peer", "max", "tx", "min");
- return 0;
- }
-
- LASSERT (lnsi->lnsi_ni != NULL);
-
- LNET_LOCK();
-
- ni = lnsi->lnsi_ni;
-
- maxtxcr = ni->ni_maxtxcredits;
- txcr = ni->ni_txcredits;
- mintxcr = ni->ni_mintxcredits;
- npeertxcr = ni->ni_peertxcredits;
- nid = ni->ni_nid;
- nref = ni->ni_refcount;
-
- LNET_UNLOCK();
-
- seq_printf(s, "%-24s %4d %4d %5d %5d %5d\n",
- libcfs_nid2str(nid), nref,
- npeertxcr, maxtxcr, txcr, mintxcr);
- return 0;
-}
-
-static struct seq_operations lnet_ni_sops = {
- .start = lnet_ni_seq_start,
- .stop = lnet_ni_seq_stop,
- .next = lnet_ni_seq_next,
- .show = lnet_ni_seq_show,
-};
-
-static int
-lnet_ni_seq_open(struct inode *inode, struct file *file)
-{
- struct proc_dir_entry *dp = PDE(inode);
- struct seq_file *sf;
- int rc;
-
- rc = seq_open(file, &lnet_ni_sops);
- if (rc == 0) {
- sf = file->private_data;
- sf->private = dp->data;
- }
-
- return rc;
-}
-
-static struct file_operations lnet_ni_fops = {
- .owner = THIS_MODULE,
- .open = lnet_ni_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-void
-lnet_proc_init(void)
-{
- struct proc_dir_entry *pde;
-
- /* Initialize LNET_PROC_STATS */
- pde = create_proc_entry (LNET_PROC_STATS, 0644, NULL);
- if (pde == NULL) {
- CERROR("couldn't create proc entry %s\n", LNET_PROC_STATS);
- return;
- }
-
- pde->data = NULL;
- pde->read_proc = lnet_router_proc_stats_read;
- pde->write_proc = lnet_router_proc_stats_write;
-
- /* Initialize LNET_PROC_ROUTES */
- pde = create_proc_entry (LNET_PROC_ROUTES, 0444, NULL);
- if (pde == NULL) {
- CERROR("couldn't create proc entry %s\n", LNET_PROC_ROUTES);
- return;
- }
-
- pde->proc_fops = &lnet_routes_fops;
- pde->data = NULL;
-
- /* Initialize LNET_PROC_ROUTERS */
- pde = create_proc_entry (LNET_PROC_ROUTERS, 0444, NULL);
- if (pde == NULL) {
- CERROR("couldn't create proc entry %s\n", LNET_PROC_ROUTERS);
- return;
- }
-
- pde->proc_fops = &lnet_routers_fops;
- pde->data = NULL;
-
- /* Initialize LNET_PROC_PEERS */
- pde = create_proc_entry (LNET_PROC_PEERS, 0444, NULL);
- if (pde == NULL) {
- CERROR("couldn't create proc entry %s\n", LNET_PROC_PEERS);
- return;
- }
-
- pde->proc_fops = &lnet_peer_fops;
- pde->data = NULL;
-
- /* Initialize LNET_PROC_BUFFERS */
- pde = create_proc_entry (LNET_PROC_BUFFERS, 0444, NULL);
- if (pde == NULL) {
- CERROR("couldn't create proc entry %s\n", LNET_PROC_BUFFERS);
- return;
- }
-
- pde->proc_fops = &lnet_buffers_fops;
- pde->data = NULL;
-
- /* Initialize LNET_PROC_NIS */
- pde = create_proc_entry (LNET_PROC_NIS, 0444, NULL);
- if (pde == NULL) {
- CERROR("couldn't create proc entry %s\n", LNET_PROC_NIS);
- return;
- }
-
- pde->proc_fops = &lnet_ni_fops;
- pde->data = NULL;
-}
-
-void
-lnet_proc_fini(void)
-{
- remove_proc_entry(LNET_PROC_STATS, 0);
- remove_proc_entry(LNET_PROC_ROUTES, 0);
- remove_proc_entry(LNET_PROC_ROUTERS, 0);
- remove_proc_entry(LNET_PROC_PEERS, 0);
- remove_proc_entry(LNET_PROC_BUFFERS, 0);
- remove_proc_entry(LNET_PROC_NIS, 0);
-}
-
-#else
-
-void
-lnet_proc_init(void)
-{
-}
-
-void
-lnet_proc_fini(void)
-{
-}
-
-#endif
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
+++ /dev/null
-MODULES := lnet_selftest
-
-lnet_selftest-objs := console.o conrpc.o conctl.o framework.o timer.o rpc.o workitem.o module.o ping_test.o brw_test.o
-
-default: all
-
-@INCLUDE_RULES@
+++ /dev/null
-my_sources = console.c conrpc.c conctl.c console.h conrpc.h \
- framework.c timer.c rpc.c workitem.c module.c \
- ping_test.c brw_test.c
-
-if LIBLUSTRE
-noinst_LIBRARIES= libselftest.a
-libselftest_a_SOURCES= $(my_sources)
-libselftest_a_CPPFLAGS = $(LLCPPFLAGS)
-libselftest_a_CFLAGS = $(LLCFLAGS)
-endif
-
-if MODULES
-
-if LINUX
-modulenet_DATA = lnet_selftest$(KMODEXT)
-endif # LINUX
-
-endif # MODULES
-
-install-data-hook: $(install_data_hook)
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ selftest
-DIST_SOURCES = $(lnet_selftest-objs:%.o=%.c) console.h conrpc.h rpc.h selftest.h timer.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- */
-
-#include "selftest.h"
-
-
-extern int brw_inject_errors;
-
-static void
-brw_client_fini (sfw_test_instance_t *tsi)
-{
- srpc_bulk_t *bulk;
- sfw_test_unit_t *tsu;
-
- LASSERT (tsi->tsi_is_client);
-
- list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) {
- bulk = tsu->tsu_private;
- if (bulk == NULL) continue;
-
- srpc_free_bulk(bulk);
- tsu->tsu_private = NULL;
- }
-}
-
-int
-brw_client_init (sfw_test_instance_t *tsi)
-{
- test_bulk_req_t *breq = &tsi->tsi_u.bulk;
- int flags = breq->blk_flags;
- int npg = breq->blk_npg;
- srpc_bulk_t *bulk;
- sfw_test_unit_t *tsu;
-
- LASSERT (tsi->tsi_is_client);
-
- if (npg > LNET_MAX_IOV || npg <= 0)
- return -EINVAL;
-
- if (breq->blk_opc != LST_BRW_READ && breq->blk_opc != LST_BRW_WRITE)
- return -EINVAL;
-
- if (flags != LST_BRW_CHECK_NONE &&
- flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE)
- return -EINVAL;
-
- list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) {
- bulk = srpc_alloc_bulk(npg, breq->blk_opc == LST_BRW_READ);
- if (bulk == NULL) {
- brw_client_fini(tsi);
- return -ENOMEM;
- }
-
- tsu->tsu_private = bulk;
- }
-
- return 0;
-}
-
-#define BRW_POISON 0xbeefbeefbeefbeefULL
-#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL
-#define BRW_MSIZE sizeof(__u64)
-
-int
-brw_inject_one_error (void)
-{
- struct timeval tv;
-
- if (brw_inject_errors <= 0) return 0;
-
-#ifndef __KERNEL__
- gettimeofday(&tv, NULL);
-#else
- do_gettimeofday(&tv);
-#endif
-
- if ((tv.tv_usec & 1) == 0) return 0;
-
- return brw_inject_errors--;
-}
-
-void
-brw_fill_page (cfs_page_t *pg, int pattern, __u64 magic)
-{
- char *addr = cfs_page_address(pg);
- int i;
-
- LASSERT (addr != NULL);
-
- if (pattern == LST_BRW_CHECK_NONE) return;
-
- if (magic == BRW_MAGIC)
- magic += brw_inject_one_error();
-
- if (pattern == LST_BRW_CHECK_SIMPLE) {
- memcpy(addr, &magic, BRW_MSIZE);
- addr += CFS_PAGE_SIZE - BRW_MSIZE;
- memcpy(addr, &magic, BRW_MSIZE);
- return;
- }
-
- if (pattern == LST_BRW_CHECK_FULL) {
- for (i = 0; i < CFS_PAGE_SIZE / BRW_MSIZE; i++)
- memcpy(addr + i * BRW_MSIZE, &magic, BRW_MSIZE);
- return;
- }
-
- LBUG ();
- return;
-}
-
-int
-brw_check_page (cfs_page_t *pg, int pattern, __u64 magic)
-{
- char *addr = cfs_page_address(pg);
- __u64 data;
- int i;
-
- LASSERT (addr != NULL);
-
- if (pattern == LST_BRW_CHECK_NONE)
- return 0;
-
- if (pattern == LST_BRW_CHECK_SIMPLE) {
- data = *((__u64 *) addr);
- if (data != magic) goto bad_data;
-
- addr += CFS_PAGE_SIZE - BRW_MSIZE;
- data = *((__u64 *) addr);
- if (data != magic) goto bad_data;
-
- return 0;
- }
-
- if (pattern == LST_BRW_CHECK_FULL) {
- for (i = 0; i < CFS_PAGE_SIZE / BRW_MSIZE; i++) {
- data = *(((__u64 *) addr) + i);
- if (data != magic) goto bad_data;
- }
-
- return 0;
- }
-
- LBUG ();
-
-bad_data:
- CERROR ("Bad data in page %p: "LPX64", "LPX64" expected\n",
- pg, data, magic);
- return 1;
-}
-
-void
-brw_fill_bulk (srpc_bulk_t *bk, int pattern, __u64 magic)
-{
- int i;
- cfs_page_t *pg;
-
- for (i = 0; i < bk->bk_niov; i++) {
-#ifdef __KERNEL__
- pg = bk->bk_iovs[i].kiov_page;
-#else
- LASSERT (bk->bk_pages != NULL);
- pg = bk->bk_pages[i];
-#endif
- brw_fill_page(pg, pattern, magic);
- }
-}
-
-int
-brw_check_bulk (srpc_bulk_t *bk, int pattern, __u64 magic)
-{
- int i;
- cfs_page_t *pg;
-
- for (i = 0; i < bk->bk_niov; i++) {
-#ifdef __KERNEL__
- pg = bk->bk_iovs[i].kiov_page;
-#else
- LASSERT (bk->bk_pages != NULL);
- pg = bk->bk_pages[i];
-#endif
- if (brw_check_page(pg, pattern, magic) != 0) {
- CERROR ("Bulk page %p (%d/%d) is corrupted!\n",
- pg, i, bk->bk_niov);
- return 1;
- }
- }
-
- return 0;
-}
-
-static int
-brw_client_prep_rpc (sfw_test_unit_t *tsu,
- lnet_process_id_t dest, srpc_client_rpc_t **rpcpp)
-{
- srpc_bulk_t *bulk = tsu->tsu_private;
- sfw_test_instance_t *tsi = tsu->tsu_instance;
- test_bulk_req_t *breq = &tsi->tsi_u.bulk;
- int npg = breq->blk_npg;
- int flags = breq->blk_flags;
- srpc_client_rpc_t *rpc;
- srpc_brw_reqst_t *req;
- int rc;
-
- LASSERT (bulk != NULL);
- LASSERT (bulk->bk_niov == npg);
-
- rc = sfw_create_test_rpc(tsu, dest, npg, npg * CFS_PAGE_SIZE, &rpc);
- if (rc != 0) return rc;
-
- memcpy(&rpc->crpc_bulk, bulk, offsetof(srpc_bulk_t, bk_iovs[npg]));
- if (breq->blk_opc == LST_BRW_WRITE)
- brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC);
- else
- brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON);
-
- req = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
- req->brw_flags = flags;
- req->brw_rw = breq->blk_opc;
- req->brw_len = npg * CFS_PAGE_SIZE;
-
- *rpcpp = rpc;
- return 0;
-}
-
-static void
-brw_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
-{
- __u64 magic = BRW_MAGIC;
- sfw_test_instance_t *tsi = tsu->tsu_instance;
- sfw_session_t *sn = tsi->tsi_batch->bat_session;
- srpc_msg_t *msg = &rpc->crpc_replymsg;
- srpc_brw_reply_t *reply = &msg->msg_body.brw_reply;
- srpc_brw_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
-
- LASSERT (sn != NULL);
-
- if (rpc->crpc_status != 0) {
- CERROR ("BRW RPC to %s failed with %d\n",
- libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
- if (!tsi->tsi_stopping) /* rpc could have been aborted */
- atomic_inc(&sn->sn_brw_errors);
- goto out;
- }
-
- if (msg->msg_magic != SRPC_MSG_MAGIC) {
- __swab64s(&magic);
- __swab32s(&reply->brw_status);
- }
-
- CDEBUG (reply->brw_status ? D_WARNING : D_NET,
- "BRW RPC to %s finished with brw_status: %d\n",
- libcfs_id2str(rpc->crpc_dest), reply->brw_status);
-
- if (reply->brw_status != 0) {
- atomic_inc(&sn->sn_brw_errors);
- rpc->crpc_status = -reply->brw_status;
- goto out;
- }
-
- if (reqst->brw_rw == LST_BRW_WRITE) goto out;
-
- if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic) != 0) {
- CERROR ("Bulk data from %s is corrupted!\n",
- libcfs_id2str(rpc->crpc_dest));
- atomic_inc(&sn->sn_brw_errors);
- rpc->crpc_status = -EBADMSG;
- }
-
-out:
-#ifndef __KERNEL__
- rpc->crpc_bulk.bk_pages = NULL;
-#endif
- return;
-}
-
-void
-brw_server_rpc_done (srpc_server_rpc_t *rpc)
-{
- srpc_bulk_t *blk = rpc->srpc_bulk;
-
- if (blk == NULL) return;
-
- if (rpc->srpc_status != 0)
- CERROR ("Bulk transfer %s %s has failed: %d\n",
- blk->bk_sink ? "from" : "to",
- libcfs_id2str(rpc->srpc_peer), rpc->srpc_status);
- else
- CDEBUG (D_NET, "Transfered %d pages bulk data %s %s\n",
- blk->bk_niov, blk->bk_sink ? "from" : "to",
- libcfs_id2str(rpc->srpc_peer));
-
- sfw_free_pages(rpc);
-}
-
-int
-brw_bulk_ready (srpc_server_rpc_t *rpc, int status)
-{
- __u64 magic = BRW_MAGIC;
- srpc_brw_reply_t *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
- srpc_brw_reqst_t *reqst;
- srpc_msg_t *reqstmsg;
-
- LASSERT (rpc->srpc_bulk != NULL);
- LASSERT (rpc->srpc_reqstbuf != NULL);
-
- reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
- reqst = &reqstmsg->msg_body.brw_reqst;
-
- if (status != 0) {
- CERROR ("BRW bulk %s failed for RPC from %s: %d\n",
- reqst->brw_rw == LST_BRW_READ ? "READ" : "WRITE",
- libcfs_id2str(rpc->srpc_peer), status);
- return -EIO;
- }
-
- if (reqst->brw_rw == LST_BRW_READ)
- return 0;
-
- if (reqstmsg->msg_magic != SRPC_MSG_MAGIC)
- __swab64s(&magic);
-
- if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic) != 0) {
- CERROR ("Bulk data from %s is corrupted!\n",
- libcfs_id2str(rpc->srpc_peer));
- reply->brw_status = EBADMSG;
- }
-
- return 0;
-}
-
-int
-brw_server_handle (srpc_server_rpc_t *rpc)
-{
- srpc_service_t *sv = rpc->srpc_service;
- srpc_msg_t *replymsg = &rpc->srpc_replymsg;
- srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
- srpc_brw_reply_t *reply = &replymsg->msg_body.brw_reply;
- srpc_brw_reqst_t *reqst = &reqstmsg->msg_body.brw_reqst;
- int rc;
-
- LASSERT (sv->sv_id == SRPC_SERVICE_BRW);
-
- if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
- LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- __swab32s(&reqstmsg->msg_type);
- __swab32s(&reqst->brw_rw);
- __swab32s(&reqst->brw_len);
- __swab32s(&reqst->brw_flags);
- __swab64s(&reqst->brw_rpyid);
- __swab64s(&reqst->brw_bulkid);
- }
- LASSERT (reqstmsg->msg_type == srpc_service2request(sv->sv_id));
-
- rpc->srpc_done = brw_server_rpc_done;
-
- if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) ||
- reqst->brw_len == 0 || (reqst->brw_len & ~CFS_PAGE_MASK) != 0 ||
- reqst->brw_len / CFS_PAGE_SIZE > LNET_MAX_IOV ||
- (reqst->brw_flags != LST_BRW_CHECK_NONE &&
- reqst->brw_flags != LST_BRW_CHECK_FULL &&
- reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) {
- reply->brw_status = EINVAL;
- return 0;
- }
-
- reply->brw_status = 0;
- rc = sfw_alloc_pages(rpc, reqst->brw_len / CFS_PAGE_SIZE,
- reqst->brw_rw == LST_BRW_WRITE);
- if (rc != 0) return rc;
-
- if (reqst->brw_rw == LST_BRW_READ)
- brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
- else
- brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
-
- return 0;
-}
-
-sfw_test_client_ops_t brw_test_client =
-{
- .tso_init = brw_client_init,
- .tso_fini = brw_client_fini,
- .tso_prep_rpc = brw_client_prep_rpc,
- .tso_done_rpc = brw_client_done_rpc,
-};
-
-srpc_service_t brw_test_service =
-{
- .sv_name = "brw test",
- .sv_handler = brw_server_handle,
- .sv_bulk_ready = brw_bulk_ready,
- .sv_id = SRPC_SERVICE_BRW,
-};
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * IOC handle in kernel
- */
-#ifdef __KERNEL__
-
-#include <libcfs/libcfs.h>
-#include <lnet/lib-lnet.h>
-#include <lnet/lnetst.h>
-#include "console.h"
-
-int
-lst_session_new_ioctl(lstio_session_new_args_t *args)
-{
- char *name;
- int rc;
-
- if (args->lstio_ses_idp == NULL || /* address for output sid */
- args->lstio_ses_key == 0 || /* no key is specified */
- args->lstio_ses_namep == NULL || /* session name */
- args->lstio_ses_nmlen <= 0 ||
- args->lstio_ses_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_ses_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_ses_namep,
- args->lstio_ses_nmlen)) {
- LIBCFS_FREE(name, args->lstio_ses_nmlen + 1);
- return -EFAULT;
- }
-
- name[args->lstio_ses_nmlen] = 0;
-
- rc = lstcon_session_new(name,
- args->lstio_ses_key,
- args->lstio_ses_timeout,
- args->lstio_ses_force,
- args->lstio_ses_idp);
-
- LIBCFS_FREE(name, args->lstio_ses_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_session_end_ioctl(lstio_session_end_args_t *args)
-{
- if (args->lstio_ses_key != console_session.ses_key)
- return -EACCES;
-
- return lstcon_session_end();
-}
-
-int
-lst_session_info_ioctl(lstio_session_info_args_t *args)
-{
- /* no checking of key */
-
- if (args->lstio_ses_idp == NULL || /* address for ouput sid */
- args->lstio_ses_keyp == NULL || /* address for ouput key */
- args->lstio_ses_ndinfo == NULL || /* address for output ndinfo */
- args->lstio_ses_namep == NULL || /* address for ouput name */
- args->lstio_ses_nmlen <= 0 ||
- args->lstio_ses_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- return lstcon_session_info(args->lstio_ses_idp,
- args->lstio_ses_keyp,
- args->lstio_ses_ndinfo,
- args->lstio_ses_namep,
- args->lstio_ses_nmlen);
-}
-
-int
-lst_debug_ioctl(lstio_debug_args_t *args)
-{
- char *name = NULL;
- int client = 1;
- int rc;
-
- if (args->lstio_dbg_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_dbg_resultp == NULL)
- return -EINVAL;
-
- if (args->lstio_dbg_namep != NULL && /* name of batch/group */
- (args->lstio_dbg_nmlen <= 0 ||
- args->lstio_dbg_nmlen > LST_NAME_SIZE))
- return -EINVAL;
-
- if (args->lstio_dbg_namep != NULL) {
- LIBCFS_ALLOC(name, args->lstio_dbg_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name, args->lstio_dbg_namep,
- args->lstio_dbg_nmlen)) {
- LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1);
-
- return -EFAULT;
- }
-
- name[args->lstio_dbg_nmlen] = 0;
- }
-
- rc = -EINVAL;
-
- switch (args->lstio_dbg_type) {
- case LST_OPC_SESSION:
- rc = lstcon_session_debug(args->lstio_dbg_timeout,
- args->lstio_dbg_resultp);
- break;
-
- case LST_OPC_BATCHSRV:
- client = 0;
- case LST_OPC_BATCHCLI:
- if (name == NULL)
- goto out;
-
- rc = lstcon_batch_debug(args->lstio_dbg_timeout,
- name, client, args->lstio_dbg_resultp);
- break;
-
- case LST_OPC_GROUP:
- if (name == NULL)
- goto out;
-
- rc = lstcon_group_debug(args->lstio_dbg_timeout,
- name, args->lstio_dbg_resultp);
- break;
-
- case LST_OPC_NODES:
- if (args->lstio_dbg_count <= 0 ||
- args->lstio_dbg_idsp == NULL)
- goto out;
-
- rc = lstcon_nodes_debug(args->lstio_dbg_timeout,
- args->lstio_dbg_count,
- args->lstio_dbg_idsp,
- args->lstio_dbg_resultp);
- break;
-
- default:
- break;
- }
-
-out:
- if (name != NULL)
- LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_group_add_ioctl(lstio_group_add_args_t *args)
-{
- char *name;
- int rc;
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_grp_namep == NULL||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_grp_namep,
- args->lstio_grp_nmlen)) {
- LIBCFS_FREE(name, args->lstio_grp_nmlen);
- return -EFAULT;
- }
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_group_add(name);
-
- LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_group_del_ioctl(lstio_group_del_args_t *args)
-{
- int rc;
- char *name;
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_grp_namep == NULL ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_grp_namep,
- args->lstio_grp_nmlen)) {
- LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
- return -EFAULT;
- }
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_group_del(name);
-
- LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_group_update_ioctl(lstio_group_update_args_t *args)
-{
- int rc;
- char *name;
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_grp_resultp == NULL ||
- args->lstio_grp_namep == NULL ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_grp_namep,
- args->lstio_grp_nmlen)) {
- LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
- return -EFAULT;
- }
-
- name[args->lstio_grp_nmlen] = 0;
-
- switch (args->lstio_grp_opc) {
- case LST_GROUP_CLEAN:
- rc = lstcon_group_clean(name, args->lstio_grp_args);
- break;
-
- case LST_GROUP_REFRESH:
- rc = lstcon_group_refresh(name, args->lstio_grp_resultp);
- break;
-
- case LST_GROUP_RMND:
- if (args->lstio_grp_count <= 0 ||
- args->lstio_grp_idsp == NULL) {
- rc = -EINVAL;
- break;
- }
- rc = lstcon_nodes_remove(name, args->lstio_grp_count,
- args->lstio_grp_idsp,
- args->lstio_grp_resultp);
- break;
-
- default:
- rc = -EINVAL;
- break;
- }
-
- LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_nodes_add_ioctl(lstio_group_nodes_args_t *args)
-{
- int rc;
- char *name;
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_grp_idsp == NULL || /* array of ids */
- args->lstio_grp_count <= 0 ||
- args->lstio_grp_resultp == NULL ||
- args->lstio_grp_namep == NULL ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen)) {
- LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
-
- return -EFAULT;
- }
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_nodes_add(name, args->lstio_grp_count,
- args->lstio_grp_idsp,
- args->lstio_grp_resultp);
-
- LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_group_list_ioctl(lstio_group_list_args_t *args)
-{
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_grp_idx < 0 ||
- args->lstio_grp_namep == NULL ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- return lstcon_group_list(args->lstio_grp_idx,
- args->lstio_grp_nmlen,
- args->lstio_grp_namep);
-}
-
-int
-lst_group_info_ioctl(lstio_group_info_args_t *args)
-{
- char *name;
- int ndent;
- int index;
- int rc;
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_grp_namep == NULL ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (args->lstio_grp_entp == NULL && /* output: group entry */
- args->lstio_grp_dentsp == NULL) /* output: node entry */
- return -EINVAL;
-
- if (args->lstio_grp_dentsp != NULL) { /* have node entry */
- if (args->lstio_grp_idxp == NULL || /* node index */
- args->lstio_grp_ndentp == NULL) /* # of node entry */
- return -EINVAL;
-
- if (copy_from_user(&ndent,
- args->lstio_grp_ndentp, sizeof(ndent)) ||
- copy_from_user(&index, args->lstio_grp_idxp, sizeof(index)))
- return -EFAULT;
-
- if (ndent <= 0 || index < 0)
- return -EINVAL;
- }
-
- LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_grp_namep,
- args->lstio_grp_nmlen)) {
- LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
- return -EFAULT;
- }
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_group_info(name, args->lstio_grp_entp,
- &index, &ndent, args->lstio_grp_dentsp);
-
- LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
-
- if (rc != 0)
- return rc;
-
- if (args->lstio_grp_dentsp != NULL &&
- (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) ||
- copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent))))
- rc = -EFAULT;
-
- return 0;
-}
-
-int
-lst_batch_add_ioctl(lstio_batch_add_args_t *args)
-{
- int rc;
- char *name;
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_bat_namep == NULL ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_bat_namep,
- args->lstio_bat_nmlen)) {
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
- return -EFAULT;
- }
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_add(name);
-
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_batch_run_ioctl(lstio_batch_run_args_t *args)
-{
- int rc;
- char *name;
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_bat_namep == NULL ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_bat_namep,
- args->lstio_bat_nmlen)) {
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
- return -EFAULT;
- }
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_run(name, args->lstio_bat_timeout,
- args->lstio_bat_resultp);
-
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_batch_stop_ioctl(lstio_batch_stop_args_t *args)
-{
- int rc;
- char *name;
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_bat_resultp == NULL ||
- args->lstio_bat_namep == NULL ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_bat_namep,
- args->lstio_bat_nmlen)) {
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
- return -EFAULT;
- }
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_stop(name, args->lstio_bat_force,
- args->lstio_bat_resultp);
-
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_batch_query_ioctl(lstio_batch_query_args_t *args)
-{
- char *name;
- int rc;
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_bat_resultp == NULL ||
- args->lstio_bat_namep == NULL ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (args->lstio_bat_testidx < 0)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_bat_namep,
- args->lstio_bat_nmlen)) {
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
- return -EFAULT;
- }
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_test_batch_query(name,
- args->lstio_bat_testidx,
- args->lstio_bat_client,
- args->lstio_bat_timeout,
- args->lstio_bat_resultp);
-
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
-
- return rc;
-}
-
-int
-lst_batch_list_ioctl(lstio_batch_list_args_t *args)
-{
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_bat_idx < 0 ||
- args->lstio_bat_namep == NULL ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- return lstcon_batch_list(args->lstio_bat_idx,
- args->lstio_bat_nmlen,
- args->lstio_bat_namep);
-}
-
-int
-lst_batch_info_ioctl(lstio_batch_info_args_t *args)
-{
- char *name;
- int rc;
- int index;
- int ndent;
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_bat_namep == NULL || /* batch name */
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (args->lstio_bat_entp == NULL && /* output: batch entry */
- args->lstio_bat_dentsp == NULL) /* output: node entry */
- return -EINVAL;
-
- if (args->lstio_bat_dentsp != NULL) { /* have node entry */
- if (args->lstio_bat_idxp == NULL || /* node index */
- args->lstio_bat_ndentp == NULL) /* # of node entry */
- return -EINVAL;
-
- if (copy_from_user(&index, args->lstio_bat_idxp, sizeof(index)) ||
- copy_from_user(&ndent, args->lstio_bat_ndentp, sizeof(ndent)))
- return -EFAULT;
-
- if (ndent <= 0 || index < 0)
- return -EINVAL;
- }
-
- LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name,
- args->lstio_bat_namep, args->lstio_bat_nmlen)) {
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
- return -EFAULT;
- }
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_info(name,
- args->lstio_bat_entp, args->lstio_bat_server,
- args->lstio_bat_testidx, &index, &ndent,
- args->lstio_bat_dentsp);
-
- LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
-
- if (rc != 0)
- return rc;
-
- if (args->lstio_bat_dentsp != NULL &&
- (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) ||
- copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent))))
- rc = -EFAULT;
-
- return rc;
-}
-
-int
-lst_stat_query_ioctl(lstio_stat_args_t *args)
-{
- int rc;
- char *name;
-
- /* TODO: not finished */
- if (args->lstio_sta_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_sta_resultp == NULL ||
- (args->lstio_sta_namep == NULL &&
- args->lstio_sta_idsp == NULL) ||
- args->lstio_sta_nmlen <= 0 ||
- args->lstio_sta_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (args->lstio_sta_idsp != NULL &&
- args->lstio_sta_count <= 0)
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_sta_nmlen + 1);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name, args->lstio_sta_namep,
- args->lstio_sta_nmlen)) {
- LIBCFS_FREE(name, args->lstio_sta_nmlen + 1);
- return -EFAULT;
- }
-
- if (args->lstio_sta_idsp == NULL) {
- rc = lstcon_group_stat(name, args->lstio_sta_timeout,
- args->lstio_sta_resultp);
- } else {
- rc = lstcon_nodes_stat(args->lstio_sta_count,
- args->lstio_sta_idsp,
- args->lstio_sta_timeout,
- args->lstio_sta_resultp);
- }
-
- LIBCFS_FREE(name, args->lstio_sta_nmlen + 1);
-
- return rc;
-}
-
-int lst_test_add_ioctl(lstio_test_args_t *args)
-{
- char *name;
- char *srcgrp = NULL;
- char *dstgrp = NULL;
- void *param = NULL;
- int ret = 0;
- int rc = -ENOMEM;
-
- if (args->lstio_tes_resultp == NULL ||
- args->lstio_tes_retp == NULL ||
- args->lstio_tes_bat_name == NULL || /* no specified batch */
- args->lstio_tes_bat_nmlen <= 0 ||
- args->lstio_tes_bat_nmlen > LST_NAME_SIZE ||
- args->lstio_tes_sgrp_name == NULL || /* no source group */
- args->lstio_tes_sgrp_nmlen <= 0 ||
- args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE ||
- args->lstio_tes_dgrp_name == NULL || /* no target group */
- args->lstio_tes_dgrp_nmlen <= 0 ||
- args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- /* have parameter, check if parameter length is valid */
- if (args->lstio_tes_param != NULL &&
- (args->lstio_tes_param_len <= 0 ||
- args->lstio_tes_param_len > CFS_PAGE_SIZE - sizeof(lstcon_test_t)))
- return -EINVAL;
-
- LIBCFS_ALLOC(name, args->lstio_tes_bat_nmlen + 1);
- if (name == NULL)
- return rc;
-
- LIBCFS_ALLOC(srcgrp, args->lstio_tes_sgrp_nmlen + 1);
- if (srcgrp == NULL)
- goto out;
-
- LIBCFS_ALLOC(dstgrp, args->lstio_tes_dgrp_nmlen + 1);
- if (srcgrp == NULL)
- goto out;
-
- if (args->lstio_tes_param != NULL) {
- LIBCFS_ALLOC(param, args->lstio_tes_param_len);
- if (param == NULL)
- goto out;
- }
-
- rc = -EFAULT;
- if (copy_from_user(name,
- args->lstio_tes_bat_name,
- args->lstio_tes_bat_nmlen) ||
- copy_from_user(srcgrp,
- args->lstio_tes_sgrp_name,
- args->lstio_tes_sgrp_nmlen) ||
- copy_from_user(dstgrp,
- args->lstio_tes_dgrp_name,
- args->lstio_tes_dgrp_nmlen) ||
- copy_from_user(param, args->lstio_tes_param,
- args->lstio_tes_param_len))
- goto out;
-
- rc = lstcon_test_add(name,
- args->lstio_tes_type,
- args->lstio_tes_loop,
- args->lstio_tes_concur,
- args->lstio_tes_dist, args->lstio_tes_span,
- srcgrp, dstgrp, param, args->lstio_tes_param_len,
- &ret, args->lstio_tes_resultp);
-
- if (ret != 0)
- rc = (copy_to_user(args->lstio_tes_retp, &ret, sizeof(ret))) ?
- -EFAULT : 0;
-out:
- if (name != NULL)
- LIBCFS_FREE(name, args->lstio_tes_bat_nmlen + 1);
-
- if (srcgrp != NULL)
- LIBCFS_FREE(srcgrp, args->lstio_tes_sgrp_nmlen + 1);
-
- if (dstgrp != NULL)
- LIBCFS_FREE(dstgrp, args->lstio_tes_dgrp_nmlen + 1);
-
- if (param != NULL)
- LIBCFS_FREE(param, args->lstio_tes_param_len);
-
- return rc;
-}
-
-int
-lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data)
-{
- char *buf;
- int opc = data->ioc_u32[0];
- int rc;
-
- if (cmd != IOC_LIBCFS_LNETST)
- return -EINVAL;
-
- if (data->ioc_plen1 > CFS_PAGE_SIZE)
- return -EINVAL;
-
- LIBCFS_ALLOC(buf, data->ioc_plen1);
- if (buf == NULL)
- return -ENOMEM;
-
- /* copy in parameter */
- if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
- LIBCFS_FREE(buf, data->ioc_plen1);
- return -EFAULT;
- }
-
- mutex_down(&console_session.ses_mutex);
-
- console_session.ses_laststamp = cfs_time_current_sec();
-
- if (console_session.ses_shutdown) {
- rc = -ESHUTDOWN;
- goto out;
- }
-
- if (console_session.ses_expired)
- lstcon_session_end();
-
- if (opc != LSTIO_SESSION_NEW &&
- console_session.ses_state == LST_SESSION_NONE) {
- CDEBUG(D_NET, "LST no active session\n");
- rc = -ESRCH;
- goto out;
- }
-
- memset(&console_session.ses_trans_stat, 0, sizeof(lstcon_trans_stat_t));
-
- switch (opc) {
- case LSTIO_SESSION_NEW:
- rc = lst_session_new_ioctl((lstio_session_new_args_t *)buf);
- break;
- case LSTIO_SESSION_END:
- rc = lst_session_end_ioctl((lstio_session_end_args_t *)buf);
- break;
- case LSTIO_SESSION_INFO:
- rc = lst_session_info_ioctl((lstio_session_info_args_t *)buf);
- break;
- case LSTIO_DEBUG:
- rc = lst_debug_ioctl((lstio_debug_args_t *)buf);
- break;
- case LSTIO_GROUP_ADD:
- rc = lst_group_add_ioctl((lstio_group_add_args_t *)buf);
- break;
- case LSTIO_GROUP_DEL:
- rc = lst_group_del_ioctl((lstio_group_del_args_t *)buf);
- break;
- case LSTIO_GROUP_UPDATE:
- rc = lst_group_update_ioctl((lstio_group_update_args_t *)buf);
- break;
- case LSTIO_NODES_ADD:
- rc = lst_nodes_add_ioctl((lstio_group_nodes_args_t *)buf);
- break;
- case LSTIO_GROUP_LIST:
- rc = lst_group_list_ioctl((lstio_group_list_args_t *)buf);
- break;
- case LSTIO_GROUP_INFO:
- rc = lst_group_info_ioctl((lstio_group_info_args_t *)buf);
- break;
- case LSTIO_BATCH_ADD:
- rc = lst_batch_add_ioctl((lstio_batch_add_args_t *)buf);
- break;
- case LSTIO_BATCH_START:
- rc = lst_batch_run_ioctl((lstio_batch_run_args_t *)buf);
- break;
- case LSTIO_BATCH_STOP:
- rc = lst_batch_stop_ioctl((lstio_batch_stop_args_t *)buf);
- break;
- case LSTIO_BATCH_QUERY:
- rc = lst_batch_query_ioctl((lstio_batch_query_args_t *)buf);
- break;
- case LSTIO_BATCH_LIST:
- rc = lst_batch_list_ioctl((lstio_batch_list_args_t *)buf);
- break;
- case LSTIO_BATCH_INFO:
- rc = lst_batch_info_ioctl((lstio_batch_info_args_t *)buf);
- break;
- case LSTIO_TEST_ADD:
- rc = lst_test_add_ioctl((lstio_test_args_t *)buf);
- break;
- case LSTIO_STAT_QUERY:
- rc = lst_stat_query_ioctl((lstio_stat_args_t *)buf);
- break;
- default:
- rc = -EINVAL;
- }
-
- if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
- sizeof(lstcon_trans_stat_t)))
- rc = -EFAULT;
-out:
- mutex_up(&console_session.ses_mutex);
-
- LIBCFS_FREE(buf, data->ioc_plen1);
-
- return rc;
-}
-
-EXPORT_SYMBOL(lstcon_ioctl_entry);
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Console framework rpcs
- */
-#ifdef __KERNEL__
-
-#include <libcfs/libcfs.h>
-#include <lnet/lib-lnet.h>
-#include "timer.h"
-#include "conrpc.h"
-#include "console.h"
-
-void lstcon_rpc_stat_reply(int, srpc_msg_t *,
- lstcon_node_t *, lstcon_trans_stat_t *);
-
-static void
-lstcon_rpc_done(srpc_client_rpc_t *rpc)
-{
- lstcon_rpc_t *crpc = (lstcon_rpc_t *)rpc->crpc_priv;
-
- LASSERT (crpc != NULL && rpc == crpc->crp_rpc);
- LASSERT (crpc->crp_posted && !crpc->crp_finished);
-
- spin_lock(&rpc->crpc_lock);
-
- if (crpc->crp_trans == NULL) {
- /* Orphan RPC is not in any transaction,
- * I'm just a poor body and nobody loves me */
- spin_unlock(&rpc->crpc_lock);
-
- /* release it */
- lstcon_rpc_put(crpc);
- return;
- }
-
- /* not an orphan RPC */
- crpc->crp_finished = 1;
-
- if (crpc->crp_stamp == 0) {
- /* not aborted */
- LASSERT (crpc->crp_status == 0);
-
- crpc->crp_stamp = cfs_time_current();
- crpc->crp_status = rpc->crpc_status;
- }
-
- /* wakeup (transaction)thread if I'm the last RPC in the transaction */
- if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining))
- cfs_waitq_signal(&crpc->crp_trans->tas_waitq);
-
- spin_unlock(&rpc->crpc_lock);
-}
-
-int
-lstcon_rpc_init(lstcon_node_t *nd, int service,
- int npg, int cached, lstcon_rpc_t *crpc)
-{
-
- crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service,
- npg, npg * CFS_PAGE_SIZE,
- lstcon_rpc_done, (void *)crpc);
- if (crpc->crp_rpc == NULL)
- return -ENOMEM;
-
- crpc->crp_trans = NULL;
- crpc->crp_node = nd;
- crpc->crp_posted = 0;
- crpc->crp_finished = 0;
- crpc->crp_unpacked = 0;
- crpc->crp_status = 0;
- crpc->crp_stamp = 0;
- crpc->crp_static = !cached;
- CFS_INIT_LIST_HEAD(&crpc->crp_link);
-
- atomic_inc(&console_session.ses_rpc_counter);
-
- return 0;
-}
-
-int
-lstcon_rpc_prep(lstcon_node_t *nd, int service,
- int npg, lstcon_rpc_t **crpcpp)
-{
- lstcon_rpc_t *crpc = NULL;
- int rc;
-
- spin_lock(&console_session.ses_rpc_lock);
-
- if (!list_empty(&console_session.ses_rpc_freelist)) {
- crpc = list_entry(console_session.ses_rpc_freelist.next,
- lstcon_rpc_t, crp_link);
- list_del_init(&crpc->crp_link);
- }
-
- spin_unlock(&console_session.ses_rpc_lock);
-
- if (crpc == NULL) {
- LIBCFS_ALLOC(crpc, sizeof(*crpc));
- if (crpc == NULL)
- return -ENOMEM;
- }
-
- rc = lstcon_rpc_init(nd, service, npg, 1, crpc);
- if (rc == 0) {
- *crpcpp = crpc;
- return 0;
- }
-
- LIBCFS_FREE(crpc, sizeof(*crpc));
-
- return rc;
-}
-
-void
-lstcon_rpc_put(lstcon_rpc_t *crpc)
-{
- srpc_bulk_t *bulk = &crpc->crp_rpc->crpc_bulk;
- int i;
-
- LASSERT (list_empty(&crpc->crp_link));
-
- for (i = 0; i < bulk->bk_niov; i++) {
- if (bulk->bk_iovs[i].kiov_page == NULL)
- continue;
-
- cfs_free_page(bulk->bk_iovs[i].kiov_page);
- }
-
- srpc_client_rpc_decref(crpc->crp_rpc);
-
- if (crpc->crp_static) {
- /* Static RPC, not allocated */
- memset(crpc, 0, sizeof(*crpc));
- crpc->crp_static = 1;
-
- } else {
- spin_lock(&console_session.ses_rpc_lock);
-
- list_add(&crpc->crp_link, &console_session.ses_rpc_freelist);
-
- spin_unlock(&console_session.ses_rpc_lock);
- }
-
- /* RPC is not alive now */
- atomic_dec(&console_session.ses_rpc_counter);
-}
-
-void
-lstcon_rpc_post(lstcon_rpc_t *crpc)
-{
- lstcon_rpc_trans_t *trans = crpc->crp_trans;
-
- LASSERT (trans != NULL);
-
- atomic_inc(&trans->tas_remaining);
- crpc->crp_posted = 1;
-
- sfw_post_rpc(crpc->crp_rpc);
-}
-
-static char *
-lstcon_rpc_trans_name(int transop)
-{
- if (transop == LST_TRANS_SESNEW)
- return "SESNEW";
-
- if (transop == LST_TRANS_SESEND)
- return "SESEND";
-
- if (transop == LST_TRANS_SESQRY)
- return "SESQRY";
-
- if (transop == LST_TRANS_SESPING)
- return "SESPING";
-
- if (transop == LST_TRANS_TSBCLIADD)
- return "TSBCLIADD";
-
- if (transop == LST_TRANS_TSBSRVADD)
- return "TSBSRVADD";
-
- if (transop == LST_TRANS_TSBRUN)
- return "TSBRUN";
-
- if (transop == LST_TRANS_TSBSTOP)
- return "TSBSTOP";
-
- if (transop == LST_TRANS_TSBCLIQRY)
- return "TSBCLIQRY";
-
- if (transop == LST_TRANS_TSBSRVQRY)
- return "TSBSRVQRY";
-
- if (transop == LST_TRANS_STATQRY)
- return "STATQRY";
-
- return "Unknown";
-}
-
-int
-lstcon_rpc_trans_prep(struct list_head *translist,
- int transop, lstcon_rpc_trans_t **transpp)
-{
- lstcon_rpc_trans_t *trans;
-
- if (translist != NULL) {
- list_for_each_entry(trans, translist, tas_link) {
- /* Can't enqueue two private transaction on
- * the same object */
- if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE)
- return -EPERM;
- }
- }
-
- /* create a trans group */
- LIBCFS_ALLOC(trans, sizeof(*trans));
- if (trans == NULL)
- return -ENOMEM;
-
- trans->tas_opc = transop;
-
- if (translist == NULL)
- CFS_INIT_LIST_HEAD(&trans->tas_olink);
- else
- list_add_tail(&trans->tas_olink, translist);
-
- list_add_tail(&trans->tas_link, &console_session.ses_trans_list);
-
- CFS_INIT_LIST_HEAD(&trans->tas_rpcs_list);
- atomic_set(&trans->tas_remaining, 0);
- cfs_waitq_init(&trans->tas_waitq);
-
- *transpp = trans;
-
- return 0;
-}
-
-void
-lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *crpc)
-{
- list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list);
- crpc->crp_trans = trans;
-}
-
-void
-lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error)
-{
- srpc_client_rpc_t *rpc;
- lstcon_rpc_t *crpc;
- lstcon_node_t *nd;
-
- list_for_each_entry (crpc, &trans->tas_rpcs_list, crp_link) {
- rpc = crpc->crp_rpc;
-
- spin_lock(&rpc->crpc_lock);
-
- if (!crpc->crp_posted || crpc->crp_stamp != 0) {
- /* rpc done or aborted already */
- spin_unlock(&rpc->crpc_lock);
- continue;
- }
-
- crpc->crp_stamp = cfs_time_current();
- crpc->crp_status = error;
-
- spin_unlock(&rpc->crpc_lock);
-
- sfw_abort_rpc(rpc);
-
- if (error != ETIMEDOUT)
- continue;
-
- nd = crpc->crp_node;
- if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp))
- continue;
-
- nd->nd_stamp = crpc->crp_stamp;
- nd->nd_state = LST_NODE_DOWN;
- }
-}
-
-static int
-lstcon_rpc_trans_check(lstcon_rpc_trans_t *trans)
-{
- if (console_session.ses_shutdown &&
- !list_empty(&trans->tas_olink)) /* It's not an end session RPC */
- return 1;
-
- return (atomic_read(&trans->tas_remaining) == 0) ? 1: 0;
-}
-
-int
-lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout)
-{
- lstcon_rpc_t *crpc;
- int rc;
-
- if (list_empty(&trans->tas_rpcs_list))
- return 0;
-
- if (timeout < LST_TRANS_MIN_TIMEOUT)
- timeout = LST_TRANS_MIN_TIMEOUT;
-
- CDEBUG(D_NET, "Transaction %s started\n",
- lstcon_rpc_trans_name(trans->tas_opc));
-
- /* post all requests */
- list_for_each_entry (crpc, &trans->tas_rpcs_list, crp_link) {
- LASSERT (!crpc->crp_posted);
-
- lstcon_rpc_post(crpc);
- }
-
- mutex_up(&console_session.ses_mutex);
-
- rc = cfs_waitq_wait_event_interruptible_timeout(trans->tas_waitq,
- lstcon_rpc_trans_check(trans),
- timeout * HZ);
-
- rc = (rc > 0)? 0: ((rc < 0)? -EINTR: -ETIMEDOUT);
-
- mutex_down(&console_session.ses_mutex);
-
- if (console_session.ses_shutdown)
- rc = -ESHUTDOWN;
-
- if (rc != 0) {
- /* treat short timeout as canceled */
- if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2)
- rc = -EINTR;
-
- lstcon_rpc_trans_abort(trans, rc);
- }
-
- CDEBUG(D_NET, "Transaction %s stopped: %d\n",
- lstcon_rpc_trans_name(trans->tas_opc), rc);
-
- lstcon_rpc_trans_stat(trans, lstcon_trans_stat());
-
- return rc;
-}
-
-int
-lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp)
-{
- lstcon_node_t *nd = crpc->crp_node;
- srpc_client_rpc_t *rpc = crpc->crp_rpc;
- srpc_generic_reply_t *rep;
-
- LASSERT (nd != NULL && rpc != NULL);
- LASSERT (crpc->crp_stamp != 0);
-
- if (crpc->crp_status != 0) {
- *msgpp = NULL;
- return crpc->crp_status;
- }
-
- *msgpp = &rpc->crpc_replymsg;
- if (!crpc->crp_unpacked) {
- sfw_unpack_message(*msgpp);
- crpc->crp_unpacked = 1;
- }
-
- if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp))
- return 0;
-
- nd->nd_stamp = crpc->crp_stamp;
- rep = &(*msgpp)->msg_body.reply;
-
- if (rep->sid.ses_nid == LNET_NID_ANY)
- nd->nd_state = LST_NODE_UNKNOWN;
- else if (lstcon_session_match(rep->sid))
- nd->nd_state = LST_NODE_ACTIVE;
- else
- nd->nd_state = LST_NODE_BUSY;
-
- return 0;
-}
-
-void
-lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat)
-{
- lstcon_rpc_t *crpc;
- srpc_client_rpc_t *rpc;
- srpc_msg_t *rep;
- int error;
-
- LASSERT (stat != NULL);
-
- memset(stat, 0, sizeof(*stat));
-
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- lstcon_rpc_stat_total(stat, 1);
-
- rpc = crpc->crp_rpc;
-
- LASSERT (crpc->crp_stamp != 0);
-
- error = lstcon_rpc_get_reply(crpc, &rep);
- if (error != 0) {
- lstcon_rpc_stat_failure(stat, 1);
- if (stat->trs_rpc_errno == 0)
- stat->trs_rpc_errno = -error;
-
- continue;
- }
-
- lstcon_rpc_stat_success(stat, 1);
-
- lstcon_rpc_stat_reply(trans->tas_opc, rep,
- crpc->crp_node, stat);
- }
-
- CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, "
- "RPC error(%d), Framework error(%d)\n",
- lstcon_rpc_trans_name(trans->tas_opc),
- lstcon_rpc_stat_success(stat, 0),
- lstcon_rpc_stat_failure(stat, 0),
- lstcon_rpc_stat_total(stat, 0),
- stat->trs_rpc_errno, stat->trs_fwk_errno);
-
- return;
-}
-
-int
-lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
- struct list_head *head_up,
- lstcon_rpc_readent_func_t readent)
-{
- struct list_head tmp;
- struct list_head *next;
- lstcon_rpc_ent_t *ent;
- srpc_generic_reply_t *rep;
- srpc_client_rpc_t *rpc;
- lstcon_rpc_t *crpc;
- srpc_msg_t *msg;
- lstcon_node_t *nd;
- cfs_duration_t dur;
- struct timeval tv;
- int error;
-
- LASSERT (head_up != NULL);
-
- next = head_up;
-
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- if (copy_from_user(&tmp, next, sizeof(struct list_head)))
- return -EFAULT;
-
- if (tmp.next == head_up)
- return 0;
-
- next = tmp.next;
-
- ent = list_entry(next, lstcon_rpc_ent_t, rpe_link);
-
- rpc = crpc->crp_rpc;
-
- LASSERT (crpc->crp_stamp != 0);
-
- error = lstcon_rpc_get_reply(crpc, &msg);
-
- nd = crpc->crp_node;
-
- dur = cfs_time_sub(crpc->crp_stamp,
- console_session.ses_id.ses_stamp);
- cfs_duration_usec(dur, &tv);
-
- if (copy_to_user(&ent->rpe_peer,
- &nd->nd_id, sizeof(lnet_process_id_t)) ||
- copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) ||
- copy_to_user(&ent->rpe_state,
- &nd->nd_state, sizeof(nd->nd_state)) ||
- copy_to_user(&ent->rpe_rpc_errno, &error, sizeof(error)))
- return -EFAULT;
-
- if (error != 0)
- continue;
-
- /* RPC is done */
- rep = (srpc_generic_reply_t *)&msg->msg_body.reply;
-
- if (copy_to_user(&ent->rpe_sid,
- &rep->sid, sizeof(lst_sid_t)) ||
- copy_to_user(&ent->rpe_fwk_errno,
- &rep->status, sizeof(rep->status)))
- return -EFAULT;
-
- if (readent == NULL)
- continue;
-
- if ((error = readent(trans->tas_opc, msg, ent)) != 0)
- return error;
- }
-
- return 0;
-}
-
-void
-lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans)
-{
- srpc_client_rpc_t *rpc;
- lstcon_rpc_t *crpc;
- lstcon_rpc_t *tmp;
- int count = 0;
-
- list_for_each_entry_safe(crpc, tmp,
- &trans->tas_rpcs_list, crp_link) {
- rpc = crpc->crp_rpc;
-
- spin_lock(&rpc->crpc_lock);
-
- /* free it if not posted or finished already */
- if (!crpc->crp_posted || crpc->crp_finished) {
- spin_unlock(&rpc->crpc_lock);
-
- list_del_init(&crpc->crp_link);
- lstcon_rpc_put(crpc);
-
- continue;
- }
-
- /* rpcs can be still not callbacked (even LNetMDUnlink is called)
- * because huge timeout for inaccessible network, don't make
- * user wait for them, just abandon them, they will be recycled
- * in callback */
-
- LASSERT (crpc->crp_status != 0);
-
- crpc->crp_node = NULL;
- crpc->crp_trans = NULL;
- list_del_init(&crpc->crp_link);
- count ++;
-
- spin_unlock(&rpc->crpc_lock);
-
- atomic_dec(&trans->tas_remaining);
- }
-
- LASSERT (atomic_read(&trans->tas_remaining) == 0);
-
- list_del(&trans->tas_link);
- if (!list_empty(&trans->tas_olink))
- list_del(&trans->tas_olink);
-
- CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n",
- lstcon_rpc_trans_name(trans->tas_opc), count);
-
- LIBCFS_FREE(trans, sizeof(*trans));
-
- return;
-}
-
-int
-lstcon_sesrpc_prep(lstcon_node_t *nd, int transop, lstcon_rpc_t **crpc)
-{
- srpc_mksn_reqst_t *msrq;
- srpc_rmsn_reqst_t *rsrq;
- int rc;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION, 0, crpc);
- if (rc != 0)
- return rc;
-
- msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst;
- msrq->mksn_sid = console_session.ses_id;
- msrq->mksn_force = console_session.ses_force;
- strncpy(msrq->mksn_name, console_session.ses_name,
- strlen(console_session.ses_name));
- break;
-
- case LST_TRANS_SESEND:
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION, 0, crpc);
- if (rc != 0)
- return rc;
-
- rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst;
- rsrq->rmsn_sid = console_session.ses_id;
- break;
-
- default:
- LBUG();
- }
-
- return 0;
-}
-
-int
-lstcon_dbgrpc_prep(lstcon_node_t *nd, lstcon_rpc_t **crpc)
-{
- srpc_debug_reqst_t *drq;
- int rc;
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, 0, crpc);
- if (rc != 0)
- return rc;
-
- drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
- drq->dbg_sid = console_session.ses_id;
- drq->dbg_flags = 0;
-
- return rc;
-}
-
-int
-lstcon_batrpc_prep(lstcon_node_t *nd, int transop,
- lstcon_tsb_hdr_t *tsb, lstcon_rpc_t **crpc)
-{
- lstcon_batch_t *batch;
- srpc_batch_reqst_t *brq;
- int rc;
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, 0, crpc);
- if (rc != 0)
- return rc;
-
- brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst;
-
- brq->bar_sid = console_session.ses_id;
- brq->bar_bid = tsb->tsb_id;
- brq->bar_testidx = tsb->tsb_index;
- brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN :
- (transop == LST_TRANS_TSBSTOP ? SRPC_BATCH_OPC_STOP:
- SRPC_BATCH_OPC_QUERY);
-
- if (transop != LST_TRANS_TSBRUN &&
- transop != LST_TRANS_TSBSTOP)
- return 0;
-
- LASSERT (tsb->tsb_index == 0);
-
- batch = (lstcon_batch_t *)tsb;
- brq->bar_arg = batch->bat_arg;
-
- return 0;
-}
-
-int
-lstcon_statrpc_prep(lstcon_node_t *nd, lstcon_rpc_t **crpc)
-{
- srpc_stat_reqst_t *srq;
- int rc;
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, 0, crpc);
- if (rc != 0)
- return rc;
-
- srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst;
-
- srq->str_sid = console_session.ses_id;
- srq->str_type = 0; /* XXX remove it */
-
- return 0;
-}
-
-lnet_process_id_t *
-lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov)
-{
- lnet_process_id_t *pid;
- int i;
-
- i = idx / (CFS_PAGE_SIZE / sizeof(lnet_process_id_t));
-
- LASSERT (i < nkiov);
-
- pid = (lnet_process_id_t *)cfs_page_address(kiov[i].kiov_page);
-
- return &pid[idx % (CFS_PAGE_SIZE / sizeof(lnet_process_id_t))];
-}
-
-int
-lstcon_dstnodes_prep(lstcon_group_t *grp, int idx,
- int dist, int span, int nkiov, lnet_kiov_t *kiov)
-{
- lnet_process_id_t *pid;
- lstcon_ndlink_t *ndl;
- lstcon_node_t *nd;
- int start;
- int end;
- int i = 0;
-
- LASSERT (dist >= 1);
- LASSERT (span >= 1);
- LASSERT (grp->grp_nnode >= 1);
-
- if (span > grp->grp_nnode)
- return -EINVAL;
-
- start = ((idx / dist) * span) % grp->grp_nnode;
- end = ((idx / dist) * span + span - 1) % grp->grp_nnode;
-
- list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
- nd = ndl->ndl_node;
- if (i < start) {
- i ++;
- continue;
- }
-
- if (i > (end >= start ? end: grp->grp_nnode))
- break;
-
- pid = lstcon_next_id((i - start), nkiov, kiov);
- *pid = nd->nd_id;
- i++;
- }
-
- if (start <= end) /* done */
- return 0;
-
- list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
- if (i > grp->grp_nnode + end)
- break;
-
- nd = ndl->ndl_node;
- pid = lstcon_next_id((i - start), nkiov, kiov);
- *pid = nd->nd_id;
- i++;
- }
-
- return 0;
-}
-
-int
-lstcon_pingrpc_prep(lst_test_ping_param_t *param, srpc_test_reqst_t *req)
-{
- test_ping_req_t *prq = &req->tsr_u.ping;
-
- prq->png_size = param->png_size;
- prq->png_flags = param->png_flags;
- /* TODO dest */
- return 0;
-}
-
-int
-lstcon_bulkrpc_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req)
-{
- test_bulk_req_t *brq = &req->tsr_u.bulk;
-
- brq->blk_opc = param->blk_opc;
- brq->blk_npg = (param->blk_size + CFS_PAGE_SIZE - 1) / CFS_PAGE_SIZE;
- brq->blk_flags = param->blk_flags;
-
- return 0;
-}
-
-int
-lstcon_testrpc_prep(lstcon_node_t *nd, int transop,
- lstcon_test_t *test, lstcon_rpc_t **crpc)
-{
- lstcon_group_t *sgrp = test->tes_src_grp;
- lstcon_group_t *dgrp = test->tes_dst_grp;
- srpc_test_reqst_t *trq;
- srpc_bulk_t *bulk;
- int i;
- int n = 0;
- int rc = 0;
-
- if (transop == LST_TRANS_TSBCLIADD)
- n = sfw_id_pages(test->tes_span);
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, n, crpc);
- if (rc != 0)
- return rc;
-
- trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst;
-
- if (transop == LST_TRANS_TSBSRVADD) {
- int ndist = (sgrp->grp_nnode + test->tes_dist - 1) / test->tes_dist;
- int nspan = (dgrp->grp_nnode + test->tes_span - 1) / test->tes_span;
- int nmax = (ndist + nspan - 1) / nspan;
-
- trq->tsr_ndest = 0;
- trq->tsr_loop = nmax * test->tes_dist * test->tes_concur;
-
- } else {
- bulk = &(*crpc)->crp_rpc->crpc_bulk;
-
- for (i = 0; i < n; i++) {
- bulk->bk_iovs[i].kiov_offset = 0;
- bulk->bk_iovs[i].kiov_len = CFS_PAGE_SIZE;
- bulk->bk_iovs[i].kiov_page = cfs_alloc_page(CFS_ALLOC_STD);
-
- if (bulk->bk_iovs[i].kiov_page != NULL)
- continue;
-
- lstcon_rpc_put(*crpc);
- return -ENOMEM;
- }
-
- bulk->bk_sink = 0;
-
- LASSERT (transop == LST_TRANS_TSBCLIADD);
-
- rc = lstcon_dstnodes_prep(test->tes_dst_grp,
- test->tes_cliidx++, test->tes_dist,
- test->tes_span, n, &bulk->bk_iovs[0]);
- if (rc != 0) {
- lstcon_rpc_put(*crpc);
- return rc;
- }
-
- trq->tsr_ndest = test->tes_span;
- trq->tsr_loop = test->tes_loop;
- }
-
- trq->tsr_sid = console_session.ses_id;
- trq->tsr_bid = test->tes_hdr.tsb_id;
- trq->tsr_concur = test->tes_concur;
- trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 1 : 0;
- trq->tsr_stop_onerr = test->tes_stop_onerr;
-
- switch (test->tes_type) {
- case LST_TEST_PING:
- trq->tsr_service = SRPC_SERVICE_PING;
- rc = lstcon_pingrpc_prep((lst_test_ping_param_t *)&test->tes_param[0], trq);
- break;
- case LST_TEST_BULK:
- trq->tsr_service = SRPC_SERVICE_BRW;
- rc = lstcon_bulkrpc_prep((lst_test_bulk_param_t *)&test->tes_param[0], trq);
- break;
- default:
- LBUG();
- break;
- }
-
- return rc;
-}
-
-void
-lstcon_rpc_stat_reply(int transop, srpc_msg_t *msg,
- lstcon_node_t *nd, lstcon_trans_stat_t *stat)
-{
- srpc_mksn_reply_t *mksn_rep;
- srpc_rmsn_reply_t *rmsn_rep;
- srpc_debug_reply_t *dbg_rep;
- srpc_batch_reply_t *bat_rep;
- srpc_test_reply_t *test_rep;
- srpc_stat_reply_t *stat_rep;
- int errno = 0;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- mksn_rep = &msg->msg_body.mksn_reply;
-
- if (mksn_rep->mksn_status == 0) {
- lstcon_sesop_stat_success(stat, 1);
- /* session timeout on remote node */
- nd->nd_timeout = mksn_rep->mksn_timeout;
- return;
- }
-
- LASSERT (mksn_rep->mksn_status == EBUSY ||
- mksn_rep->mksn_status == EINVAL);
-
- lstcon_sesop_stat_failure(stat, 1);
- errno = mksn_rep->mksn_status;
- break;
-
- case LST_TRANS_SESEND:
- rmsn_rep = &msg->msg_body.rmsn_reply;
- /* ESRCH is not an error for end session */
- if (rmsn_rep->rmsn_status == 0 ||
- rmsn_rep->rmsn_status == ESRCH) {
- lstcon_sesop_stat_success(stat, 1);
- return;
- }
-
- LASSERT (rmsn_rep->rmsn_status == EBUSY ||
- rmsn_rep->rmsn_status == EINVAL);
-
- lstcon_sesop_stat_failure(stat, 1);
- errno = rmsn_rep->rmsn_status;
- break;
-
- case LST_TRANS_SESQRY:
- case LST_TRANS_SESPING:
- dbg_rep = &msg->msg_body.dbg_reply;
-
- if (dbg_rep->dbg_status == ESRCH) {
- lstcon_sesqry_stat_unknown(stat, 1);
- return;
- }
-
- LASSERT (dbg_rep->dbg_status == 0);
-
- if (lstcon_session_match(dbg_rep->dbg_sid))
- lstcon_sesqry_stat_active(stat, 1);
- else
- lstcon_sesqry_stat_busy(stat, 1);
- return;
-
- case LST_TRANS_TSBRUN:
- case LST_TRANS_TSBSTOP:
- bat_rep = &msg->msg_body.bat_reply;
-
- if (bat_rep->bar_status == 0) {
- lstcon_tsbop_stat_success(stat, 1);
- return;
- }
-
- if (bat_rep->bar_status == EPERM &&
- transop == LST_TRANS_TSBSTOP) {
- lstcon_tsbop_stat_success(stat, 1);
- return;
- }
-
- lstcon_tsbop_stat_failure(stat, 1);
- errno = bat_rep->bar_status;
- break;
-
- case LST_TRANS_TSBCLIQRY:
- case LST_TRANS_TSBSRVQRY:
- bat_rep = &msg->msg_body.bat_reply;
-
- if (bat_rep->bar_active != 0)
- lstcon_tsbqry_stat_run(stat, 1);
- else
- lstcon_tsbqry_stat_idle(stat, 1);
-
- if (bat_rep->bar_status == 0)
- return;
-
- lstcon_tsbqry_stat_failure(stat, 1);
- errno = bat_rep->bar_status;
- break;
-
- case LST_TRANS_TSBCLIADD:
- case LST_TRANS_TSBSRVADD:
- test_rep = &msg->msg_body.tes_reply;
-
- if (test_rep->tsr_status == 0) {
- lstcon_tsbop_stat_success(stat, 1);
- return;
- }
-
- lstcon_tsbop_stat_failure(stat, 1);
- errno = test_rep->tsr_status;
- break;
-
- case LST_TRANS_STATQRY:
- stat_rep = &msg->msg_body.stat_reply;
-
- if (stat_rep->str_status == 0) {
- lstcon_statqry_stat_success(stat, 1);
- return;
- }
-
- lstcon_statqry_stat_failure(stat, 1);
- errno = stat_rep->str_status;
- break;
-
- default:
- LBUG();
- }
-
- if (stat->trs_fwk_errno == 0)
- stat->trs_fwk_errno = errno;
-
- return;
-}
-
-int
-lstcon_rpc_trans_ndlist(struct list_head *ndlist,
- struct list_head *translist, int transop,
- void *arg, lstcon_rpc_cond_func_t condition,
- lstcon_rpc_trans_t **transpp)
-{
- lstcon_rpc_trans_t *trans;
- lstcon_ndlink_t *ndl;
- lstcon_node_t *nd;
- lstcon_rpc_t *rpc;
- int rc;
-
- /* Creating session RPG for list of nodes */
-
- rc = lstcon_rpc_trans_prep(translist, transop, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction %d: %d\n", transop, rc);
- return rc;
- }
-
- list_for_each_entry(ndl, ndlist, ndl_link) {
- rc = condition == NULL ? 1 :
- condition(transop, ndl->ndl_node, arg);
-
- if (rc == 0)
- continue;
-
- if (rc < 0) {
- CDEBUG(D_NET, "Condition error while creating RPC "
- " for transaction %d: %d\n", transop, rc);
- break;
- }
-
- nd = ndl->ndl_node;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- case LST_TRANS_SESEND:
- rc = lstcon_sesrpc_prep(nd, transop, &rpc);
- break;
- case LST_TRANS_SESQRY:
- case LST_TRANS_SESPING:
- rc = lstcon_dbgrpc_prep(nd, &rpc);
- break;
- case LST_TRANS_TSBCLIADD:
- case LST_TRANS_TSBSRVADD:
- rc = lstcon_testrpc_prep(nd, transop,
- (lstcon_test_t *)arg, &rpc);
- break;
- case LST_TRANS_TSBRUN:
- case LST_TRANS_TSBSTOP:
- case LST_TRANS_TSBCLIQRY:
- case LST_TRANS_TSBSRVQRY:
- rc = lstcon_batrpc_prep(nd, transop,
- (lstcon_tsb_hdr_t *)arg, &rpc);
- break;
- case LST_TRANS_STATQRY:
- rc = lstcon_statrpc_prep(nd, &rpc);
- break;
- default:
- rc = -EINVAL;
- break;
- }
-
- if (rc != 0) {
- CERROR("Failed to create RPC for transaction %s: %d\n",
- lstcon_rpc_trans_name(transop), rc);
- break;
- }
-
- lstcon_rpc_trans_addreq(trans, rpc);
- }
-
- if (rc == 0) {
- *transpp = trans;
- return 0;
- }
-
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-void
-lstcon_rpc_pinger(void *arg)
-{
- stt_timer_t *ptimer = (stt_timer_t *)arg;
- lstcon_rpc_trans_t *trans;
- lstcon_rpc_t *crpc;
- srpc_msg_t *rep;
- srpc_debug_reqst_t *drq;
- lstcon_ndlink_t *ndl;
- lstcon_node_t *nd;
- time_t intv;
- int count = 0;
- int rc;
-
- /* RPC pinger is a special case of transaction,
- * it's called by timer at 8 seconds interval.
- */
- mutex_down(&console_session.ses_mutex);
-
- if (console_session.ses_shutdown || console_session.ses_expired) {
- mutex_up(&console_session.ses_mutex);
- return;
- }
-
- if (!console_session.ses_expired &&
- cfs_time_current_sec() - console_session.ses_laststamp >
- console_session.ses_timeout)
- console_session.ses_expired = 1;
-
- trans = console_session.ses_ping;
-
- LASSERT (trans != NULL);
-
- list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) {
- nd = ndl->ndl_node;
-
- if (console_session.ses_expired) {
- /* idle console, end session on all nodes */
- if (nd->nd_state != LST_NODE_ACTIVE)
- continue;
-
- rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND, &crpc);
- if (rc != 0) {
- CERROR("Out of memory\n");
- break;
- }
-
- lstcon_rpc_trans_addreq(trans, crpc);
- lstcon_rpc_post(crpc);
-
- continue;
- }
-
- crpc = &nd->nd_ping;
-
- if (crpc->crp_rpc != NULL) {
- LASSERT (crpc->crp_trans == trans);
- LASSERT (!list_empty(&crpc->crp_link));
-
- spin_lock(&crpc->crp_rpc->crpc_lock);
-
- LASSERT (crpc->crp_posted);
-
- if (!crpc->crp_finished) {
- /* in flight */
- spin_unlock(&crpc->crp_rpc->crpc_lock);
- continue;
- }
-
- spin_unlock(&crpc->crp_rpc->crpc_lock);
-
- lstcon_rpc_get_reply(crpc, &rep);
-
- list_del_init(&crpc->crp_link);
-
- lstcon_rpc_put(crpc);
- }
-
- if (nd->nd_state != LST_NODE_ACTIVE)
- continue;
-
- intv = cfs_duration_sec(cfs_time_sub(cfs_time_current(),
- nd->nd_stamp));
- if (intv < nd->nd_timeout / 2)
- continue;
-
- rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG, 0, 0, crpc);
- if (rc != 0) {
- CERROR("Out of memory\n");
- break;
- }
-
- drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
- drq->dbg_sid = console_session.ses_id;
- drq->dbg_flags = 0;
-
- lstcon_rpc_trans_addreq(trans, crpc);
- lstcon_rpc_post(crpc);
-
- count ++;
- }
-
- if (console_session.ses_expired) {
- mutex_up(&console_session.ses_mutex);
- return;
- }
-
- CDEBUG(D_NET, "Ping %d nodes in session\n", count);
-
- ptimer->stt_expires = cfs_time_current_sec() + LST_PING_INTERVAL;
- stt_add_timer(ptimer);
-
- mutex_up(&console_session.ses_mutex);
-}
-
-int
-lstcon_rpc_pinger_start(void)
-{
- stt_timer_t *ptimer;
- int rc;
-
- LASSERT (list_empty(&console_session.ses_rpc_freelist));
- LASSERT (atomic_read(&console_session.ses_rpc_counter) == 0);
-
- rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING,
- &console_session.ses_ping);
- if (rc != 0) {
- CERROR("Failed to create console pinger\n");
- return rc;
- }
-
- ptimer = &console_session.ses_ping_timer;
- ptimer->stt_expires = cfs_time_current_sec() + LST_PING_INTERVAL;
-
- stt_add_timer(ptimer);
-
- return 0;
-}
-
-void
-lstcon_rpc_pinger_stop(void)
-{
- LASSERT (console_session.ses_shutdown);
-
- stt_del_timer(&console_session.ses_ping_timer);
-
- lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN);
- lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat());
- lstcon_rpc_trans_destroy(console_session.ses_ping);
-
- memset(lstcon_trans_stat(), 0, sizeof(lstcon_trans_stat_t));
-
- console_session.ses_ping = NULL;
-}
-
-void
-lstcon_rpc_cleanup_wait(void)
-{
- lstcon_rpc_trans_t *trans;
- lstcon_rpc_t *crpc;
- struct list_head *pacer;
- struct list_head zlist;
-
- /* Called with hold of global mutex */
-
- LASSERT (console_session.ses_shutdown);
-
- while (!list_empty(&console_session.ses_trans_list)) {
- list_for_each(pacer, &console_session.ses_trans_list) {
- trans = list_entry(pacer, lstcon_rpc_trans_t, tas_link);
-
- CDEBUG(D_NET, "Session closed, wakeup transaction %s\n",
- lstcon_rpc_trans_name(trans->tas_opc));
-
- cfs_waitq_signal(&trans->tas_waitq);
- }
-
- mutex_up(&console_session.ses_mutex);
-
- CWARN("Session is shutting down, "
- "waiting for termination of transactions\n");
- cfs_pause(cfs_time_seconds(1));
-
- mutex_down(&console_session.ses_mutex);
- }
-
- spin_lock(&console_session.ses_rpc_lock);
-
- lst_wait_until((atomic_read(&console_session.ses_rpc_counter) == 0),
- console_session.ses_rpc_lock,
- "Network is not accessable or target is down, "
- "waiting for %d console RPCs to being recycled\n",
- atomic_read(&console_session.ses_rpc_counter));
-
- list_add(&zlist, &console_session.ses_rpc_freelist);
- list_del_init(&console_session.ses_rpc_freelist);
-
- spin_unlock(&console_session.ses_rpc_lock);
-
- while (!list_empty(&zlist)) {
- crpc = list_entry(zlist.next, lstcon_rpc_t, crp_link);
-
- list_del(&crpc->crp_link);
- LIBCFS_FREE(crpc, sizeof(lstcon_rpc_t));
- }
-}
-
-int
-lstcon_rpc_module_init(void)
-{
- CFS_INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list);
- console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger;
- console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer;
-
- console_session.ses_ping = NULL;
-
- spin_lock_init(&console_session.ses_rpc_lock);
- atomic_set(&console_session.ses_rpc_counter, 0);
- CFS_INIT_LIST_HEAD(&console_session.ses_rpc_freelist);
-
- return 0;
-}
-
-void
-lstcon_rpc_module_fini(void)
-{
- LASSERT (list_empty(&console_session.ses_rpc_freelist));
- LASSERT (atomic_read(&console_session.ses_rpc_counter) == 0);
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Console rpc
- */
-
-#ifndef __LST_CONRPC_H__
-#define __LST_CONRPC_H__
-
-#ifdef __KERNEL__
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-types.h>
-#include <lnet/lnetst.h>
-#include "rpc.h"
-#include "selftest.h"
-
-/* Console rpc and rpc transaction */
-#define LST_TRANS_TIMEOUT 30
-#define LST_TRANS_MIN_TIMEOUT 3
-#define LST_PING_INTERVAL 8
-
-struct lstcon_rpc_trans;
-struct lstcon_tsb_hdr;
-struct lstcon_test;
-struct lstcon_node;
-
-typedef struct lstcon_rpc {
- struct list_head crp_link; /* chain on rpc transaction */
- srpc_client_rpc_t *crp_rpc; /* client rpc */
- struct lstcon_node *crp_node; /* destination node */
- struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */
-
- int crp_posted:1; /* rpc is posted */
- int crp_finished:1; /* rpc is finished */
- int crp_unpacked:1; /* reply is unpacked */
- int crp_static:1; /* not from RPC buffer */
- int crp_status; /* console rpc errors */
- cfs_time_t crp_stamp; /* replied time stamp */
-} lstcon_rpc_t;
-
-typedef struct lstcon_rpc_trans {
- struct list_head tas_olink; /* link chain on owner list */
- struct list_head tas_link; /* link chain on global list */
- int tas_opc; /* operation code of transaction */
- cfs_waitq_t tas_waitq; /* wait queue head */
- atomic_t tas_remaining; /* # of un-scheduled rpcs */
- struct list_head tas_rpcs_list; /* queued requests */
-} lstcon_rpc_trans_t;
-
-#define LST_TRANS_PRIVATE 0x1000
-
-#define LST_TRANS_SESNEW (LST_TRANS_PRIVATE | 0x01)
-#define LST_TRANS_SESEND (LST_TRANS_PRIVATE | 0x02)
-#define LST_TRANS_SESQRY 0x03
-#define LST_TRANS_SESPING 0x04
-
-#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11)
-#define LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12)
-#define LST_TRANS_TSBRUN (LST_TRANS_PRIVATE | 0x13)
-#define LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14)
-#define LST_TRANS_TSBCLIQRY 0x15
-#define LST_TRANS_TSBSRVQRY 0x16
-
-#define LST_TRANS_STATQRY 0x21
-
-typedef int (* lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *);
-typedef int (* lstcon_rpc_readent_func_t)(int, srpc_msg_t *, lstcon_rpc_ent_t *);
-
-int lstcon_sesrpc_prep(struct lstcon_node *nd,
- int transop, lstcon_rpc_t **crpc);
-int lstcon_dbgrpc_prep(struct lstcon_node *nd, lstcon_rpc_t **crpc);
-int lstcon_batrpc_prep(struct lstcon_node *nd, int transop,
- struct lstcon_tsb_hdr *tsb, lstcon_rpc_t **crpc);
-int lstcon_testrpc_prep(struct lstcon_node *nd, int transop,
- struct lstcon_test *test, lstcon_rpc_t **crpc);
-int lstcon_statrpc_prep(struct lstcon_node *nd, lstcon_rpc_t **crpc);
-void lstcon_rpc_put(lstcon_rpc_t *crpc);
-int lstcon_rpc_trans_prep(struct list_head *translist,
- int transop, lstcon_rpc_trans_t **transpp);
-int lstcon_rpc_trans_ndlist(struct list_head *ndlist,
- struct list_head *translist, int transop,
- void *arg, lstcon_rpc_cond_func_t condition,
- lstcon_rpc_trans_t **transpp);
-void lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans,
- lstcon_trans_stat_t *stat);
-int lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
- struct list_head *head_up,
- lstcon_rpc_readent_func_t readent);
-void lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error);
-void lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans);
-void lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *req);
-int lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout);
-int lstcon_rpc_pinger_start(void);
-void lstcon_rpc_pinger_stop(void);
-void lstcon_rpc_cleanup_wait(void);
-int lstcon_rpc_module_init(void);
-void lstcon_rpc_module_fini(void);
-
-#endif
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Infrastructure of LST console
- */
-#ifdef __KERNEL__
-
-#include <libcfs/libcfs.h>
-#include <lnet/lib-lnet.h>
-#include "console.h"
-#include "conrpc.h"
-
-#define LST_NODE_STATE_COUNTER(nd, p) \
-do { \
- if ((nd)->nd_state == LST_NODE_ACTIVE) \
- (p)->nle_nactive ++; \
- else if ((nd)->nd_state == LST_NODE_BUSY) \
- (p)->nle_nbusy ++; \
- else if ((nd)->nd_state == LST_NODE_DOWN) \
- (p)->nle_ndown ++; \
- else \
- (p)->nle_nunknown ++; \
- (p)->nle_nnode ++; \
-} while (0)
-
-lstcon_session_t console_session;
-
-void
-lstcon_node_get(lstcon_node_t *nd)
-{
- LASSERT (nd->nd_ref >= 1);
-
- nd->nd_ref++;
-}
-
-static int
-lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create)
-{
- lstcon_ndlink_t *ndl;
- unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE;
-
- LASSERT (id.nid != LNET_NID_ANY);
-
- list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx], ndl_hlink) {
- if (ndl->ndl_node->nd_id.nid != id.nid ||
- ndl->ndl_node->nd_id.pid != id.pid)
- continue;
-
- lstcon_node_get(ndl->ndl_node);
- *ndpp = ndl->ndl_node;
- return 0;
- }
-
- if (!create)
- return -ENOENT;
-
- LIBCFS_ALLOC(*ndpp, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t));
- if (*ndpp == NULL)
- return -ENOMEM;
-
- ndl = (lstcon_ndlink_t *)(*ndpp + 1);
-
- ndl->ndl_node = *ndpp;
-
- ndl->ndl_node->nd_ref = 1;
- ndl->ndl_node->nd_id = id;
- ndl->ndl_node->nd_stamp = cfs_time_current();
- ndl->ndl_node->nd_state = LST_NODE_UNKNOWN;
- ndl->ndl_node->nd_timeout = 0;
- memset(&ndl->ndl_node->nd_ping, 0, sizeof(lstcon_rpc_t));
-
- /* queued in global hash & list, no refcount is taken by
- * global hash & list, if caller release his refcount,
- * node will be released */
- list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]);
- list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list);
-
- return 0;
-}
-
-void
-lstcon_node_put(lstcon_node_t *nd)
-{
- lstcon_ndlink_t *ndl;
-
- LASSERT (nd->nd_ref > 0);
-
- if (--nd->nd_ref > 0)
- return;
-
- ndl = (lstcon_ndlink_t *)(nd + 1);
-
- LASSERT (!list_empty(&ndl->ndl_link));
- LASSERT (!list_empty(&ndl->ndl_hlink));
-
- /* remove from session */
- list_del(&ndl->ndl_link);
- list_del(&ndl->ndl_hlink);
-
- LIBCFS_FREE(nd, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t));
-}
-
-static int
-lstcon_ndlink_find(struct list_head *hash,
- lnet_process_id_t id, lstcon_ndlink_t **ndlpp, int create)
-{
- unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
- lstcon_ndlink_t *ndl;
- lstcon_node_t *nd;
- int rc;
-
- if (id.nid == LNET_NID_ANY)
- return -EINVAL;
-
- /* search in hash */
- list_for_each_entry(ndl, &hash[idx], ndl_hlink) {
- if (ndl->ndl_node->nd_id.nid != id.nid ||
- ndl->ndl_node->nd_id.pid != id.pid)
- continue;
-
- *ndlpp = ndl;
- return 0;
- }
-
- if (create == 0)
- return -ENOENT;
-
- /* find or create in session hash */
- rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0);
- if (rc != 0)
- return rc;
-
- LIBCFS_ALLOC(ndl, sizeof(lstcon_ndlink_t));
- if (ndl == NULL) {
- lstcon_node_put(nd);
- return -ENOMEM;
- }
-
- *ndlpp = ndl;
-
- ndl->ndl_node = nd;
- CFS_INIT_LIST_HEAD(&ndl->ndl_link);
- list_add_tail(&ndl->ndl_hlink, &hash[idx]);
-
- return 0;
-}
-
-static void
-lstcon_ndlink_release(lstcon_ndlink_t *ndl)
-{
- LASSERT (list_empty(&ndl->ndl_link));
- LASSERT (!list_empty(&ndl->ndl_hlink));
-
- list_del(&ndl->ndl_hlink); /* delete from hash */
- lstcon_node_put(ndl->ndl_node);
-
- LIBCFS_FREE(ndl, sizeof(*ndl));
-}
-
-static int
-lstcon_group_alloc(char *name, lstcon_group_t **grpp)
-{
- lstcon_group_t *grp;
- int i;
-
- LIBCFS_ALLOC(grp, offsetof(lstcon_group_t,
- grp_ndl_hash[LST_NODE_HASHSIZE]));
- if (grp == NULL)
- return -ENOMEM;
-
- memset(grp, 0, offsetof(lstcon_group_t,
- grp_ndl_hash[LST_NODE_HASHSIZE]));
-
- grp->grp_ref = 1;
- if (name != NULL)
- strcpy(grp->grp_name, name);
-
- CFS_INIT_LIST_HEAD(&grp->grp_link);
- CFS_INIT_LIST_HEAD(&grp->grp_ndl_list);
- CFS_INIT_LIST_HEAD(&grp->grp_trans_list);
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++)
- CFS_INIT_LIST_HEAD(&grp->grp_ndl_hash[i]);
-
- *grpp = grp;
-
- return 0;
-}
-
-static void
-lstcon_group_addref(lstcon_group_t *grp)
-{
- grp->grp_ref ++;
-}
-
-static void lstcon_group_ndlink_release(lstcon_group_t *, lstcon_ndlink_t *);
-
-static void
-lstcon_group_drain(lstcon_group_t *grp, int keep)
-{
- lstcon_ndlink_t *ndl;
- lstcon_ndlink_t *tmp;
-
- list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) {
- if ((ndl->ndl_node->nd_state & keep) == 0)
- lstcon_group_ndlink_release(grp, ndl);
- }
-}
-
-static void
-lstcon_group_decref(lstcon_group_t *grp)
-{
- int i;
-
- if (--grp->grp_ref > 0)
- return;
-
- if (!list_empty(&grp->grp_link))
- list_del(&grp->grp_link);
-
- lstcon_group_drain(grp, 0);
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++) {
- LASSERT (list_empty(&grp->grp_ndl_hash[i]));
- }
-
- LIBCFS_FREE(grp, offsetof(lstcon_group_t,
- grp_ndl_hash[LST_NODE_HASHSIZE]));
-}
-
-static int
-lstcon_group_find(char *name, lstcon_group_t **grpp)
-{
- lstcon_group_t *grp;
-
- list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
- if (strncmp(grp->grp_name, name, LST_NAME_SIZE) != 0)
- continue;
-
- lstcon_group_addref(grp); /* +1 ref for caller */
- *grpp = grp;
- return 0;
- }
-
- return -ENOENT;
-}
-
-static void
-lstcon_group_put(lstcon_group_t *grp)
-{
- lstcon_group_decref(grp);
-}
-
-static int
-lstcon_group_ndlink_find(lstcon_group_t *grp, lnet_process_id_t id,
- lstcon_ndlink_t **ndlpp, int create)
-{
- int rc;
-
- rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create);
- if (rc != 0)
- return rc;
-
- if (!list_empty(&(*ndlpp)->ndl_link))
- return 0;
-
- list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list);
- grp->grp_nnode ++;
-
- return 0;
-}
-
-static void
-lstcon_group_ndlink_release(lstcon_group_t *grp, lstcon_ndlink_t *ndl)
-{
- list_del_init(&ndl->ndl_link);
- lstcon_ndlink_release(ndl);
- grp->grp_nnode --;
-}
-
-static void
-lstcon_group_ndlink_move(lstcon_group_t *old,
- lstcon_group_t *new, lstcon_ndlink_t *ndl)
-{
- unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) %
- LST_NODE_HASHSIZE;
-
- list_del(&ndl->ndl_hlink);
- list_del(&ndl->ndl_link);
- old->grp_nnode --;
-
- list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]);
- list_add_tail(&ndl->ndl_link, &new->grp_ndl_list);
- new->grp_nnode ++;
-
- return;
-}
-
-static void
-lstcon_group_move(lstcon_group_t *old, lstcon_group_t *new)
-{
- lstcon_ndlink_t *ndl;
-
- while (!list_empty(&old->grp_ndl_list)) {
- ndl = list_entry(old->grp_ndl_list.next,
- lstcon_ndlink_t, ndl_link);
- lstcon_group_ndlink_move(old, new, ndl);
- }
-}
-
-int
-lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg)
-{
- lstcon_group_t *grp = (lstcon_group_t *)arg;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- if (nd->nd_state == LST_NODE_ACTIVE)
- return 0;
- break;
-
- case LST_TRANS_SESEND:
- if (nd->nd_state != LST_NODE_ACTIVE)
- return 0;
-
- if (grp != NULL && nd->nd_ref > 1)
- return 0;
- break;
-
- case LST_TRANS_SESQRY:
- break;
-
- default:
- LBUG();
- }
-
- return 1;
-}
-
-int
-lstcon_sesrpc_readent(int transop, srpc_msg_t *msg,
- lstcon_rpc_ent_t *ent_up)
-{
- srpc_debug_reply_t *rep;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- case LST_TRANS_SESEND:
- return 0;
-
- case LST_TRANS_SESQRY:
- rep = &msg->msg_body.dbg_reply;
-
- if (copy_to_user(&ent_up->rpe_priv[0],
- &rep->dbg_timeout, sizeof(int)) ||
- copy_to_user(&ent_up->rpe_payload[0],
- &rep->dbg_name, LST_NAME_SIZE))
- return -EFAULT;
-
- return 0;
-
- default:
- LBUG();
- }
-
- return 0;
-}
-
-static int
-lstcon_group_nodes_add(lstcon_group_t *grp, int count,
- lnet_process_id_t *ids_up, struct list_head *result_up)
-{
- lstcon_rpc_trans_t *trans;
- lstcon_ndlink_t *ndl;
- lstcon_group_t *tmp;
- lnet_process_id_t id;
- int i;
- int rc;
-
- rc = lstcon_group_alloc(NULL, &tmp);
- if (rc != 0) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
- for (i = 0 ; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- break;
- }
-
- /* skip if it's in this group already */
- rc = lstcon_group_ndlink_find(grp, id, &ndl, 0);
- if (rc == 0)
- continue;
-
- /* add to tmp group */
- rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1);
- if (rc != 0) {
- CERROR("Can't create ndlink, out of memory\n");
- break;
- }
- }
-
- if (rc != 0) {
- lstcon_group_put(tmp);
- return rc;
- }
-
- rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
- &tmp->grp_trans_list, LST_TRANS_SESNEW,
- tmp, lstcon_sesrpc_condition, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction: %d\n", rc);
- lstcon_group_put(tmp);
- return rc;
- }
-
- /* post all RPCs */
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_sesrpc_readent);
- /* destroy all RPGs */
- lstcon_rpc_trans_destroy(trans);
-
- lstcon_group_move(tmp, grp);
- lstcon_group_put(tmp);
-
- return rc;
-}
-
-static int
-lstcon_group_nodes_remove(lstcon_group_t *grp,
- int count, lnet_process_id_t *ids_up,
- struct list_head *result_up)
-{
- lstcon_rpc_trans_t *trans;
- lstcon_ndlink_t *ndl;
- lstcon_group_t *tmp;
- lnet_process_id_t id;
- int rc;
- int i;
-
- /* End session and remove node from the group */
-
- rc = lstcon_group_alloc(NULL, &tmp);
- if (rc != 0) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- goto error;
- }
-
- /* move node to tmp group */
- if (lstcon_group_ndlink_find(grp, id, &ndl, 0) == 0)
- lstcon_group_ndlink_move(grp, tmp, ndl);
- }
-
- rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
- &tmp->grp_trans_list, LST_TRANS_SESEND,
- tmp, lstcon_sesrpc_condition, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction: %d\n", rc);
- goto error;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
- /* release nodes anyway, because we can't rollback status */
- lstcon_group_put(tmp);
-
- return rc;
-error:
- lstcon_group_move(tmp, grp);
- lstcon_group_put(tmp);
-
- return rc;
-}
-
-int
-lstcon_group_add(char *name)
-{
- lstcon_group_t *grp;
- int rc;
-
- rc = (lstcon_group_find(name, &grp) == 0)? -EEXIST: 0;
- if (rc != 0) {
- /* find a group with same name */
- lstcon_group_put(grp);
- return rc;
- }
-
- rc = lstcon_group_alloc(name, &grp);
- if (rc != 0) {
- CERROR("Can't allocate descriptor for group %s\n", name);
- return -ENOMEM;
- }
-
- list_add_tail(&grp->grp_link, &console_session.ses_grp_list);
-
- return rc;
-}
-
-int
-lstcon_nodes_add(char *name, int count,
- lnet_process_id_t *ids_up, struct list_head *result_up)
-{
- lstcon_group_t *grp;
- int rc;
-
- LASSERT (count > 0);
- LASSERT (ids_up != NULL);
-
- rc = lstcon_group_find(name, &grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find group %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by other threads or test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_put(grp);
-
- return -EBUSY;
- }
-
- rc = lstcon_group_nodes_add(grp, count, ids_up, result_up);
-
- lstcon_group_put(grp);
-
- return rc;
-}
-
-int
-lstcon_group_del(char *name)
-{
- lstcon_rpc_trans_t *trans;
- lstcon_group_t *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find group: %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by others threads or test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_put(grp);
- return -EBUSY;
- }
-
- rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
- &grp->grp_trans_list, LST_TRANS_SESEND,
- grp, lstcon_sesrpc_condition, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction: %d\n", rc);
- lstcon_group_put(grp);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- lstcon_rpc_trans_destroy(trans);
-
- lstcon_group_put(grp);
- /* -ref for session, it's destroyed,
- * status can't be rolled back, destroy group anway */
- lstcon_group_put(grp);
-
- return rc;
-}
-
-int
-lstcon_group_clean(char *name, int args)
-{
- lstcon_group_t *grp = NULL;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find group %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_put(grp);
- return -EBUSY;
- }
-
- args = (LST_NODE_ACTIVE | LST_NODE_BUSY |
- LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args;
-
- lstcon_group_drain(grp, args);
-
- lstcon_group_put(grp);
- /* release empty group */
- if (list_empty(&grp->grp_ndl_list))
- lstcon_group_put(grp);
-
- return 0;
-}
-
-int
-lstcon_nodes_remove(char *name, int count,
- lnet_process_id_t *ids_up, struct list_head *result_up)
-{
- lstcon_group_t *grp = NULL;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find group: %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_put(grp);
- return -EBUSY;
- }
-
- rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up);
-
- lstcon_group_put(grp);
- /* release empty group */
- if (list_empty(&grp->grp_ndl_list))
- lstcon_group_put(grp);
-
- return rc;
-}
-
-int
-lstcon_group_refresh(char *name, struct list_head *result_up)
-{
- lstcon_rpc_trans_t *trans;
- lstcon_group_t *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find group: %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_put(grp);
- return -EBUSY;
- }
-
- /* re-invite all inactive nodes int the group */
- rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
- &grp->grp_trans_list, LST_TRANS_SESNEW,
- grp, lstcon_sesrpc_condition, &trans);
- if (rc != 0) {
- /* local error, return */
- CDEBUG(D_NET, "Can't create transaction: %d\n", rc);
- lstcon_group_put(grp);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
- /* -ref for me */
- lstcon_group_put(grp);
-
- return rc;
-}
-
-int
-lstcon_group_list(int index, int len, char *name_up)
-{
- lstcon_group_t *grp;
-
- LASSERT (index >= 0);
- LASSERT (name_up != NULL);
-
- list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
- if (index-- == 0) {
- return copy_to_user(name_up, grp->grp_name, len) ?
- -EFAULT : 0;
- }
- }
-
- return -ENOENT;
-}
-
-static int
-lstcon_nodes_getent(struct list_head *head, int *index_p,
- int *count_p, lstcon_node_ent_t *dents_up)
-{
- lstcon_ndlink_t *ndl;
- lstcon_node_t *nd;
- int count = 0;
- int index = 0;
-
- LASSERT (index_p != NULL && count_p != NULL);
- LASSERT (dents_up != NULL);
- LASSERT (*index_p >= 0);
- LASSERT (*count_p > 0);
-
- list_for_each_entry(ndl, head, ndl_link) {
- if (index++ < *index_p)
- continue;
-
- if (count >= *count_p)
- break;
-
- nd = ndl->ndl_node;
- if (copy_to_user(&dents_up[count].nde_id,
- &nd->nd_id, sizeof(nd->nd_id)) ||
- copy_to_user(&dents_up[count].nde_state,
- &nd->nd_state, sizeof(nd->nd_state)))
- return -EFAULT;
-
- count ++;
- }
-
- if (index <= *index_p)
- return -ENOENT;
-
- *count_p = count;
- *index_p = index;
-
- return 0;
-}
-
-int
-lstcon_group_info(char *name, lstcon_ndlist_ent_t *gents_p,
- int *index_p, int *count_p, lstcon_node_ent_t *dents_up)
-{
- lstcon_ndlist_ent_t *gentp;
- lstcon_group_t *grp;
- lstcon_ndlink_t *ndl;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find group %s\n", name);
- return rc;
- }
-
- if (dents_up != 0) {
- /* verbose query */
- rc = lstcon_nodes_getent(&grp->grp_ndl_list,
- index_p, count_p, dents_up);
- lstcon_group_put(grp);
-
- return rc;
- }
-
- /* non-verbose query */
- LIBCFS_ALLOC(gentp, sizeof(lstcon_ndlist_ent_t));
- if (gentp == NULL) {
- CERROR("Can't allocate ndlist_ent\n");
- lstcon_group_put(grp);
-
- return -ENOMEM;
- }
-
- memset(gentp, 0, sizeof(lstcon_ndlist_ent_t));
-
- list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp);
-
- rc = copy_to_user(gents_p, gentp,
- sizeof(lstcon_ndlist_ent_t)) ? -EFAULT: 0;
-
- LIBCFS_FREE(gentp, sizeof(lstcon_ndlist_ent_t));
-
- lstcon_group_put(grp);
-
- return 0;
-}
-
-int
-lstcon_batch_find(char *name, lstcon_batch_t **batpp)
-{
- lstcon_batch_t *bat;
-
- list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
- if (strncmp(bat->bat_name, name, LST_NAME_SIZE) == 0) {
- *batpp = bat;
- return 0;
- }
- }
-
- return -ENOENT;
-}
-
-int
-lstcon_batch_add(char *name)
-{
- lstcon_batch_t *bat;
- int i;
- int rc;
-
- rc = (lstcon_batch_find(name, &bat) == 0)? -EEXIST: 0;
- if (rc != 0) {
- CDEBUG(D_NET, "Batch %s already exists\n", name);
- return rc;
- }
-
- LIBCFS_ALLOC(bat, sizeof(lstcon_batch_t));
- if (bat == NULL) {
- CERROR("Can't allocate descriptor for batch %s\n", name);
- return -ENOMEM;
- }
-
- LIBCFS_ALLOC(bat->bat_cli_hash,
- sizeof(struct list_head) * LST_NODE_HASHSIZE);
- if (bat->bat_cli_hash == NULL) {
- CERROR("Can't allocate hash for batch %s\n", name);
- LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
-
- return -ENOMEM;
- }
-
- LIBCFS_ALLOC(bat->bat_srv_hash,
- sizeof(struct list_head) * LST_NODE_HASHSIZE);
- if (bat->bat_srv_hash == NULL) {
- CERROR("Can't allocate hash for batch %s\n", name);
- LIBCFS_FREE(bat->bat_cli_hash, LST_NODE_HASHSIZE);
- LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
-
- return -ENOMEM;
- }
-
- strcpy(bat->bat_name, name);
- bat->bat_hdr.tsb_index = 0;
- bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie;
-
- bat->bat_ntest = 0;
- bat->bat_state = LST_BATCH_IDLE;
-
- CFS_INIT_LIST_HEAD(&bat->bat_cli_list);
- CFS_INIT_LIST_HEAD(&bat->bat_srv_list);
- CFS_INIT_LIST_HEAD(&bat->bat_test_list);
- CFS_INIT_LIST_HEAD(&bat->bat_trans_list);
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++) {
- CFS_INIT_LIST_HEAD(&bat->bat_cli_hash[i]);
- CFS_INIT_LIST_HEAD(&bat->bat_srv_hash[i]);
- }
-
- list_add_tail(&bat->bat_link, &console_session.ses_bat_list);
-
- return rc;
-}
-
-int
-lstcon_batch_list(int index, int len, char *name_up)
-{
- lstcon_batch_t *bat;
-
- LASSERT (name_up != NULL);
- LASSERT (index >= 0);
-
- list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
- if (index-- == 0) {
- return copy_to_user(name_up,bat->bat_name, len) ?
- -EFAULT: 0;
- }
- }
-
- return -ENOENT;
-}
-
-int
-lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, int server,
- int testidx, int *index_p, int *ndent_p,
- lstcon_node_ent_t *dents_up)
-{
- lstcon_test_batch_ent_t *entp;
- struct list_head *clilst;
- struct list_head *srvlst;
- lstcon_test_t *test = NULL;
- lstcon_batch_t *bat;
- lstcon_ndlink_t *ndl;
- int rc;
-
- rc = lstcon_batch_find(name, &bat);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return -ENOENT;
- }
-
- if (testidx > 0) {
- /* query test, test index start from 1 */
- list_for_each_entry(test, &bat->bat_test_list, tes_link) {
- if (testidx-- == 1)
- break;
- }
-
- if (testidx > 0) {
- CDEBUG(D_NET, "Can't find specified test in batch\n");
- return -ENOENT;
- }
- }
-
- clilst = (test == NULL) ? &bat->bat_cli_list :
- &test->tes_src_grp->grp_ndl_list;
- srvlst = (test == NULL) ? &bat->bat_srv_list :
- &test->tes_dst_grp->grp_ndl_list;
-
- if (dents_up != NULL) {
- rc = lstcon_nodes_getent((server ? srvlst: clilst),
- index_p, ndent_p, dents_up);
- return rc;
- }
-
- /* non-verbose query */
- LIBCFS_ALLOC(entp, sizeof(lstcon_test_batch_ent_t));
- if (entp == NULL)
- return -ENOMEM;
-
- memset(entp, 0, sizeof(lstcon_test_batch_ent_t));
-
- if (test == NULL) {
- entp->u.tbe_batch.bae_ntest = bat->bat_ntest;
- entp->u.tbe_batch.bae_state = bat->bat_state;
-
- } else {
-
- entp->u.tbe_test.tse_type = test->tes_type;
- entp->u.tbe_test.tse_loop = test->tes_loop;
- entp->u.tbe_test.tse_concur = test->tes_concur;
- }
-
- list_for_each_entry(ndl, clilst, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle);
-
- list_for_each_entry(ndl, srvlst, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle);
-
- rc = copy_to_user(ent_up, entp,
- sizeof(lstcon_test_batch_ent_t)) ? -EFAULT : 0;
-
- LIBCFS_FREE(entp, sizeof(lstcon_test_batch_ent_t));
-
- return rc;
-}
-
-int
-lstcon_batrpc_condition(int transop, lstcon_node_t *nd, void *arg)
-{
- switch (transop) {
- case LST_TRANS_TSBRUN:
- if (nd->nd_state != LST_NODE_ACTIVE)
- return -ENETDOWN;
- break;
-
- case LST_TRANS_TSBSTOP:
- if (nd->nd_state != LST_NODE_ACTIVE)
- return 0;
- break;
-
- case LST_TRANS_TSBCLIQRY:
- case LST_TRANS_TSBSRVQRY:
- break;
- }
-
- return 1;
-}
-
-static int
-lstcon_batch_op(lstcon_batch_t *bat, int transop, struct list_head *result_up)
-{
- lstcon_rpc_trans_t *trans;
- int rc;
-
- rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list,
- &bat->bat_trans_list, transop,
- bat, lstcon_batrpc_condition, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_batch_run(char *name, int timeout, struct list_head *result_up)
-{
- lstcon_batch_t *bat;
- int rc;
-
- if (lstcon_batch_find(name, &bat) != 0) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return -ENOENT;
- }
-
- bat->bat_arg = timeout;
-
- rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up);
-
- /* mark batch as running if it's started in any node */
- if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0) != 0)
- bat->bat_state = LST_BATCH_RUNNING;
-
- return rc;
-}
-
-int
-lstcon_batch_stop(char *name, int force, struct list_head *result_up)
-{
- lstcon_batch_t *bat;
- int rc;
-
- if (lstcon_batch_find(name, &bat) != 0) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return -ENOENT;
- }
-
- bat->bat_arg = force;
-
- rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up);
-
- /* mark batch as stopped if all RPCs finished */
- if (lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0) == 0)
- bat->bat_state = LST_BATCH_IDLE;
-
- return rc;
-}
-
-static void
-lstcon_batch_destroy(lstcon_batch_t *bat)
-{
- lstcon_ndlink_t *ndl;
- lstcon_test_t *test;
- int i;
-
- list_del(&bat->bat_link);
-
- while (!list_empty(&bat->bat_test_list)) {
- test = list_entry(bat->bat_test_list.next,
- lstcon_test_t, tes_link);
- LASSERT (list_empty(&test->tes_trans_list));
-
- list_del(&test->tes_link);
-
- lstcon_group_put(test->tes_src_grp);
- lstcon_group_put(test->tes_dst_grp);
-
- LIBCFS_FREE(test, offsetof(lstcon_test_t,
- tes_param[test->tes_paramlen]));
- }
-
- LASSERT (list_empty(&bat->bat_trans_list));
-
- while (!list_empty(&bat->bat_cli_list)) {
- ndl = list_entry(bat->bat_cli_list.next,
- lstcon_ndlink_t, ndl_link);
- list_del_init(&ndl->ndl_link);
-
- lstcon_ndlink_release(ndl);
- }
-
- while (!list_empty(&bat->bat_srv_list)) {
- ndl = list_entry(bat->bat_srv_list.next,
- lstcon_ndlink_t, ndl_link);
- list_del_init(&ndl->ndl_link);
-
- lstcon_ndlink_release(ndl);
- }
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++) {
- LASSERT (list_empty(&bat->bat_cli_hash[i]));
- LASSERT (list_empty(&bat->bat_srv_hash[i]));
- }
-
- LIBCFS_FREE(bat->bat_cli_hash,
- sizeof(struct list_head) * LST_NODE_HASHSIZE);
- LIBCFS_FREE(bat->bat_srv_hash,
- sizeof(struct list_head) * LST_NODE_HASHSIZE);
- LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
-}
-
-int
-lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg)
-{
- lstcon_test_t *test;
- lstcon_batch_t *batch;
- lstcon_ndlink_t *ndl;
- struct list_head *hash;
- struct list_head *head;
-
- test = (lstcon_test_t *)arg;
- LASSERT (test != NULL);
-
- batch = test->tes_batch;
- LASSERT (batch != NULL);
-
- if (test->tes_oneside &&
- transop == LST_TRANS_TSBSRVADD)
- return 0;
-
- if (nd->nd_state != LST_NODE_ACTIVE)
- return -ENETDOWN;
-
- if (transop == LST_TRANS_TSBCLIADD) {
- hash = batch->bat_cli_hash;
- head = &batch->bat_cli_list;
-
- } else {
- LASSERT (transop == LST_TRANS_TSBSRVADD);
-
- hash = batch->bat_srv_hash;
- head = &batch->bat_srv_list;
- }
-
- LASSERT (nd->nd_id.nid != LNET_NID_ANY);
-
- if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1) != 0)
- return -ENOMEM;
-
- if (list_empty(&ndl->ndl_link))
- list_add_tail(&ndl->ndl_link, head);
-
- return 1;
-}
-
-static int
-lstcon_test_nodes_add(lstcon_test_t *test, struct list_head *result_up)
-{
- lstcon_rpc_trans_t *trans;
- lstcon_group_t *grp;
- int transop;
- int rc;
-
- LASSERT (test->tes_src_grp != NULL);
- LASSERT (test->tes_dst_grp != NULL);
-
- transop = LST_TRANS_TSBSRVADD;
- grp = test->tes_dst_grp;
-again:
- rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
- &test->tes_trans_list, transop,
- test, lstcon_testrpc_condition, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- if (lstcon_trans_stat()->trs_rpc_errno != 0 ||
- lstcon_trans_stat()->trs_fwk_errno != 0) {
- lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
- /* return if any error */
- CDEBUG(D_NET, "Failed to add test %s, "
- "RPC error %d, framework error %d\n",
- transop == LST_TRANS_TSBCLIADD ? "client" : "server",
- lstcon_trans_stat()->trs_rpc_errno,
- lstcon_trans_stat()->trs_fwk_errno);
-
- return rc;
- }
-
- lstcon_rpc_trans_destroy(trans);
-
- if (transop == LST_TRANS_TSBCLIADD)
- return rc;
-
- transop = LST_TRANS_TSBCLIADD;
- grp = test->tes_src_grp;
- test->tes_cliidx = 0;
-
- /* requests to test clients */
- goto again;
-}
-
-int
-lstcon_test_add(char *name, int type, int loop, int concur,
- int dist, int span, char *src_name, char * dst_name,
- void *param, int paramlen, int *retp, struct list_head *result_up)
-
-{
- lstcon_group_t *src_grp = NULL;
- lstcon_group_t *dst_grp = NULL;
- lstcon_test_t *test = NULL;
- lstcon_batch_t *batch;
- int rc;
-
- rc = lstcon_batch_find(name, &batch);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return rc;
- }
-
- if (batch->bat_state != LST_BATCH_IDLE) {
- CDEBUG(D_NET, "Can't change running batch %s\n", name);
- return rc;
- }
-
- rc = lstcon_group_find(src_name, &src_grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find group %s\n", src_name);
- goto out;
- }
-
- rc = lstcon_group_find(dst_name, &dst_grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find group %s\n", dst_name);
- goto out;
- }
-
- if (dst_grp->grp_userland)
- *retp = 1;
-
- LIBCFS_ALLOC(test, offsetof(lstcon_test_t, tes_param[paramlen]));
- if (!test) {
- CERROR("Can't allocate test descriptor\n");
- rc = -ENOMEM;
-
- goto out;
- }
-
- memset(test, 0, offsetof(lstcon_test_t, tes_param[paramlen]));
- test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id;
- test->tes_batch = batch;
- test->tes_type = type;
- test->tes_oneside = 0; /* TODO */
- test->tes_loop = loop;
- test->tes_concur = concur;
- test->tes_stop_onerr = 1; /* TODO */
- test->tes_span = span;
- test->tes_dist = dist;
- test->tes_cliidx = 0; /* just used for creating RPC */
- test->tes_src_grp = src_grp;
- test->tes_dst_grp = dst_grp;
- CFS_INIT_LIST_HEAD(&test->tes_trans_list);
-
- if (param != NULL) {
- test->tes_paramlen = paramlen;
- memcpy(&test->tes_param[0], param, paramlen);
- }
-
- rc = lstcon_test_nodes_add(test, result_up);
-
- if (rc != 0)
- goto out;
-
- if (lstcon_trans_stat()->trs_rpc_errno != 0 ||
- lstcon_trans_stat()->trs_fwk_errno != 0)
- CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, name);
-
- /* add to test list anyway, so user can check what's going on */
- list_add_tail(&test->tes_link, &batch->bat_test_list);
-
- batch->bat_ntest ++;
- test->tes_hdr.tsb_index = batch->bat_ntest;
-
- /* hold groups so nobody can change them */
- return rc;
-out:
- if (test != NULL)
- LIBCFS_FREE(test, offsetof(lstcon_test_t, tes_param[paramlen]));
-
- if (dst_grp != NULL)
- lstcon_group_put(dst_grp);
-
- if (src_grp != NULL)
- lstcon_group_put(src_grp);
-
- return rc;
-}
-
-int
-lstcon_test_find(lstcon_batch_t *batch, int idx, lstcon_test_t **testpp)
-{
- lstcon_test_t *test;
-
- list_for_each_entry(test, &batch->bat_test_list, tes_link) {
- if (idx == test->tes_hdr.tsb_index) {
- *testpp = test;
- return 0;
- }
- }
-
- return -ENOENT;
-}
-
-int
-lstcon_tsbrpc_readent(int transop, srpc_msg_t *msg,
- lstcon_rpc_ent_t *ent_up)
-{
- srpc_batch_reply_t *rep = &msg->msg_body.bat_reply;
-
- LASSERT (transop == LST_TRANS_TSBCLIQRY ||
- transop == LST_TRANS_TSBSRVQRY);
-
- /* positive errno, framework error code */
- if (copy_to_user(&ent_up->rpe_priv[0],
- &rep->bar_active, sizeof(rep->bar_active)))
- return -EFAULT;
-
- return 0;
-}
-
-int
-lstcon_test_batch_query(char *name, int testidx, int client,
- int timeout, struct list_head *result_up)
-{
- lstcon_rpc_trans_t *trans;
- struct list_head *translist;
- struct list_head *ndlist;
- lstcon_tsb_hdr_t *hdr;
- lstcon_batch_t *batch;
- lstcon_test_t *test = NULL;
- int transop;
- int rc;
-
- rc = lstcon_batch_find(name, &batch);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find batch: %s\n", name);
- return rc;
- }
-
- if (testidx == 0) {
- translist = &batch->bat_trans_list;
- ndlist = &batch->bat_cli_list;
- hdr = &batch->bat_hdr;
-
- } else {
- /* query specified test only */
- rc = lstcon_test_find(batch, testidx, &test);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find test: %d\n", testidx);
- return rc;
- }
-
- translist = &test->tes_trans_list;
- ndlist = &test->tes_src_grp->grp_ndl_list;
- hdr = &test->tes_hdr;
- }
-
- transop = client ? LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY;
-
- rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr,
- lstcon_batrpc_condition, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, timeout);
-
- if (testidx == 0 && /* query a batch, not a test */
- lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) == 0 &&
- lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0) == 0) {
- /* all RPCs finished, and no active test */
- batch->bat_state = LST_BATCH_IDLE;
- }
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_tsbrpc_readent);
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_statrpc_readent(int transop, srpc_msg_t *msg,
- lstcon_rpc_ent_t *ent_up)
-{
- srpc_stat_reply_t *rep = &msg->msg_body.stat_reply;
- sfw_counters_t *sfwk_stat;
- srpc_counters_t *srpc_stat;
- lnet_counters_t *lnet_stat;
-
- if (rep->str_status != 0)
- return 0;
-
- sfwk_stat = (sfw_counters_t *)&ent_up->rpe_payload[0];
- srpc_stat = (srpc_counters_t *)((char *)sfwk_stat + sizeof(*sfwk_stat));
- lnet_stat = (lnet_counters_t *)((char *)srpc_stat + sizeof(*srpc_stat));
-
- if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) ||
- copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) ||
- copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat)))
- return -EFAULT;
-
- return 0;
-}
-
-int
-lstcon_ndlist_stat(struct list_head *ndlist,
- int timeout, struct list_head *result_up)
-{
- struct list_head head;
- lstcon_rpc_trans_t *trans;
- int rc;
-
- CFS_INIT_LIST_HEAD(&head);
-
- rc = lstcon_rpc_trans_ndlist(ndlist, &head,
- LST_TRANS_STATQRY, NULL, NULL, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- timeout = (timeout > LST_TRANS_MIN_TIMEOUT) ? timeout :
- LST_TRANS_MIN_TIMEOUT;
- lstcon_rpc_trans_postwait(trans, timeout);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_statrpc_readent);
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_group_stat(char *grp_name, int timeout, struct list_head *result_up)
-{
- lstcon_group_t *grp;
- int rc;
-
- rc = lstcon_group_find(grp_name, &grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Can't find group %s\n", grp_name);
- return rc;
- }
-
- rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up);
-
- lstcon_group_put(grp);
-
- return rc;
-}
-
-int
-lstcon_nodes_stat(int count, lnet_process_id_t *ids_up,
- int timeout, struct list_head *result_up)
-{
- lstcon_ndlink_t *ndl;
- lstcon_group_t *tmp;
- lnet_process_id_t id;
- int i;
- int rc;
-
- rc = lstcon_group_alloc(NULL, &tmp);
- if (rc != 0) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
- for (i = 0 ; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- break;
- }
-
- /* add to tmp group */
- rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2);
- if (rc != 0) {
- CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET,
- "Failed to find or create %s: %d\n",
- libcfs_id2str(id), rc);
- break;
- }
- }
-
- if (rc != 0) {
- lstcon_group_put(tmp);
- return rc;
- }
-
- rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up);
-
- lstcon_group_put(tmp);
-
- return rc;
-}
-
-int
-lstcon_debug_ndlist(struct list_head *ndlist,
- struct list_head *translist,
- int timeout, struct list_head *result_up)
-{
- lstcon_rpc_trans_t *trans;
- int rc;
-
- rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY,
- NULL, lstcon_sesrpc_condition, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- timeout = (timeout > LST_TRANS_MIN_TIMEOUT) ? timeout :
- LST_TRANS_MIN_TIMEOUT;
-
- lstcon_rpc_trans_postwait(trans, timeout);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_sesrpc_readent);
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_session_debug(int timeout, struct list_head *result_up)
-{
- return lstcon_debug_ndlist(&console_session.ses_ndl_list,
- NULL, timeout, result_up);
-}
-
-int
-lstcon_batch_debug(int timeout, char *name,
- int client, struct list_head *result_up)
-{
- lstcon_batch_t *bat;
- int rc;
-
- rc = lstcon_batch_find(name, &bat);
- if (rc != 0)
- return -ENOENT;
-
- rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list :
- &bat->bat_srv_list,
- NULL, timeout, result_up);
-
- return rc;
-}
-
-int
-lstcon_group_debug(int timeout, char *name,
- struct list_head *result_up)
-{
- lstcon_group_t *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc != 0)
- return -ENOENT;
-
- rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
- timeout, result_up);
- lstcon_group_put(grp);
-
- return rc;
-}
-
-int
-lstcon_nodes_debug(int timeout,
- int count, lnet_process_id_t *ids_up,
- struct list_head *result_up)
-{
- lnet_process_id_t id;
- lstcon_ndlink_t *ndl;
- lstcon_group_t *grp;
- int i;
- int rc;
-
- rc = lstcon_group_alloc(NULL, &grp);
- if (rc != 0) {
- CDEBUG(D_NET, "Out of memory\n");
- return rc;
- }
-
- for (i = 0; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- break;
- }
-
- /* node is added to tmp group */
- rc = lstcon_group_ndlink_find(grp, id, &ndl, 1);
- if (rc != 0) {
- CERROR("Can't create node link\n");
- break;
- }
- }
-
- if (rc != 0) {
- lstcon_group_put(grp);
- return rc;
- }
-
- rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
- timeout, result_up);
-
- lstcon_group_put(grp);
-
- return rc;
-}
-
-int
-lstcon_session_match(lst_sid_t sid)
-{
- return (console_session.ses_id.ses_nid == sid.ses_nid &&
- console_session.ses_id.ses_stamp == sid.ses_stamp) ? 1: 0;
-}
-
-static void
-lstcon_new_session_id(lst_sid_t *sid)
-{
- lnet_process_id_t id;
-
- LASSERT (console_session.ses_state == LST_SESSION_NONE);
-
- LNetGetId(1, &id);
- sid->ses_nid = id.nid;
- sid->ses_stamp = cfs_time_current();
-}
-
-extern srpc_service_t lstcon_acceptor_service;
-
-int
-lstcon_session_new(char *name, int key,
- int timeout,int force, lst_sid_t *sid_up)
-{
- int rc = 0;
- int i;
-
- if (console_session.ses_state != LST_SESSION_NONE) {
- /* session exists */
- if (!force) {
- CERROR("Session %s already exists\n",
- console_session.ses_name);
- return -EEXIST;
- }
-
- rc = lstcon_session_end();
-
- /* lstcon_session_end() only return local error */
- if (rc != 0)
- return rc;
- }
-
- for (i = 0; i < LST_GLOBAL_HASHSIZE; i++) {
- LASSERT (list_empty(&console_session.ses_ndl_hash[i]));
- }
-
- rc = lstcon_batch_add(LST_DEFAULT_BATCH);
- if (rc != 0)
- return rc;
-
- rc = lstcon_rpc_pinger_start();
- if (rc != 0) {
- lstcon_batch_t *bat;
-
- lstcon_batch_find(LST_DEFAULT_BATCH, &bat);
- lstcon_batch_destroy(bat);
-
- return rc;
- }
-
- lstcon_new_session_id(&console_session.ses_id);
-
- console_session.ses_key = key;
- console_session.ses_state = LST_SESSION_ACTIVE;
- console_session.ses_force = !!force;
- console_session.ses_timeout = (timeout <= 0)? LST_CONSOLE_TIMEOUT:
- timeout;
- strcpy(console_session.ses_name, name);
-
- if (copy_to_user(sid_up, &console_session.ses_id,
- sizeof(lst_sid_t)) == 0)
- return rc;
-
- lstcon_session_end();
-
- return -EFAULT;
-}
-
-int
-lstcon_session_info(lst_sid_t *sid_up, int *key_up,
- lstcon_ndlist_ent_t *ndinfo_up, char *name_up, int len)
-{
- lstcon_ndlist_ent_t *entp;
- lstcon_ndlink_t *ndl;
- int rc = 0;
-
- if (console_session.ses_state != LST_SESSION_ACTIVE)
- return -ESRCH;
-
- LIBCFS_ALLOC(entp, sizeof(*entp));
- if (entp == NULL)
- return -ENOMEM;
-
- memset(entp, 0, sizeof(*entp));
-
- list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, entp);
-
- if (copy_to_user(sid_up, &console_session.ses_id, sizeof(lst_sid_t)) ||
- copy_to_user(key_up, &console_session.ses_key, sizeof(int)) ||
- copy_to_user(ndinfo_up, entp, sizeof(*entp)) ||
- copy_to_user(name_up, console_session.ses_name, len))
- rc = -EFAULT;
-
- LIBCFS_FREE(entp, sizeof(*entp));
-
- return rc;
-}
-
-int
-lstcon_session_end()
-{
- lstcon_rpc_trans_t *trans;
- lstcon_group_t *grp;
- lstcon_batch_t *bat;
- int rc = 0;
-
- LASSERT (console_session.ses_state == LST_SESSION_ACTIVE);
-
- rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list,
- NULL, LST_TRANS_SESEND, NULL,
- lstcon_sesrpc_condition, &trans);
- if (rc != 0) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- console_session.ses_shutdown = 1;
-
- lstcon_rpc_pinger_stop();
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- lstcon_rpc_trans_destroy(trans);
- /* User can do nothing even rpc failed, so go on */
-
- /* waiting for orphan rpcs to die */
- lstcon_rpc_cleanup_wait();
-
- console_session.ses_id = LST_INVALID_SID;
- console_session.ses_state = LST_SESSION_NONE;
- console_session.ses_key = 0;
- console_session.ses_force = 0;
-
- /* destroy all batches */
- while (!list_empty(&console_session.ses_bat_list)) {
- bat = list_entry(console_session.ses_bat_list.next,
- lstcon_batch_t, bat_link);
-
- lstcon_batch_destroy(bat);
- }
-
- /* destroy all groups */
- while (!list_empty(&console_session.ses_grp_list)) {
- grp = list_entry(console_session.ses_grp_list.next,
- lstcon_group_t, grp_link);
- LASSERT (grp->grp_ref == 1);
-
- lstcon_group_put(grp);
- }
-
- /* all nodes should be released */
- LASSERT (list_empty(&console_session.ses_ndl_list));
-
- console_session.ses_shutdown = 0;
- console_session.ses_expired = 0;
-
- return rc;
-}
-
-static int
-lstcon_acceptor_handle (srpc_server_rpc_t *rpc)
-{
- srpc_msg_t *rep = &rpc->srpc_replymsg;
- srpc_msg_t *req = &rpc->srpc_reqstbuf->buf_msg;
- srpc_join_reqst_t *jreq = &req->msg_body.join_reqst;
- srpc_join_reply_t *jrep = &rep->msg_body.join_reply;
- lstcon_group_t *grp = NULL;
- lstcon_ndlink_t *ndl;
- int rc = 0;
-
- sfw_unpack_message(req);
-
- mutex_down(&console_session.ses_mutex);
-
- jrep->join_sid = console_session.ses_id;
-
- if (console_session.ses_id.ses_nid == LNET_NID_ANY) {
- jrep->join_status = ESRCH;
- goto out;
- }
-
- if (jreq->join_sid.ses_nid != LNET_NID_ANY &&
- !lstcon_session_match(jreq->join_sid)) {
- jrep->join_status = EBUSY;
- goto out;
- }
-
- if (lstcon_group_find(jreq->join_group, &grp) != 0) {
- rc = lstcon_group_alloc(jreq->join_group, &grp);
- if (rc != 0) {
- CERROR("Out of memory\n");
- goto out;
- }
-
- list_add_tail(&grp->grp_link,
- &console_session.ses_grp_list);
- lstcon_group_addref(grp);
- }
-
- if (grp->grp_ref > 2) {
- /* Group in using */
- jrep->join_status = EBUSY;
- goto out;
- }
-
- rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0);
- if (rc == 0) {
- jrep->join_status = EEXIST;
- goto out;
- }
-
- rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1);
- if (rc != 0) {
- CERROR("Out of memory\n");
- goto out;
- }
-
- ndl->ndl_node->nd_state = LST_NODE_ACTIVE;
- ndl->ndl_node->nd_timeout = console_session.ses_timeout;
-
- if (grp->grp_userland == 0)
- grp->grp_userland = 1;
-
- strcpy(jrep->join_session, console_session.ses_name);
- jrep->join_timeout = console_session.ses_timeout;
- jrep->join_status = 0;
-
-out:
- if (grp != NULL)
- lstcon_group_put(grp);
-
- mutex_up(&console_session.ses_mutex);
-
- return rc;
-}
-
-srpc_service_t lstcon_acceptor_service =
-{
- .sv_name = "join session",
- .sv_handler = lstcon_acceptor_handle,
- .sv_bulk_ready = NULL,
- .sv_id = SRPC_SERVICE_JOIN,
- .sv_concur = SFW_SERVICE_CONCURRENCY,
-};
-
-extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data);
-
-DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry);
-
-/* initialize console */
-int
-lstcon_console_init(void)
-{
- int i;
- int n;
- int rc;
-
- memset(&console_session, 0, sizeof(lstcon_session_t));
-
- console_session.ses_id = LST_INVALID_SID;
- console_session.ses_state = LST_SESSION_NONE;
- console_session.ses_timeout = 0;
- console_session.ses_force = 0;
- console_session.ses_expired = 0;
- console_session.ses_laststamp = cfs_time_current_sec();
-
- init_mutex(&console_session.ses_mutex);
-
- CFS_INIT_LIST_HEAD(&console_session.ses_ndl_list);
- CFS_INIT_LIST_HEAD(&console_session.ses_grp_list);
- CFS_INIT_LIST_HEAD(&console_session.ses_bat_list);
- CFS_INIT_LIST_HEAD(&console_session.ses_trans_list);
-
- LIBCFS_ALLOC(console_session.ses_ndl_hash,
- sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
- if (console_session.ses_ndl_hash == NULL)
- return -ENOMEM;
-
- for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
- CFS_INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]);
-
- rc = srpc_add_service(&lstcon_acceptor_service);
- LASSERT (rc != -EBUSY);
- if (rc != 0) {
- LIBCFS_FREE(console_session.ses_ndl_hash,
- sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
- return rc;
- }
-
- n = srpc_service_add_buffers(&lstcon_acceptor_service, SFW_POST_BUFFERS);
- if (n != SFW_POST_BUFFERS) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = libcfs_register_ioctl(&lstcon_ioctl_handler);
-
- if (rc == 0) {
- lstcon_rpc_module_init();
- return 0;
- }
-
-out:
- srpc_shutdown_service(&lstcon_acceptor_service);
- srpc_remove_service(&lstcon_acceptor_service);
-
- LIBCFS_FREE(console_session.ses_ndl_hash,
- sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
-
- srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
- return rc;
-}
-
-int
-lstcon_console_fini(void)
-{
- int i;
-
- mutex_down(&console_session.ses_mutex);
-
- libcfs_deregister_ioctl(&lstcon_ioctl_handler);
-
- srpc_shutdown_service(&lstcon_acceptor_service);
- srpc_remove_service(&lstcon_acceptor_service);
-
- if (console_session.ses_state != LST_SESSION_NONE)
- lstcon_session_end();
-
- lstcon_rpc_module_fini();
-
- mutex_up(&console_session.ses_mutex);
-
- LASSERT (list_empty(&console_session.ses_ndl_list));
- LASSERT (list_empty(&console_session.ses_grp_list));
- LASSERT (list_empty(&console_session.ses_bat_list));
- LASSERT (list_empty(&console_session.ses_trans_list));
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++) {
- LASSERT (list_empty(&console_session.ses_ndl_hash[i]));
- }
-
- LIBCFS_FREE(console_session.ses_ndl_hash,
- sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
-
- srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
- return 0;
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * kernel structure for LST console
- */
-
-#ifndef __LST_CONSOLE_H__
-#define __LST_CONSOLE_H__
-
-#ifdef __KERNEL__
-
-#include <libcfs/kp30.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-types.h>
-#include <lnet/lnetst.h>
-#include "selftest.h"
-#include "conrpc.h"
-
-typedef struct lstcon_node {
- lnet_process_id_t nd_id; /* id of the node */
- int nd_ref; /* reference count */
- int nd_state; /* state of the node */
- int nd_timeout; /* session timeout */
- cfs_time_t nd_stamp; /* timestamp of last replied RPC */
- struct lstcon_rpc nd_ping; /* ping rpc */
-} lstcon_node_t; /*** node descriptor */
-
-typedef struct {
- struct list_head ndl_link; /* chain on list */
- struct list_head ndl_hlink; /* chain on hash */
- lstcon_node_t *ndl_node; /* pointer to node */
-} lstcon_ndlink_t; /*** node link descriptor */
-
-typedef struct {
- struct list_head grp_link; /* chain on global group list */
- int grp_ref; /* reference count */
- int grp_userland; /* has userland nodes */
- int grp_nnode; /* # of nodes */
- char grp_name[LST_NAME_SIZE]; /* group name */
-
- struct list_head grp_trans_list; /* transaction list */
- struct list_head grp_ndl_list; /* nodes list */
- struct list_head grp_ndl_hash[0];/* hash table for nodes */
-} lstcon_group_t; /*** (alias of nodes) group descriptor */
-
-#define LST_BATCH_IDLE 0xB0 /* idle batch */
-#define LST_BATCH_RUNNING 0xB1 /* running batch */
-
-typedef struct lstcon_tsb_hdr {
- lst_bid_t tsb_id; /* batch ID */
- int tsb_index; /* test index */
-} lstcon_tsb_hdr_t;
-
-typedef struct {
- lstcon_tsb_hdr_t bat_hdr; /* test_batch header */
- struct list_head bat_link; /* chain on session's batches list */
- int bat_ntest; /* # of test */
- int bat_state; /* state of the batch */
- int bat_arg; /* parameter for run|stop, timeout for run, force for stop */
- char bat_name[LST_NAME_SIZE]; /* name of batch */
-
- struct list_head bat_test_list; /* list head of tests (lstcon_test_t) */
- struct list_head bat_trans_list; /* list head of transaction */
- struct list_head bat_cli_list; /* list head of client nodes (lstcon_node_t) */
- struct list_head *bat_cli_hash; /* hash table of client nodes */
- struct list_head bat_srv_list; /* list head of server nodes */
- struct list_head *bat_srv_hash; /* hash table of server nodes */
-} lstcon_batch_t; /*** (tests ) batch descritptor */
-
-typedef struct lstcon_test {
- lstcon_tsb_hdr_t tes_hdr; /* test batch header */
- struct list_head tes_link; /* chain on batch's tests list */
- lstcon_batch_t *tes_batch; /* pointer to batch */
-
- int tes_type; /* type of the test, i.e: bulk, ping */
- int tes_stop_onerr; /* stop on error */
- int tes_oneside; /* one-sided test */
- int tes_concur; /* concurrency */
- int tes_loop; /* loop count */
- int tes_dist; /* nodes distribution of target group */
- int tes_span; /* nodes span of target group */
- int tes_cliidx; /* client index, used for RPC creating */
-
- struct list_head tes_trans_list; /* transaction list */
- lstcon_group_t *tes_src_grp; /* group run the test */
- lstcon_group_t *tes_dst_grp; /* target group */
-
- int tes_paramlen; /* test parameter length */
- char tes_param[0]; /* test parameter */
-} lstcon_test_t; /*** a single test descriptor */
-
-#define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */
-#define LST_NODE_HASHSIZE 239 /* node hash table (for batch or group) */
-
-#define LST_SESSION_NONE 0x0 /* no session */
-#define LST_SESSION_ACTIVE 0x1 /* working session */
-
-#define LST_CONSOLE_TIMEOUT 300 /* default console timeout */
-
-typedef struct {
- struct semaphore ses_mutex; /* lock for session, only one thread can enter session */
- lst_sid_t ses_id; /* global session id */
- int ses_key; /* local session key */
- int ses_state; /* state of session */
- int ses_timeout; /* timeout in seconds */
- time_t ses_laststamp; /* last operation stamp (seconds) */
- int ses_force:1; /* force creating */
- int ses_shutdown:1; /* session is shutting down */
- int ses_expired:1; /* console is timedout */
- __u64 ses_id_cookie; /* batch id cookie */
- char ses_name[LST_NAME_SIZE]; /* session name */
- lstcon_rpc_trans_t *ses_ping; /* session pinger */
- stt_timer_t ses_ping_timer; /* timer for pinger */
- lstcon_trans_stat_t ses_trans_stat; /* transaction stats */
-
- struct list_head ses_trans_list; /* global list of transaction */
- struct list_head ses_grp_list; /* global list of groups */
- struct list_head ses_bat_list; /* global list of batches */
- struct list_head ses_ndl_list; /* global list of nodes */
- struct list_head *ses_ndl_hash; /* hash table of nodes */
-
- spinlock_t ses_rpc_lock; /* serialize */
- atomic_t ses_rpc_counter;/* # of initialized RPCs */
- struct list_head ses_rpc_freelist; /* idle console rpc */
-} lstcon_session_t; /*** session descriptor */
-
-extern lstcon_session_t console_session;
-static inline lstcon_trans_stat_t *
-lstcon_trans_stat(void)
-{
- return &console_session.ses_trans_stat;
-}
-
-static inline struct list_head *
-lstcon_id2hash (lnet_process_id_t id, struct list_head *hash)
-{
- unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
-
- return &hash[idx];
-}
-
-extern int lstcon_session_match(lst_sid_t sid);
-extern int lstcon_session_new(char *name, int key,
- int timeout, int flags, lst_sid_t *sid_up);
-extern int lstcon_session_info(lst_sid_t *sid_up, int *key,
- lstcon_ndlist_ent_t *entp, char *name_up, int len);
-extern int lstcon_session_end(void);
-extern int lstcon_session_debug(int timeout, struct list_head *result_up);
-extern int lstcon_batch_debug(int timeout, char *name,
- int client, struct list_head *result_up);
-extern int lstcon_group_debug(int timeout, char *name,
- struct list_head *result_up);
-extern int lstcon_nodes_debug(int timeout, int nnd, lnet_process_id_t *nds_up,
- struct list_head *result_up);
-extern int lstcon_group_add(char *name);
-extern int lstcon_group_del(char *name);
-extern int lstcon_group_clean(char *name, int args);
-extern int lstcon_group_refresh(char *name, struct list_head *result_up);
-extern int lstcon_nodes_add(char *name, int nnd, lnet_process_id_t *nds_up,
- struct list_head *result_up);
-extern int lstcon_nodes_remove(char *name, int nnd, lnet_process_id_t *nds_up,
- struct list_head *result_up);
-extern int lstcon_group_info(char *name, lstcon_ndlist_ent_t *gent_up,
- int *index_p, int *ndent_p, lstcon_node_ent_t *ndents_up);
-extern int lstcon_group_list(int idx, int len, char *name_up);
-extern int lstcon_batch_add(char *name);
-extern int lstcon_batch_run(char *name, int timeout, struct list_head *result_up);
-extern int lstcon_batch_stop(char *name, int force, struct list_head *result_up);
-extern int lstcon_test_batch_query(char *name, int testidx,
- int client, int timeout,
- struct list_head *result_up);
-extern int lstcon_batch_del(char *name);
-extern int lstcon_batch_list(int idx, int namelen, char *name_up);
-extern int lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up,
- int server, int testidx, int *index_p,
- int *ndent_p, lstcon_node_ent_t *dents_up);
-extern int lstcon_group_stat(char *grp_name, int timeout,
- struct list_head *result_up);
-extern int lstcon_nodes_stat(int count, lnet_process_id_t *ids_up,
- int timeout, struct list_head *result_up);
-extern int lstcon_test_add(char *name, int type, int loop, int concur,
- int dist, int span, char *src_name, char * dst_name,
- void *param, int paramlen, int *retp, struct list_head *result_up);
-#endif
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Authors: Isaac Huang <isaac@clusterfs.com>
- * Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-int brw_inject_errors = 0;
-CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644,
- "# data errors to inject randomly, zero by default");
-
-static int session_timeout = 100;
-CFS_MODULE_PARM(session_timeout, "i", int, 0444,
- "test session timeout in seconds (100 by default, 0 == never)");
-
-#define SFW_TEST_CONCURRENCY 128
-#define SFW_TEST_RPC_TIMEOUT 64
-#define SFW_CLIENT_RPC_TIMEOUT 64 /* in seconds */
-#define SFW_EXTRA_TEST_BUFFERS 8 /* tolerate buggy peers with extra buffers */
-
-#define sfw_test_buffers(tsi) ((tsi)->tsi_loop + SFW_EXTRA_TEST_BUFFERS)
-
-#define sfw_unpack_id(id) \
-do { \
- __swab64s(&(id).nid); \
- __swab32s(&(id).pid); \
-} while (0)
-
-#define sfw_unpack_sid(sid) \
-do { \
- __swab64s(&(sid).ses_nid); \
- __swab64s(&(sid).ses_stamp); \
-} while (0)
-
-#define sfw_unpack_fw_counters(fc) \
-do { \
- __swab32s(&(fc).brw_errors); \
- __swab32s(&(fc).ping_errors); \
- __swab32s(&(fc).active_tests); \
- __swab32s(&(fc).active_batches); \
- __swab32s(&(fc).zombie_sessions); \
-} while (0)
-
-#define sfw_unpack_rpc_counters(rc) \
-do { \
- __swab32s(&(rc).errors); \
- __swab32s(&(rc).rpcs_sent); \
- __swab32s(&(rc).rpcs_rcvd); \
- __swab32s(&(rc).rpcs_dropped); \
- __swab32s(&(rc).rpcs_expired); \
- __swab64s(&(rc).bulk_get); \
- __swab64s(&(rc).bulk_put); \
-} while (0)
-
-#define sfw_unpack_lnet_counters(lc) \
-do { \
- __swab32s(&(lc).errors); \
- __swab32s(&(lc).msgs_max); \
- __swab32s(&(lc).msgs_alloc); \
- __swab32s(&(lc).send_count); \
- __swab32s(&(lc).recv_count); \
- __swab32s(&(lc).drop_count); \
- __swab32s(&(lc).route_count); \
- __swab64s(&(lc).send_length); \
- __swab64s(&(lc).recv_length); \
- __swab64s(&(lc).drop_length); \
- __swab64s(&(lc).route_length); \
-} while (0)
-
-#define sfw_test_active(t) (atomic_read(&(t)->tsi_nactive) != 0)
-#define sfw_batch_active(b) (atomic_read(&(b)->bat_nactive) != 0)
-
-struct smoketest_framework {
- struct list_head fw_zombie_rpcs; /* RPCs to be recycled */
- struct list_head fw_zombie_sessions; /* stopping sessions */
- struct list_head fw_tests; /* registered test cases */
- atomic_t fw_nzombies; /* # zombie sessions */
- spinlock_t fw_lock; /* serialise */
- sfw_session_t *fw_session; /* _the_ session */
- int fw_shuttingdown; /* shutdown in progress */
- srpc_server_rpc_t *fw_active_srpc; /* running RPC */
-} sfw_data;
-
-/* forward ref's */
-int sfw_stop_batch (sfw_batch_t *tsb, int force);
-void sfw_destroy_session (sfw_session_t *sn);
-
-static inline sfw_test_case_t *
-sfw_find_test_case(int id)
-{
- sfw_test_case_t *tsc;
-
- LASSERT (id <= SRPC_SERVICE_MAX_ID);
- LASSERT (id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
- if (tsc->tsc_srv_service->sv_id == id)
- return tsc;
- }
-
- return NULL;
-}
-
-static int
-sfw_register_test (srpc_service_t *service, sfw_test_client_ops_t *cliops)
-{
- sfw_test_case_t *tsc;
-
- if (sfw_find_test_case(service->sv_id) != NULL) {
- CERROR ("Failed to register test %s (%d)\n",
- service->sv_name, service->sv_id);
- return -EEXIST;
- }
-
- LIBCFS_ALLOC(tsc, sizeof(sfw_test_case_t));
- if (tsc == NULL)
- return -ENOMEM;
-
- memset(tsc, 0, sizeof(sfw_test_case_t));
- tsc->tsc_cli_ops = cliops;
- tsc->tsc_srv_service = service;
-
- list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests);
- return 0;
-}
-
-void
-sfw_add_session_timer (void)
-{
- sfw_session_t *sn = sfw_data.fw_session;
- stt_timer_t *timer = &sn->sn_timer;
-
- LASSERT (!sfw_data.fw_shuttingdown);
-
- if (sn == NULL || sn->sn_timeout == 0)
- return;
-
- LASSERT (!sn->sn_timer_active);
-
- sn->sn_timer_active = 1;
- timer->stt_expires = cfs_time_add(sn->sn_timeout,
- cfs_time_current_sec());
- stt_add_timer(timer);
- return;
-}
-
-int
-sfw_del_session_timer (void)
-{
- sfw_session_t *sn = sfw_data.fw_session;
-
- if (sn == NULL || !sn->sn_timer_active)
- return 0;
-
- LASSERT (sn->sn_timeout != 0);
-
- if (stt_del_timer(&sn->sn_timer)) { /* timer defused */
- sn->sn_timer_active = 0;
- return 0;
- }
-
-#ifndef __KERNEL__
- /* Racing is impossible in single-threaded userland selftest */
- LBUG();
-#endif
- return EBUSY; /* racing with sfw_session_expired() */
-}
-
-/* called with sfw_data.fw_lock held */
-static void
-sfw_deactivate_session (void)
-{
- sfw_session_t *sn = sfw_data.fw_session;
- int nactive = 0;
- sfw_batch_t *tsb;
-
- if (sn == NULL) return;
-
- LASSERT (!sn->sn_timer_active);
-
- sfw_data.fw_session = NULL;
- atomic_inc(&sfw_data.fw_nzombies);
- list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
-
- list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
- if (sfw_batch_active(tsb)) {
- nactive++;
- sfw_stop_batch(tsb, 1);
- }
- }
-
- if (nactive != 0)
- return; /* wait for active batches to stop */
-
- list_del_init(&sn->sn_list);
- spin_unlock(&sfw_data.fw_lock);
-
- sfw_destroy_session(sn);
-
- spin_lock(&sfw_data.fw_lock);
- return;
-}
-
-#ifndef __KERNEL__
-
-int
-sfw_session_removed (void)
-{
- return (sfw_data.fw_session == NULL) ? 1 : 0;
-}
-
-#endif
-
-void
-sfw_session_expired (void *data)
-{
- sfw_session_t *sn = data;
-
- spin_lock(&sfw_data.fw_lock);
-
- LASSERT (sn->sn_timer_active);
- LASSERT (sn == sfw_data.fw_session);
-
- CWARN ("Session expired! sid: %s-"LPU64", name: %s\n",
- libcfs_nid2str(sn->sn_id.ses_nid),
- sn->sn_id.ses_stamp, &sn->sn_name[0]);
-
- sn->sn_timer_active = 0;
- sfw_deactivate_session();
-
- spin_unlock(&sfw_data.fw_lock);
- return;
-}
-
-static inline void
-sfw_init_session (sfw_session_t *sn, lst_sid_t sid, const char *name)
-{
- stt_timer_t *timer = &sn->sn_timer;
-
- memset(sn, 0, sizeof(sfw_session_t));
- CFS_INIT_LIST_HEAD(&sn->sn_list);
- CFS_INIT_LIST_HEAD(&sn->sn_batches);
- atomic_set(&sn->sn_brw_errors, 0);
- atomic_set(&sn->sn_ping_errors, 0);
- strncpy(&sn->sn_name[0], name, LST_NAME_SIZE);
-
- sn->sn_timer_active = 0;
- sn->sn_id = sid;
- sn->sn_timeout = session_timeout;
-
- timer->stt_data = sn;
- timer->stt_func = sfw_session_expired;
- CFS_INIT_LIST_HEAD(&timer->stt_list);
-}
-
-/* completion handler for incoming framework RPCs */
-void
-sfw_server_rpc_done (srpc_server_rpc_t *rpc)
-{
- srpc_service_t *sv = rpc->srpc_service;
- int status = rpc->srpc_status;
-
- CDEBUG (D_NET,
- "Incoming framework RPC done: "
- "service %s, peer %s, status %s:%d\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer),
- swi_state2str(rpc->srpc_wi.wi_state),
- status);
-
- if (rpc->srpc_bulk != NULL)
- sfw_free_pages(rpc);
- return;
-}
-
-void
-sfw_client_rpc_fini (srpc_client_rpc_t *rpc)
-{
- LASSERT (rpc->crpc_bulk.bk_niov == 0);
- LASSERT (list_empty(&rpc->crpc_list));
- LASSERT (atomic_read(&rpc->crpc_refcount) == 0);
-#ifndef __KERNEL__
- LASSERT (rpc->crpc_bulk.bk_pages == NULL);
-#endif
-
- CDEBUG (D_NET,
- "Outgoing framework RPC done: "
- "service %d, peer %s, status %s:%d:%d\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- swi_state2str(rpc->crpc_wi.wi_state),
- rpc->crpc_aborted, rpc->crpc_status);
-
- spin_lock(&sfw_data.fw_lock);
-
- /* my callers must finish all RPCs before shutting me down */
- LASSERT (!sfw_data.fw_shuttingdown);
- list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs);
-
- spin_unlock(&sfw_data.fw_lock);
- return;
-}
-
-sfw_batch_t *
-sfw_find_batch (lst_bid_t bid)
-{
- sfw_session_t *sn = sfw_data.fw_session;
- sfw_batch_t *bat;
-
- LASSERT (sn != NULL);
-
- list_for_each_entry (bat, &sn->sn_batches, bat_list) {
- if (bat->bat_id.bat_id == bid.bat_id)
- return bat;
- }
-
- return NULL;
-}
-
-sfw_batch_t *
-sfw_bid2batch (lst_bid_t bid)
-{
- sfw_session_t *sn = sfw_data.fw_session;
- sfw_batch_t *bat;
-
- LASSERT (sn != NULL);
-
- bat = sfw_find_batch(bid);
- if (bat != NULL)
- return bat;
-
- LIBCFS_ALLOC(bat, sizeof(sfw_batch_t));
- if (bat == NULL)
- return NULL;
-
- bat->bat_error = 0;
- bat->bat_session = sn;
- bat->bat_id = bid;
- atomic_set(&bat->bat_nactive, 0);
- CFS_INIT_LIST_HEAD(&bat->bat_tests);
-
- list_add_tail(&bat->bat_list, &sn->sn_batches);
- return bat;
-}
-
-int
-sfw_get_stats (srpc_stat_reqst_t *request, srpc_stat_reply_t *reply)
-{
- sfw_session_t *sn = sfw_data.fw_session;
- sfw_counters_t *cnt = &reply->str_fw;
- sfw_batch_t *bat;
-
- reply->str_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
-
- if (request->str_sid.ses_nid == LNET_NID_ANY) {
- reply->str_status = EINVAL;
- return 0;
- }
-
- if (sn == NULL || !sfw_sid_equal(request->str_sid, sn->sn_id)) {
- reply->str_status = ESRCH;
- return 0;
- }
-
- LNET_LOCK();
- reply->str_lnet = the_lnet.ln_counters;
- LNET_UNLOCK();
-
- srpc_get_counters(&reply->str_rpc);
-
- cnt->brw_errors = atomic_read(&sn->sn_brw_errors);
- cnt->ping_errors = atomic_read(&sn->sn_ping_errors);
- cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies);
-
- cnt->active_tests = cnt->active_batches = 0;
- list_for_each_entry (bat, &sn->sn_batches, bat_list) {
- int n = atomic_read(&bat->bat_nactive);
-
- if (n > 0) {
- cnt->active_batches++;
- cnt->active_tests += n;
- }
- }
-
- reply->str_status = 0;
- return 0;
-}
-
-int
-sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
-{
- sfw_session_t *sn = sfw_data.fw_session;
-
- if (request->mksn_sid.ses_nid == LNET_NID_ANY) {
- reply->mksn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
- reply->mksn_status = EINVAL;
- return 0;
- }
-
- if (sn != NULL && !request->mksn_force) {
- reply->mksn_sid = sn->sn_id;
- reply->mksn_status = EBUSY;
- strncpy(&reply->mksn_name[0], &sn->sn_name[0], LST_NAME_SIZE);
- return 0;
- }
-
- LIBCFS_ALLOC(sn, sizeof(sfw_session_t));
- if (sn == NULL) {
- CERROR ("Dropping RPC (mksn) under memory pressure.\n");
- return -ENOMEM;
- }
-
- sfw_init_session(sn, request->mksn_sid, &request->mksn_name[0]);
-
- spin_lock(&sfw_data.fw_lock);
-
- sfw_deactivate_session();
- LASSERT (sfw_data.fw_session == NULL);
- sfw_data.fw_session = sn;
-
- spin_unlock(&sfw_data.fw_lock);
-
- reply->mksn_status = 0;
- reply->mksn_sid = sn->sn_id;
- reply->mksn_timeout = sn->sn_timeout;
- return 0;
-}
-
-int
-sfw_remove_session (srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply)
-{
- sfw_session_t *sn = sfw_data.fw_session;
-
- reply->rmsn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
-
- if (request->rmsn_sid.ses_nid == LNET_NID_ANY) {
- reply->rmsn_status = EINVAL;
- return 0;
- }
-
- if (sn == NULL || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) {
- reply->rmsn_status = (sn == NULL) ? ESRCH : EBUSY;
- return 0;
- }
-
- spin_lock(&sfw_data.fw_lock);
- sfw_deactivate_session();
- spin_unlock(&sfw_data.fw_lock);
-
- reply->rmsn_status = 0;
- reply->rmsn_sid = LST_INVALID_SID;
- LASSERT (sfw_data.fw_session == NULL);
- return 0;
-}
-
-int
-sfw_debug_session (srpc_debug_reqst_t *request, srpc_debug_reply_t *reply)
-{
- sfw_session_t *sn = sfw_data.fw_session;
-
- if (sn == NULL) {
- reply->dbg_status = ESRCH;
- reply->dbg_sid = LST_INVALID_SID;
- return 0;
- }
-
- reply->dbg_status = 0;
- reply->dbg_sid = sn->sn_id;
- reply->dbg_timeout = sn->sn_timeout;
- strncpy(reply->dbg_name, &sn->sn_name[0], LST_NAME_SIZE);
-
- return 0;
-}
-
-void
-sfw_test_rpc_fini (srpc_client_rpc_t *rpc)
-{
- sfw_test_unit_t *tsu = rpc->crpc_priv;
- sfw_test_instance_t *tsi = tsu->tsu_instance;
-
- /* Called with hold of tsi->tsi_lock */
- LASSERT (list_empty(&rpc->crpc_list));
- list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
-}
-
-int
-sfw_load_test (sfw_test_instance_t *tsi)
-{
- sfw_test_case_t *tsc = sfw_find_test_case(tsi->tsi_service);
- int nrequired = sfw_test_buffers(tsi);
- int nposted;
-
- LASSERT (tsc != NULL);
-
- if (tsi->tsi_is_client) {
- tsi->tsi_ops = tsc->tsc_cli_ops;
- return 0;
- }
-
- nposted = srpc_service_add_buffers(tsc->tsc_srv_service, nrequired);
- if (nposted != nrequired) {
- CWARN ("Failed to reserve enough buffers: "
- "service %s, %d needed, %d reserved\n",
- tsc->tsc_srv_service->sv_name, nrequired, nposted);
- srpc_service_remove_buffers(tsc->tsc_srv_service, nposted);
- return -ENOMEM;
- }
-
- CDEBUG (D_NET, "Reserved %d buffers for test %s\n",
- nposted, tsc->tsc_srv_service->sv_name);
- return 0;
-}
-
-void
-sfw_unload_test (sfw_test_instance_t *tsi)
-{
- sfw_test_case_t *tsc = sfw_find_test_case(tsi->tsi_service);
-
- LASSERT (tsc != NULL);
-
- if (!tsi->tsi_is_client)
- srpc_service_remove_buffers(tsc->tsc_srv_service,
- sfw_test_buffers(tsi));
- return;
-}
-
-void
-sfw_destroy_test_instance (sfw_test_instance_t *tsi)
-{
- srpc_client_rpc_t *rpc;
- sfw_test_unit_t *tsu;
-
- if (!tsi->tsi_is_client) goto clean;
-
- tsi->tsi_ops->tso_fini(tsi);
-
- LASSERT (!tsi->tsi_stopping);
- LASSERT (list_empty(&tsi->tsi_active_rpcs));
- LASSERT (!sfw_test_active(tsi));
-
- while (!list_empty(&tsi->tsi_units)) {
- tsu = list_entry(tsi->tsi_units.next,
- sfw_test_unit_t, tsu_list);
- list_del(&tsu->tsu_list);
- LIBCFS_FREE(tsu, sizeof(*tsu));
- }
-
- while (!list_empty(&tsi->tsi_free_rpcs)) {
- rpc = list_entry(tsi->tsi_free_rpcs.next,
- srpc_client_rpc_t, crpc_list);
- list_del(&rpc->crpc_list);
- LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
- }
-
-clean:
- sfw_unload_test(tsi);
- LIBCFS_FREE(tsi, sizeof(*tsi));
- return;
-}
-
-void
-sfw_destroy_batch (sfw_batch_t *tsb)
-{
- sfw_test_instance_t *tsi;
-
- LASSERT (!sfw_batch_active(tsb));
- LASSERT (list_empty(&tsb->bat_list));
-
- while (!list_empty(&tsb->bat_tests)) {
- tsi = list_entry(tsb->bat_tests.next,
- sfw_test_instance_t, tsi_list);
- list_del_init(&tsi->tsi_list);
- sfw_destroy_test_instance(tsi);
- }
-
- LIBCFS_FREE(tsb, sizeof(sfw_batch_t));
- return;
-}
-
-void
-sfw_destroy_session (sfw_session_t *sn)
-{
- sfw_batch_t *batch;
-
- LASSERT (list_empty(&sn->sn_list));
- LASSERT (sn != sfw_data.fw_session);
-
- while (!list_empty(&sn->sn_batches)) {
- batch = list_entry(sn->sn_batches.next,
- sfw_batch_t, bat_list);
- list_del_init(&batch->bat_list);
- sfw_destroy_batch(batch);
- }
-
- LIBCFS_FREE(sn, sizeof(*sn));
- atomic_dec(&sfw_data.fw_nzombies);
- return;
-}
-
-void
-sfw_unpack_test_req (srpc_msg_t *msg)
-{
- srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
-
- LASSERT (msg->msg_type == SRPC_MSG_TEST_REQST);
- LASSERT (req->tsr_is_client);
-
- if (msg->msg_magic == SRPC_MSG_MAGIC)
- return; /* no flipping needed */
-
- LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- if (req->tsr_service == SRPC_SERVICE_BRW) {
- test_bulk_req_t *bulk = &req->tsr_u.bulk;
-
- __swab32s(&bulk->blk_opc);
- __swab32s(&bulk->blk_npg);
- __swab32s(&bulk->blk_flags);
- return;
- }
-
- if (req->tsr_service == SRPC_SERVICE_PING) {
- test_ping_req_t *ping = &req->tsr_u.ping;
-
- __swab32s(&ping->png_size);
- __swab32s(&ping->png_flags);
- return;
- }
-
- LBUG ();
- return;
-}
-
-int
-sfw_add_test_instance (sfw_batch_t *tsb, srpc_server_rpc_t *rpc)
-{
- srpc_msg_t *msg = &rpc->srpc_reqstbuf->buf_msg;
- srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
- srpc_bulk_t *bk = rpc->srpc_bulk;
- int ndest = req->tsr_ndest;
- sfw_test_unit_t *tsu;
- sfw_test_instance_t *tsi;
- int i;
- int rc;
-
- LIBCFS_ALLOC(tsi, sizeof(*tsi));
- if (tsi == NULL) {
- CERROR ("Can't allocate test instance for batch: "LPU64"\n",
- tsb->bat_id.bat_id);
- return -ENOMEM;
- }
-
- memset(tsi, 0, sizeof(*tsi));
- spin_lock_init(&tsi->tsi_lock);
- atomic_set(&tsi->tsi_nactive, 0);
- CFS_INIT_LIST_HEAD(&tsi->tsi_units);
- CFS_INIT_LIST_HEAD(&tsi->tsi_free_rpcs);
- CFS_INIT_LIST_HEAD(&tsi->tsi_active_rpcs);
-
- tsi->tsi_stopping = 0;
- tsi->tsi_batch = tsb;
- tsi->tsi_loop = req->tsr_loop;
- tsi->tsi_concur = req->tsr_concur;
- tsi->tsi_service = req->tsr_service;
- tsi->tsi_is_client = !!(req->tsr_is_client);
- tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr);
-
- rc = sfw_load_test(tsi);
- if (rc != 0) {
- LIBCFS_FREE(tsi, sizeof(*tsi));
- return rc;
- }
-
- LASSERT (!sfw_batch_active(tsb));
-
- if (!tsi->tsi_is_client) {
- /* it's test server, just add it to tsb */
- list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
- return 0;
- }
-
- LASSERT (bk != NULL);
-#ifndef __KERNEL__
- LASSERT (bk->bk_pages != NULL);
-#endif
- LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= ndest);
- LASSERT (bk->bk_len >= sizeof(lnet_process_id_t) * ndest);
-
- sfw_unpack_test_req(msg);
- memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
-
- for (i = 0; i < ndest; i++) {
- lnet_process_id_t *dests;
- lnet_process_id_t id;
- int j;
-
-#ifdef __KERNEL__
- dests = cfs_page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page);
- LASSERT (dests != NULL); /* my pages are within KVM always */
-#else
- dests = cfs_page_address(bk->bk_pages[i / SFW_ID_PER_PAGE]);
-#endif
- id = dests[i % SFW_ID_PER_PAGE];
- if (msg->msg_magic != SRPC_MSG_MAGIC)
- sfw_unpack_id(id);
-
- for (j = 0; j < tsi->tsi_concur; j++) {
- LIBCFS_ALLOC(tsu, sizeof(sfw_test_unit_t));
- if (tsu == NULL) {
- rc = -ENOMEM;
- CERROR ("Can't allocate tsu for %d\n",
- tsi->tsi_service);
- goto error;
- }
-
- tsu->tsu_dest = id;
- tsu->tsu_instance = tsi;
- tsu->tsu_private = NULL;
- list_add_tail(&tsu->tsu_list, &tsi->tsi_units);
- }
- }
-
- rc = tsi->tsi_ops->tso_init(tsi);
- if (rc == 0) {
- list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
- return 0;
- }
-
-error:
- LASSERT (rc != 0);
- sfw_destroy_test_instance(tsi);
- return rc;
-}
-
-static void
-sfw_test_unit_done (sfw_test_unit_t *tsu)
-{
- sfw_test_instance_t *tsi = tsu->tsu_instance;
- sfw_batch_t *tsb = tsi->tsi_batch;
- sfw_session_t *sn = tsb->bat_session;
-
- LASSERT (sfw_test_active(tsi));
-
- if (!atomic_dec_and_test(&tsi->tsi_nactive))
- return;
-
- /* the test instance is done */
- spin_lock(&tsi->tsi_lock);
-
- tsi->tsi_stopping = 0;
-
- spin_unlock(&tsi->tsi_lock);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (!atomic_dec_and_test(&tsb->bat_nactive) || /* tsb still active */
- sn == sfw_data.fw_session) { /* sn also active */
- spin_unlock(&sfw_data.fw_lock);
- return;
- }
-
- LASSERT (!list_empty(&sn->sn_list)); /* I'm a zombie! */
-
- list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
- if (sfw_batch_active(tsb)) {
- spin_unlock(&sfw_data.fw_lock);
- return;
- }
- }
-
- list_del_init(&sn->sn_list);
- spin_unlock(&sfw_data.fw_lock);
-
- sfw_destroy_session(sn);
- return;
-}
-
-void
-sfw_test_rpc_done (srpc_client_rpc_t *rpc)
-{
- sfw_test_unit_t *tsu = rpc->crpc_priv;
- sfw_test_instance_t *tsi = tsu->tsu_instance;
- int done = 0;
-
- tsi->tsi_ops->tso_done_rpc(tsu, rpc);
-
- spin_lock(&tsi->tsi_lock);
-
- LASSERT (sfw_test_active(tsi));
- LASSERT (!list_empty(&rpc->crpc_list));
-
- list_del_init(&rpc->crpc_list);
-
- /* batch is stopping or loop is done or get error */
- if (tsi->tsi_stopping ||
- tsu->tsu_loop == 0 ||
- (rpc->crpc_status != 0 && tsi->tsi_stoptsu_onerr))
- done = 1;
-
- /* dec ref for poster */
- srpc_client_rpc_decref(rpc);
-
- spin_unlock(&tsi->tsi_lock);
-
- if (!done) {
- swi_schedule_workitem(&tsu->tsu_worker);
- return;
- }
-
- sfw_test_unit_done(tsu);
- return;
-}
-
-int
-sfw_create_test_rpc (sfw_test_unit_t *tsu, lnet_process_id_t peer,
- int nblk, int blklen, srpc_client_rpc_t **rpcpp)
-{
- srpc_client_rpc_t *rpc = NULL;
- sfw_test_instance_t *tsi = tsu->tsu_instance;
-
- spin_lock(&tsi->tsi_lock);
-
- LASSERT (sfw_test_active(tsi));
-
- if (!list_empty(&tsi->tsi_free_rpcs)) {
- /* pick request from buffer */
- rpc = list_entry(tsi->tsi_free_rpcs.next,
- srpc_client_rpc_t, crpc_list);
- LASSERT (nblk == rpc->crpc_bulk.bk_niov);
- list_del_init(&rpc->crpc_list);
-
- srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
- blklen, sfw_test_rpc_done,
- sfw_test_rpc_fini, tsu);
- }
-
- spin_unlock(&tsi->tsi_lock);
-
- if (rpc == NULL)
- rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
- blklen, sfw_test_rpc_done,
- sfw_test_rpc_fini, tsu);
- if (rpc == NULL) {
- CERROR ("Can't create rpc for test %d\n", tsi->tsi_service);
- return -ENOMEM;
- }
-
- *rpcpp = rpc;
- return 0;
-}
-
-int
-sfw_run_test (swi_workitem_t *wi)
-{
- sfw_test_unit_t *tsu = wi->wi_data;
- sfw_test_instance_t *tsi = tsu->tsu_instance;
- srpc_client_rpc_t *rpc = NULL;
-
- LASSERT (wi == &tsu->tsu_worker);
-
- if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc) != 0) {
- LASSERT (rpc == NULL);
- goto test_done;
- }
-
- LASSERT (rpc != NULL);
-
- spin_lock(&tsi->tsi_lock);
-
- if (tsi->tsi_stopping) {
- list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
- spin_unlock(&tsi->tsi_lock);
- goto test_done;
- }
-
- if (tsu->tsu_loop > 0)
- tsu->tsu_loop--;
-
- list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
- spin_unlock(&tsi->tsi_lock);
-
- rpc->crpc_timeout = SFW_TEST_RPC_TIMEOUT;
-
- spin_lock(&rpc->crpc_lock);
- srpc_post_rpc(rpc);
- spin_unlock(&rpc->crpc_lock);
- return 0;
-
-test_done:
- /*
- * No one can schedule me now since:
- * - previous RPC, if any, has done and
- * - no new RPC is initiated.
- * - my batch is still active; no one can run it again now.
- * Cancel pending schedules and prevent future schedule attempts:
- */
- swi_kill_workitem(wi);
- sfw_test_unit_done(tsu);
- return 1;
-}
-
-int
-sfw_run_batch (sfw_batch_t *tsb)
-{
- swi_workitem_t *wi;
- sfw_test_unit_t *tsu;
- sfw_test_instance_t *tsi;
-
- if (sfw_batch_active(tsb)) {
- CDEBUG (D_NET, "Can't start active batch: "LPU64" (%d)\n",
- tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
- return -EPERM;
- }
-
- list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
- if (!tsi->tsi_is_client) /* skip server instances */
- continue;
-
- LASSERT (!tsi->tsi_stopping);
- LASSERT (!sfw_test_active(tsi));
-
- atomic_inc(&tsb->bat_nactive);
-
- list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) {
- atomic_inc(&tsi->tsi_nactive);
- tsu->tsu_loop = tsi->tsi_loop;
- wi = &tsu->tsu_worker;
- swi_init_workitem(wi, tsu, sfw_run_test);
- swi_schedule_workitem(wi);
- }
- }
-
- return 0;
-}
-
-int
-sfw_stop_batch (sfw_batch_t *tsb, int force)
-{
- sfw_test_instance_t *tsi;
- srpc_client_rpc_t *rpc;
-
- if (!sfw_batch_active(tsb))
- return -EPERM;
-
- list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
- spin_lock(&tsi->tsi_lock);
-
- if (!tsi->tsi_is_client ||
- !sfw_test_active(tsi) || tsi->tsi_stopping) {
- spin_unlock(&tsi->tsi_lock);
- continue;
- }
-
- tsi->tsi_stopping = 1;
-
- if (!force) {
- spin_unlock(&tsi->tsi_lock);
- continue;
- }
-
- /* abort launched rpcs in the test */
- list_for_each_entry (rpc, &tsi->tsi_active_rpcs, crpc_list) {
- spin_lock(&rpc->crpc_lock);
-
- srpc_abort_rpc(rpc, -EINTR);
-
- spin_unlock(&rpc->crpc_lock);
- }
-
- spin_unlock(&tsi->tsi_lock);
- }
-
- return 0;
-}
-
-int
-sfw_query_batch (sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply)
-{
- sfw_test_instance_t *tsi;
-
- if (testidx < 0)
- return -EINVAL;
-
- if (testidx == 0) {
- reply->bar_active = atomic_read(&tsb->bat_nactive);
- return 0;
- }
-
- list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
- if (testidx-- > 1)
- continue;
-
- reply->bar_active = atomic_read(&tsi->tsi_nactive);
- return 0;
- }
-
- return -ENOENT;
-}
-
-void
-sfw_free_pages (srpc_server_rpc_t *rpc)
-{
- srpc_free_bulk(rpc->srpc_bulk);
- rpc->srpc_bulk = NULL;
-}
-
-int
-sfw_alloc_pages (srpc_server_rpc_t *rpc, int npages, int sink)
-{
- LASSERT (rpc->srpc_bulk == NULL);
- LASSERT (npages > 0 && npages <= LNET_MAX_IOV);
-
- rpc->srpc_bulk = srpc_alloc_bulk(npages, sink);
- if (rpc->srpc_bulk == NULL) return -ENOMEM;
-
- return 0;
-}
-
-int
-sfw_add_test (srpc_server_rpc_t *rpc)
-{
- sfw_session_t *sn = sfw_data.fw_session;
- srpc_test_reply_t *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
- srpc_test_reqst_t *request;
- int rc;
- sfw_batch_t *bat;
-
- request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst;
- reply->tsr_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
-
- if (request->tsr_loop == 0 ||
- request->tsr_concur == 0 ||
- request->tsr_sid.ses_nid == LNET_NID_ANY ||
- request->tsr_ndest > SFW_MAX_NDESTS ||
- (request->tsr_is_client && request->tsr_ndest == 0) ||
- request->tsr_concur > SFW_MAX_CONCUR ||
- request->tsr_service > SRPC_SERVICE_MAX_ID ||
- request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) {
- reply->tsr_status = EINVAL;
- return 0;
- }
-
- if (sn == NULL || !sfw_sid_equal(request->tsr_sid, sn->sn_id) ||
- sfw_find_test_case(request->tsr_service) == NULL) {
- reply->tsr_status = ENOENT;
- return 0;
- }
-
- bat = sfw_bid2batch(request->tsr_bid);
- if (bat == NULL) {
- CERROR ("Dropping RPC (%s) from %s under memory pressure.\n",
- rpc->srpc_service->sv_name,
- libcfs_id2str(rpc->srpc_peer));
- return -ENOMEM;
- }
-
- if (sfw_batch_active(bat)) {
- reply->tsr_status = EBUSY;
- return 0;
- }
-
- if (request->tsr_is_client && rpc->srpc_bulk == NULL) {
- /* rpc will be resumed later in sfw_bulk_ready */
- return sfw_alloc_pages(rpc,
- sfw_id_pages(request->tsr_ndest), 1);
- }
-
- rc = sfw_add_test_instance(bat, rpc);
- CDEBUG (rc == 0 ? D_NET : D_WARNING,
- "%s test: sv %d %s, loop %d, concur %d, ndest %d\n",
- rc == 0 ? "Added" : "Failed to add", request->tsr_service,
- request->tsr_is_client ? "client" : "server",
- request->tsr_loop, request->tsr_concur, request->tsr_ndest);
-
- reply->tsr_status = (rc < 0) ? -rc : rc;
- return 0;
-}
-
-int
-sfw_control_batch (srpc_batch_reqst_t *request, srpc_batch_reply_t *reply)
-{
- sfw_session_t *sn = sfw_data.fw_session;
- int rc = 0;
- sfw_batch_t *bat;
-
- reply->bar_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
-
- if (sn == NULL || !sfw_sid_equal(request->bar_sid, sn->sn_id)) {
- reply->bar_status = ESRCH;
- return 0;
- }
-
- bat = sfw_find_batch(request->bar_bid);
- if (bat == NULL) {
- reply->bar_status = ENOENT;
- return 0;
- }
-
- switch (request->bar_opc) {
- case SRPC_BATCH_OPC_RUN:
- rc = sfw_run_batch(bat);
- break;
-
- case SRPC_BATCH_OPC_STOP:
- rc = sfw_stop_batch(bat, request->bar_arg);
- break;
-
- case SRPC_BATCH_OPC_QUERY:
- rc = sfw_query_batch(bat, request->bar_testidx, reply);
- break;
-
- default:
- return -EINVAL; /* drop it */
- }
-
- reply->bar_status = (rc < 0) ? -rc : rc;
- return 0;
-}
-
-int
-sfw_handle_server_rpc (srpc_server_rpc_t *rpc)
-{
- srpc_service_t *sv = rpc->srpc_service;
- srpc_msg_t *reply = &rpc->srpc_replymsg;
- srpc_msg_t *request = &rpc->srpc_reqstbuf->buf_msg;
- int rc = 0;
-
- LASSERT (sfw_data.fw_active_srpc == NULL);
- LASSERT (sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (sfw_data.fw_shuttingdown) {
- spin_unlock(&sfw_data.fw_lock);
- return -ESHUTDOWN;
- }
-
- /* Remove timer to avoid racing with it or expiring active session */
- if (sfw_del_session_timer() != 0) {
- CERROR ("Dropping RPC (%s) from %s: racing with expiry timer.",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer));
- spin_unlock(&sfw_data.fw_lock);
- return -EAGAIN;
- }
-
- sfw_data.fw_active_srpc = rpc;
- spin_unlock(&sfw_data.fw_lock);
-
- sfw_unpack_message(request);
- LASSERT (request->msg_type == srpc_service2request(sv->sv_id));
-
- switch(sv->sv_id) {
- default:
- LBUG ();
- case SRPC_SERVICE_TEST:
- rc = sfw_add_test(rpc);
- break;
-
- case SRPC_SERVICE_BATCH:
- rc = sfw_control_batch(&request->msg_body.bat_reqst,
- &reply->msg_body.bat_reply);
- break;
-
- case SRPC_SERVICE_QUERY_STAT:
- rc = sfw_get_stats(&request->msg_body.stat_reqst,
- &reply->msg_body.stat_reply);
- break;
-
- case SRPC_SERVICE_DEBUG:
- rc = sfw_debug_session(&request->msg_body.dbg_reqst,
- &reply->msg_body.dbg_reply);
- break;
-
- case SRPC_SERVICE_MAKE_SESSION:
- rc = sfw_make_session(&request->msg_body.mksn_reqst,
- &reply->msg_body.mksn_reply);
- break;
-
- case SRPC_SERVICE_REMOVE_SESSION:
- rc = sfw_remove_session(&request->msg_body.rmsn_reqst,
- &reply->msg_body.rmsn_reply);
- break;
- }
-
- rpc->srpc_done = sfw_server_rpc_done;
- spin_lock(&sfw_data.fw_lock);
-
-#ifdef __KERNEL__
- if (!sfw_data.fw_shuttingdown)
- sfw_add_session_timer();
-#else
- LASSERT (!sfw_data.fw_shuttingdown);
- sfw_add_session_timer();
-#endif
-
- sfw_data.fw_active_srpc = NULL;
- spin_unlock(&sfw_data.fw_lock);
- return rc;
-}
-
-int
-sfw_bulk_ready (srpc_server_rpc_t *rpc, int status)
-{
- srpc_service_t *sv = rpc->srpc_service;
- int rc;
-
- LASSERT (rpc->srpc_bulk != NULL);
- LASSERT (sv->sv_id == SRPC_SERVICE_TEST);
- LASSERT (sfw_data.fw_active_srpc == NULL);
- LASSERT (rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (status != 0) {
- CERROR ("Bulk transfer failed for RPC: "
- "service %s, peer %s, status %d\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer), status);
- spin_unlock(&sfw_data.fw_lock);
- return -EIO;
- }
-
- if (sfw_data.fw_shuttingdown) {
- spin_unlock(&sfw_data.fw_lock);
- return -ESHUTDOWN;
- }
-
- if (sfw_del_session_timer() != 0) {
- CERROR ("Dropping RPC (%s) from %s: racing with expiry timer",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer));
- spin_unlock(&sfw_data.fw_lock);
- return -EAGAIN;
- }
-
- sfw_data.fw_active_srpc = rpc;
- spin_unlock(&sfw_data.fw_lock);
-
- rc = sfw_add_test(rpc);
-
- spin_lock(&sfw_data.fw_lock);
-
-#ifdef __KERNEL__
- if (!sfw_data.fw_shuttingdown)
- sfw_add_session_timer();
-#else
- LASSERT (!sfw_data.fw_shuttingdown);
- sfw_add_session_timer();
-#endif
-
- sfw_data.fw_active_srpc = NULL;
- spin_unlock(&sfw_data.fw_lock);
- return rc;
-}
-
-srpc_client_rpc_t *
-sfw_create_rpc (lnet_process_id_t peer, int service,
- int nbulkiov, int bulklen,
- void (*done) (srpc_client_rpc_t *), void *priv)
-{
- srpc_client_rpc_t *rpc;
-
- spin_lock(&sfw_data.fw_lock);
-
- LASSERT (!sfw_data.fw_shuttingdown);
- LASSERT (service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- if (nbulkiov == 0 && !list_empty(&sfw_data.fw_zombie_rpcs)) {
- rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
- srpc_client_rpc_t, crpc_list);
- list_del(&rpc->crpc_list);
- spin_unlock(&sfw_data.fw_lock);
-
- srpc_init_client_rpc(rpc, peer, service, 0, 0,
- done, sfw_client_rpc_fini, priv);
- return rpc;
- }
-
- spin_unlock(&sfw_data.fw_lock);
-
- rpc = srpc_create_client_rpc(peer, service, nbulkiov, bulklen, done,
- nbulkiov != 0 ? NULL : sfw_client_rpc_fini,
- priv);
- return rpc;
-}
-
-void
-sfw_unpack_message (srpc_msg_t *msg)
-{
- if (msg->msg_magic == SRPC_MSG_MAGIC)
- return; /* no flipping needed */
-
- LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- __swab32s(&msg->msg_type);
-
- if (msg->msg_type == SRPC_MSG_STAT_REQST) {
- srpc_stat_reqst_t *req = &msg->msg_body.stat_reqst;
-
- __swab32s(&req->str_type);
- __swab64s(&req->str_rpyid);
- sfw_unpack_sid(req->str_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_STAT_REPLY) {
- srpc_stat_reply_t *rep = &msg->msg_body.stat_reply;
-
- __swab32s(&rep->str_status);
- sfw_unpack_sid(rep->str_sid);
- sfw_unpack_fw_counters(rep->str_fw);
- sfw_unpack_rpc_counters(rep->str_rpc);
- sfw_unpack_lnet_counters(rep->str_lnet);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_MKSN_REQST) {
- srpc_mksn_reqst_t *req = &msg->msg_body.mksn_reqst;
-
- __swab64s(&req->mksn_rpyid);
- __swab32s(&req->mksn_force);
- sfw_unpack_sid(req->mksn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_MKSN_REPLY) {
- srpc_mksn_reply_t *rep = &msg->msg_body.mksn_reply;
-
- __swab32s(&rep->mksn_status);
- __swab32s(&rep->mksn_timeout);
- sfw_unpack_sid(rep->mksn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_RMSN_REQST) {
- srpc_rmsn_reqst_t *req = &msg->msg_body.rmsn_reqst;
-
- __swab64s(&req->rmsn_rpyid);
- sfw_unpack_sid(req->rmsn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_RMSN_REPLY) {
- srpc_rmsn_reply_t *rep = &msg->msg_body.rmsn_reply;
-
- __swab32s(&rep->rmsn_status);
- sfw_unpack_sid(rep->rmsn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_DEBUG_REQST) {
- srpc_debug_reqst_t *req = &msg->msg_body.dbg_reqst;
-
- __swab64s(&req->dbg_rpyid);
- __swab32s(&req->dbg_flags);
- sfw_unpack_sid(req->dbg_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) {
- srpc_debug_reply_t *rep = &msg->msg_body.dbg_reply;
-
- __swab32s(&rep->dbg_nbatch);
- __swab32s(&rep->dbg_timeout);
- sfw_unpack_sid(rep->dbg_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_BATCH_REQST) {
- srpc_batch_reqst_t *req = &msg->msg_body.bat_reqst;
-
- __swab32s(&req->bar_opc);
- __swab64s(&req->bar_rpyid);
- __swab32s(&req->bar_testidx);
- __swab32s(&req->bar_arg);
- sfw_unpack_sid(req->bar_sid);
- __swab64s(&req->bar_bid.bat_id);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_BATCH_REPLY) {
- srpc_batch_reply_t *rep = &msg->msg_body.bat_reply;
-
- __swab32s(&rep->bar_status);
- sfw_unpack_sid(rep->bar_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_TEST_REQST) {
- srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
-
- __swab64s(&req->tsr_rpyid);
- __swab64s(&req->tsr_bulkid);
- __swab32s(&req->tsr_loop);
- __swab32s(&req->tsr_ndest);
- __swab32s(&req->tsr_concur);
- __swab32s(&req->tsr_service);
- sfw_unpack_sid(req->tsr_sid);
- __swab64s(&req->tsr_bid.bat_id);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_TEST_REPLY) {
- srpc_test_reply_t *rep = &msg->msg_body.tes_reply;
-
- __swab32s(&rep->tsr_status);
- sfw_unpack_sid(rep->tsr_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_JOIN_REQST) {
- srpc_join_reqst_t *req = &msg->msg_body.join_reqst;
-
- __swab64s(&req->join_rpyid);
- sfw_unpack_sid(req->join_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_JOIN_REPLY) {
- srpc_join_reply_t *rep = &msg->msg_body.join_reply;
-
- __swab32s(&rep->join_status);
- __swab32s(&rep->join_timeout);
- sfw_unpack_sid(rep->join_sid);
- return;
- }
-
- LBUG ();
- return;
-}
-
-void
-sfw_abort_rpc (srpc_client_rpc_t *rpc)
-{
- LASSERT (atomic_read(&rpc->crpc_refcount) > 0);
- LASSERT (rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- spin_lock(&rpc->crpc_lock);
- srpc_abort_rpc(rpc, -EINTR);
- spin_unlock(&rpc->crpc_lock);
- return;
-}
-
-void
-sfw_post_rpc (srpc_client_rpc_t *rpc)
-{
- spin_lock(&rpc->crpc_lock);
-
- LASSERT (!rpc->crpc_closed);
- LASSERT (!rpc->crpc_aborted);
- LASSERT (list_empty(&rpc->crpc_list));
- LASSERT (!sfw_data.fw_shuttingdown);
-
- rpc->crpc_timeout = SFW_CLIENT_RPC_TIMEOUT;
- srpc_post_rpc(rpc);
-
- spin_unlock(&rpc->crpc_lock);
- return;
-}
-
-static srpc_service_t sfw_services[] =
-{
- {
- .sv_name = "debug",
- .sv_id = SRPC_SERVICE_DEBUG,
- },
- {
- .sv_name = "query stats",
- .sv_id = SRPC_SERVICE_QUERY_STAT,
- },
- {
- .sv_name = "make sessin",
- .sv_id = SRPC_SERVICE_MAKE_SESSION,
- },
- {
- .sv_name = "remove session",
- .sv_id = SRPC_SERVICE_REMOVE_SESSION,
- },
- {
- .sv_name = "batch service",
- .sv_id = SRPC_SERVICE_BATCH,
- },
- {
- .sv_name = "test service",
- .sv_id = SRPC_SERVICE_TEST,
- },
- { .sv_name = NULL, }
-};
-
-extern sfw_test_client_ops_t ping_test_client;
-extern srpc_service_t ping_test_service;
-
-extern sfw_test_client_ops_t brw_test_client;
-extern srpc_service_t brw_test_service;
-
-int
-sfw_startup (void)
-{
- int i;
- int rc;
- int error;
- srpc_service_t *sv;
- sfw_test_case_t *tsc;
-
-#ifndef __KERNEL__
- char *s;
-
- s = getenv("SESSION_TIMEOUT");
- session_timeout = s != NULL ? atoi(s) : session_timeout;
-
- s = getenv("BRW_INJECT_ERRORS");
- brw_inject_errors = s != NULL ? atoi(s) : brw_inject_errors;
-#endif
-
- if (session_timeout < 0) {
- CERROR ("Session timeout must be non-negative: %d\n",
- session_timeout);
- return -EINVAL;
- }
-
- if (session_timeout == 0)
- CWARN ("Zero session_timeout specified "
- "- test sessions never expire.\n");
-
- memset(&sfw_data, 0, sizeof(struct smoketest_framework));
-
- sfw_data.fw_session = NULL;
- sfw_data.fw_active_srpc = NULL;
- spin_lock_init(&sfw_data.fw_lock);
- atomic_set(&sfw_data.fw_nzombies, 0);
- CFS_INIT_LIST_HEAD(&sfw_data.fw_tests);
- CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
- CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
-
- rc = sfw_register_test(&brw_test_service, &brw_test_client);
- LASSERT (rc == 0);
- rc = sfw_register_test(&ping_test_service, &ping_test_client);
- LASSERT (rc == 0);
-
- error = 0;
- list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
- sv = tsc->tsc_srv_service;
- sv->sv_concur = SFW_TEST_CONCURRENCY;
-
- rc = srpc_add_service(sv);
- LASSERT (rc != -EBUSY);
- if (rc != 0) {
- CWARN ("Failed to add %s service: %d\n",
- sv->sv_name, rc);
- error = rc;
- }
- }
-
- for (i = 0; ; i++) {
- sv = &sfw_services[i];
- if (sv->sv_name == NULL) break;
-
- sv->sv_bulk_ready = NULL;
- sv->sv_handler = sfw_handle_server_rpc;
- sv->sv_concur = SFW_SERVICE_CONCURRENCY;
- if (sv->sv_id == SRPC_SERVICE_TEST)
- sv->sv_bulk_ready = sfw_bulk_ready;
-
- rc = srpc_add_service(sv);
- LASSERT (rc != -EBUSY);
- if (rc != 0) {
- CWARN ("Failed to add %s service: %d\n",
- sv->sv_name, rc);
- error = rc;
- }
-
- /* about to sfw_shutdown, no need to add buffer */
- if (error) continue;
-
- rc = srpc_service_add_buffers(sv, SFW_POST_BUFFERS);
- if (rc != SFW_POST_BUFFERS) {
- CWARN ("Failed to reserve enough buffers: "
- "service %s, %d needed, %d reserved\n",
- sv->sv_name, SFW_POST_BUFFERS, rc);
- error = -ENOMEM;
- }
- }
-
- if (error != 0)
- sfw_shutdown();
- return error;
-}
-
-void
-sfw_shutdown (void)
-{
- srpc_service_t *sv;
- sfw_test_case_t *tsc;
- int i;
-
- spin_lock(&sfw_data.fw_lock);
-
- sfw_data.fw_shuttingdown = 1;
-#ifdef __KERNEL__
- lst_wait_until(sfw_data.fw_active_srpc == NULL, sfw_data.fw_lock,
- "waiting for active RPC to finish.\n");
-#else
- LASSERT (sfw_data.fw_active_srpc == NULL);
-#endif
-
- if (sfw_del_session_timer() != 0)
- lst_wait_until(sfw_data.fw_session == NULL, sfw_data.fw_lock,
- "waiting for session timer to explode.\n");
-
- sfw_deactivate_session();
- lst_wait_until(atomic_read(&sfw_data.fw_nzombies) == 0,
- sfw_data.fw_lock,
- "waiting for %d zombie sessions to die.\n",
- atomic_read(&sfw_data.fw_nzombies));
-
- spin_unlock(&sfw_data.fw_lock);
-
- for (i = 0; ; i++) {
- sv = &sfw_services[i];
- if (sv->sv_name == NULL)
- break;
-
- srpc_shutdown_service(sv);
- srpc_remove_service(sv);
- }
-
- list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
- sv = tsc->tsc_srv_service;
- srpc_shutdown_service(sv);
- srpc_remove_service(sv);
- }
-
- while (!list_empty(&sfw_data.fw_zombie_rpcs)) {
- srpc_client_rpc_t *rpc;
-
- rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
- srpc_client_rpc_t, crpc_list);
- list_del(&rpc->crpc_list);
-
- LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
- }
-
- for (i = 0; ; i++) {
- sv = &sfw_services[i];
- if (sv->sv_name == NULL)
- break;
-
- srpc_wait_service_shutdown(sv);
- }
-
- while (!list_empty(&sfw_data.fw_tests)) {
- tsc = list_entry(sfw_data.fw_tests.next,
- sfw_test_case_t, tsc_list);
-
- srpc_wait_service_shutdown(tsc->tsc_srv_service);
-
- list_del(&tsc->tsc_list);
- LIBCFS_FREE(tsc, sizeof(*tsc));
- }
-
- return;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-
-#define LST_INIT_NONE 0
-#define LST_INIT_RPC 1
-#define LST_INIT_FW 2
-#define LST_INIT_CONSOLE 3
-
-extern int lstcon_console_init(void);
-extern int lstcon_console_fini(void);
-
-static int lst_init_step = LST_INIT_NONE;
-
-void
-lnet_selftest_fini (void)
-{
- switch (lst_init_step) {
-#ifdef __KERNEL__
- case LST_INIT_CONSOLE:
- lstcon_console_fini();
-#endif
- case LST_INIT_FW:
- sfw_shutdown();
- case LST_INIT_RPC:
- srpc_shutdown();
- case LST_INIT_NONE:
- break;
- default:
- LBUG();
- }
- return;
-}
-
-int
-lnet_selftest_init (void)
-{
- int rc;
-
- rc = srpc_startup();
- if (rc != 0) {
- CERROR("LST can't startup rpc\n");
- goto error;
- }
- lst_init_step = LST_INIT_RPC;
-
- rc = sfw_startup();
- if (rc != 0) {
- CERROR("LST can't startup framework\n");
- goto error;
- }
- lst_init_step = LST_INIT_FW;
-
-#ifdef __KERNEL__
- rc = lstcon_console_init();
- if (rc != 0) {
- CERROR("LST can't startup console\n");
- goto error;
- }
- lst_init_step = LST_INIT_CONSOLE;
-#endif
-
- return 0;
-error:
- lnet_selftest_fini();
- return rc;
-}
-
-#ifdef __KERNEL__
-
-MODULE_DESCRIPTION("LNet Selftest");
-MODULE_LICENSE("GPL");
-
-cfs_module(lnet, "0.9.0", lnet_selftest_init, lnet_selftest_fini);
-
-#else
-
-int
-selftest_wait_events (void)
-{
- int evts = 0;
-
- for (;;) {
- /* Consume all pending events */
- while (srpc_check_event(0))
- evts++;
- evts += stt_check_events();
- evts += swi_check_events();
- if (evts != 0) break;
-
- /* Nothing happened, block for events */
- evts += srpc_check_event(stt_poll_interval());
- /* We may have blocked, check for expired timers */
- evts += stt_check_events();
- if (evts == 0) /* timed out and still no event */
- break;
- }
-
- return evts;
-}
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Test client & Server
- */
-
-#include "selftest.h"
-
-#define LST_PING_TEST_MAGIC 0xbabeface
-
-typedef struct {
- spinlock_t pnd_lock; /* serialize */
- int pnd_counter; /* sequence counter */
-} lst_ping_data_t;
-
-static lst_ping_data_t lst_ping_data;
-
-static int
-ping_client_init(sfw_test_instance_t *tsi)
-{
- LASSERT (tsi->tsi_is_client);
-
- spin_lock_init(&lst_ping_data.pnd_lock);
- lst_ping_data.pnd_counter = 0;
-
- return 0;
-}
-
-static void
-ping_client_fini (sfw_test_instance_t *tsi)
-{
- sfw_session_t *sn = tsi->tsi_batch->bat_session;
- int errors;
-
- LASSERT (sn != NULL);
- LASSERT (tsi->tsi_is_client);
-
- errors = atomic_read(&sn->sn_ping_errors);
- if (errors)
- CWARN ("%d pings have failed.\n", errors);
- else
- CDEBUG (D_NET, "Ping test finished OK.\n");
-}
-
-static int
-ping_client_prep_rpc(sfw_test_unit_t *tsu,
- lnet_process_id_t dest, srpc_client_rpc_t **rpc)
-{
- srpc_ping_reqst_t *req;
- struct timeval tv;
- int rc;
-
- rc = sfw_create_test_rpc(tsu, dest, 0, 0, rpc);
- if (rc != 0)
- return rc;
-
- req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst;
-
- req->pnr_magic = LST_PING_TEST_MAGIC;
-
- spin_lock(&lst_ping_data.pnd_lock);
- req->pnr_seq = lst_ping_data.pnd_counter ++;
- spin_unlock(&lst_ping_data.pnd_lock);
-
- cfs_fs_timeval(&tv);
- req->pnr_time_sec = tv.tv_sec;
- req->pnr_time_usec = tv.tv_usec;
-
- return rc;
-}
-
-static void
-ping_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
-{
- sfw_test_instance_t *tsi = tsu->tsu_instance;
- sfw_session_t *sn = tsi->tsi_batch->bat_session;
- srpc_ping_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
- srpc_ping_reply_t *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
- struct timeval tv;
-
- LASSERT (sn != NULL);
-
- if (rpc->crpc_status != 0) {
- if (!tsi->tsi_stopping) /* rpc could have been aborted */
- atomic_inc(&sn->sn_ping_errors);
- CERROR ("Unable to ping %s (%d): %d\n",
- libcfs_id2str(rpc->crpc_dest),
- reqst->pnr_seq, rpc->crpc_status);
- return;
- }
-
- if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
- __swab32s(&reply->pnr_seq);
- __swab32s(&reply->pnr_magic);
- __swab32s(&reply->pnr_status);
- }
-
- if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
- rpc->crpc_status = -EBADMSG;
- atomic_inc(&sn->sn_ping_errors);
- CERROR ("Bad magic %u from %s, %u expected.\n",
- reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
- LST_PING_TEST_MAGIC);
- return;
- }
-
- if (reply->pnr_seq != reqst->pnr_seq) {
- rpc->crpc_status = -EBADMSG;
- atomic_inc(&sn->sn_ping_errors);
- CERROR ("Bad seq %u from %s, %u expected.\n",
- reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
- reqst->pnr_seq);
- return;
- }
-
- cfs_fs_timeval(&tv);
- CDEBUG (D_NET, "%d reply in %u usec\n", reply->pnr_seq,
- (unsigned)((tv.tv_sec - (unsigned)reqst->pnr_time_sec) * 1000000
- + (tv.tv_usec - reqst->pnr_time_usec)));
- return;
-}
-
-static int
-ping_server_handle (srpc_server_rpc_t *rpc)
-{
- srpc_service_t *sv = rpc->srpc_service;
- srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
- srpc_ping_reqst_t *req = &reqstmsg->msg_body.ping_reqst;
- srpc_ping_reply_t *rep = &rpc->srpc_replymsg.msg_body.ping_reply;
-
- LASSERT (sv->sv_id == SRPC_SERVICE_PING);
-
- if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
- LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- __swab32s(&reqstmsg->msg_type);
- __swab32s(&req->pnr_seq);
- __swab32s(&req->pnr_magic);
- __swab64s(&req->pnr_time_sec);
- __swab64s(&req->pnr_time_usec);
- }
- LASSERT (reqstmsg->msg_type == srpc_service2request(sv->sv_id));
-
- if (req->pnr_magic != LST_PING_TEST_MAGIC) {
- CERROR ("Unexpect magic %08x from %s\n",
- req->pnr_magic, libcfs_id2str(rpc->srpc_peer));
- return -EINVAL;
- }
-
- rep->pnr_seq = req->pnr_seq;
- rep->pnr_magic = LST_PING_TEST_MAGIC;
-
- CDEBUG (D_NET, "Get ping %d from %s\n",
- req->pnr_seq, libcfs_id2str(rpc->srpc_peer));
- return 0;
-}
-
-sfw_test_client_ops_t ping_test_client =
-{
- .tso_init = ping_client_init,
- .tso_fini = ping_client_fini,
- .tso_prep_rpc = ping_client_prep_rpc,
- .tso_done_rpc = ping_client_done_rpc,
-};
-
-srpc_service_t ping_test_service =
-{
- .sv_name = "ping test",
- .sv_handler = ping_server_handle,
- .sv_id = SRPC_SERVICE_PING,
-};
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-
-#define SRPC_PEER_HASH_SIZE 101 /* # peer lists */
-
-typedef enum {
- SRPC_STATE_NONE,
- SRPC_STATE_NI_INIT,
- SRPC_STATE_EQ_INIT,
- SRPC_STATE_WI_INIT,
- SRPC_STATE_RUNNING,
- SRPC_STATE_STOPPING,
-} srpc_state_t;
-
-struct smoketest_rpc {
- spinlock_t rpc_glock; /* global lock */
- srpc_service_t *rpc_services[SRPC_SERVICE_MAX_ID + 1];
- struct list_head *rpc_peers; /* hash table of known peers */
- lnet_handle_eq_t rpc_lnet_eq; /* _the_ LNet event queue */
- srpc_state_t rpc_state;
- srpc_counters_t rpc_counters;
- __u64 rpc_matchbits; /* matchbits counter */
-} srpc_data;
-
-static int srpc_peer_credits = 16;
-CFS_MODULE_PARM(srpc_peer_credits, "i", int, 0444,
- "# in-flight RPCs per peer (16 by default)");
-
-/* forward ref's */
-int srpc_handle_rpc (swi_workitem_t *wi);
-
-void srpc_get_counters (srpc_counters_t *cnt)
-{
- spin_lock(&srpc_data.rpc_glock);
- *cnt = srpc_data.rpc_counters;
- spin_unlock(&srpc_data.rpc_glock);
-}
-
-void srpc_set_counters (const srpc_counters_t *cnt)
-{
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters = *cnt;
- spin_unlock(&srpc_data.rpc_glock);
-}
-
-void
-srpc_add_bulk_page (srpc_bulk_t *bk, cfs_page_t *pg, int i)
-{
- LASSERT (i >= 0 && i < bk->bk_niov);
-
-#ifdef __KERNEL__
- bk->bk_iovs[i].kiov_offset = 0;
- bk->bk_iovs[i].kiov_page = pg;
- bk->bk_iovs[i].kiov_len = CFS_PAGE_SIZE;
-#else
- LASSERT (bk->bk_pages != NULL);
-
- bk->bk_pages[i] = pg;
- bk->bk_iovs[i].iov_len = CFS_PAGE_SIZE;
- bk->bk_iovs[i].iov_base = cfs_page_address(pg);
-#endif
- return;
-}
-
-void
-srpc_free_bulk (srpc_bulk_t *bk)
-{
- int i;
- cfs_page_t *pg;
-
- LASSERT (bk != NULL);
-#ifndef __KERNEL__
- LASSERT (bk->bk_pages != NULL);
-#endif
-
- for (i = 0; i < bk->bk_niov; i++) {
-#ifdef __KERNEL__
- pg = bk->bk_iovs[i].kiov_page;
-#else
- pg = bk->bk_pages[i];
-#endif
- if (pg == NULL) break;
-
- cfs_free_page(pg);
- }
-
-#ifndef __KERNEL__
- LIBCFS_FREE(bk->bk_pages, sizeof(cfs_page_t *) * bk->bk_niov);
-#endif
- LIBCFS_FREE(bk, offsetof(srpc_bulk_t, bk_iovs[bk->bk_niov]));
- return;
-}
-
-srpc_bulk_t *
-srpc_alloc_bulk (int npages, int sink)
-{
- srpc_bulk_t *bk;
- cfs_page_t **pages;
- int i;
-
- LASSERT (npages > 0 && npages <= LNET_MAX_IOV);
-
- LIBCFS_ALLOC(bk, offsetof(srpc_bulk_t, bk_iovs[npages]));
- if (bk == NULL) {
- CERROR ("Can't allocate descriptor for %d pages\n", npages);
- return NULL;
- }
-
- memset(bk, 0, offsetof(srpc_bulk_t, bk_iovs[npages]));
- bk->bk_sink = sink;
- bk->bk_niov = npages;
- bk->bk_len = npages * CFS_PAGE_SIZE;
-#ifndef __KERNEL__
- LIBCFS_ALLOC(pages, sizeof(cfs_page_t *) * npages);
- if (pages == NULL) {
- LIBCFS_FREE(bk, offsetof(srpc_bulk_t, bk_iovs[npages]));
- CERROR ("Can't allocate page array for %d pages\n", npages);
- return NULL;
- }
-
- memset(pages, 0, sizeof(cfs_page_t *) * npages);
- bk->bk_pages = pages;
-#else
- UNUSED (pages);
-#endif
-
- for (i = 0; i < npages; i++) {
- cfs_page_t *pg = cfs_alloc_page(CFS_ALLOC_STD);
-
- if (pg == NULL) {
- CERROR ("Can't allocate page %d of %d\n", i, npages);
- srpc_free_bulk(bk);
- return NULL;
- }
-
- srpc_add_bulk_page(bk, pg, i);
- }
-
- return bk;
-}
-
-
-static inline struct list_head *
-srpc_nid2peerlist (lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % SRPC_PEER_HASH_SIZE;
-
- return &srpc_data.rpc_peers[hash];
-}
-
-static inline srpc_peer_t *
-srpc_create_peer (lnet_nid_t nid)
-{
- srpc_peer_t *peer;
-
- LASSERT (nid != LNET_NID_ANY);
-
- LIBCFS_ALLOC(peer, sizeof(srpc_peer_t));
- if (peer == NULL) {
- CERROR ("Failed to allocate peer structure for %s\n",
- libcfs_nid2str(nid));
- return NULL;
- }
-
- memset(peer, 0, sizeof(srpc_peer_t));
- peer->stp_nid = nid;
- peer->stp_credits = srpc_peer_credits;
-
- spin_lock_init(&peer->stp_lock);
- CFS_INIT_LIST_HEAD(&peer->stp_rpcq);
- CFS_INIT_LIST_HEAD(&peer->stp_ctl_rpcq);
- return peer;
-}
-
-srpc_peer_t *
-srpc_find_peer_locked (lnet_nid_t nid)
-{
- struct list_head *peer_list = srpc_nid2peerlist(nid);
- srpc_peer_t *peer;
-
- LASSERT (nid != LNET_NID_ANY);
-
- list_for_each_entry (peer, peer_list, stp_list) {
- if (peer->stp_nid == nid)
- return peer;
- }
-
- return NULL;
-}
-
-static srpc_peer_t *
-srpc_nid2peer (lnet_nid_t nid)
-{
- srpc_peer_t *peer;
- srpc_peer_t *new_peer;
-
- spin_lock(&srpc_data.rpc_glock);
- peer = srpc_find_peer_locked(nid);
- spin_unlock(&srpc_data.rpc_glock);
-
- if (peer != NULL)
- return peer;
-
- new_peer = srpc_create_peer(nid);
-
- spin_lock(&srpc_data.rpc_glock);
-
- peer = srpc_find_peer_locked(nid);
- if (peer != NULL) {
- spin_unlock(&srpc_data.rpc_glock);
- if (new_peer != NULL)
- LIBCFS_FREE(new_peer, sizeof(srpc_peer_t));
-
- return peer;
- }
-
- if (new_peer == NULL) {
- spin_unlock(&srpc_data.rpc_glock);
- return NULL;
- }
-
- list_add_tail(&new_peer->stp_list, srpc_nid2peerlist(nid));
- spin_unlock(&srpc_data.rpc_glock);
- return new_peer;
-}
-
-static inline __u64
-srpc_next_id (void)
-{
- __u64 id;
-
- spin_lock(&srpc_data.rpc_glock);
- id = srpc_data.rpc_matchbits++;
- spin_unlock(&srpc_data.rpc_glock);
- return id;
-}
-
-void
-srpc_init_server_rpc (srpc_server_rpc_t *rpc,
- srpc_service_t *sv, srpc_buffer_t *buffer)
-{
- memset(rpc, 0, sizeof(*rpc));
- swi_init_workitem(&rpc->srpc_wi, rpc, srpc_handle_rpc);
-
- rpc->srpc_ev.ev_fired = 1; /* no event expected now */
-
- rpc->srpc_service = sv;
- rpc->srpc_reqstbuf = buffer;
- rpc->srpc_peer = buffer->buf_peer;
- rpc->srpc_self = buffer->buf_self;
- rpc->srpc_replymdh = LNET_INVALID_HANDLE;
-}
-
-int
-srpc_add_service (srpc_service_t *sv)
-{
- int id = sv->sv_id;
- int i;
- srpc_server_rpc_t *rpc;
-
- LASSERT (sv->sv_concur > 0);
- LASSERT (0 <= id && id <= SRPC_SERVICE_MAX_ID);
-
- spin_lock(&srpc_data.rpc_glock);
-
- LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
- if (srpc_data.rpc_services[id] != NULL) {
- spin_unlock(&srpc_data.rpc_glock);
- return -EBUSY;
- }
-
- srpc_data.rpc_services[id] = sv;
- spin_unlock(&srpc_data.rpc_glock);
-
- sv->sv_nprune = 0;
- sv->sv_nposted_msg = 0;
- sv->sv_shuttingdown = 0;
- spin_lock_init(&sv->sv_lock);
- CFS_INIT_LIST_HEAD(&sv->sv_free_rpcq);
- CFS_INIT_LIST_HEAD(&sv->sv_active_rpcq);
- CFS_INIT_LIST_HEAD(&sv->sv_posted_msgq);
- CFS_INIT_LIST_HEAD(&sv->sv_blocked_msgq);
-
- sv->sv_ev.ev_data = sv;
- sv->sv_ev.ev_type = SRPC_REQUEST_RCVD;
-
- for (i = 0; i < sv->sv_concur; i++) {
- LIBCFS_ALLOC(rpc, sizeof(*rpc));
- if (rpc == NULL) goto enomem;
-
- list_add(&rpc->srpc_list, &sv->sv_free_rpcq);
- }
-
- CDEBUG (D_NET, "Adding service: id %d, name %s, concurrency %d\n",
- id, sv->sv_name, sv->sv_concur);
- return 0;
-
-enomem:
- while (!list_empty(&sv->sv_free_rpcq)) {
- rpc = list_entry(sv->sv_free_rpcq.next,
- srpc_server_rpc_t, srpc_list);
- list_del(&rpc->srpc_list);
- LIBCFS_FREE(rpc, sizeof(*rpc));
- }
-
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_services[id] = NULL;
- spin_unlock(&srpc_data.rpc_glock);
- return -ENOMEM;
-}
-
-int
-srpc_remove_service (srpc_service_t *sv)
-{
- int id = sv->sv_id;
-
- spin_lock(&srpc_data.rpc_glock);
-
- if (srpc_data.rpc_services[id] != sv) {
- spin_unlock(&srpc_data.rpc_glock);
- return -ENOENT;
- }
-
- srpc_data.rpc_services[id] = NULL;
- spin_unlock(&srpc_data.rpc_glock);
- return 0;
-}
-
-int
-srpc_post_passive_rdma(int portal, __u64 matchbits, void *buf,
- int len, int options, lnet_process_id_t peer,
- lnet_handle_md_t *mdh, srpc_event_t *ev)
-{
- int rc;
- lnet_md_t md;
- lnet_handle_me_t meh;
-
- rc = LNetMEAttach(portal, peer, matchbits, 0,
- LNET_UNLINK, LNET_INS_AFTER, &meh);
- if (rc != 0) {
- CERROR ("LNetMEAttach failed: %d\n", rc);
- LASSERT (rc == -ENOMEM);
- return -ENOMEM;
- }
-
- md.threshold = 1;
- md.user_ptr = ev;
- md.start = buf;
- md.length = len;
- md.options = options;
- md.eq_handle = srpc_data.rpc_lnet_eq;
-
- rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh);
- if (rc != 0) {
- CERROR ("LNetMDAttach failed: %d\n", rc);
- LASSERT (rc == -ENOMEM);
-
- rc = LNetMEUnlink(meh);
- LASSERT (rc == 0);
- return -ENOMEM;
- }
-
- CDEBUG (D_NET,
- "Posted passive RDMA: peer %s, portal %d, matchbits "LPX64"\n",
- libcfs_id2str(peer), portal, matchbits);
- return 0;
-}
-
-int
-srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
- int options, lnet_process_id_t peer, lnet_nid_t self,
- lnet_handle_md_t *mdh, srpc_event_t *ev)
-{
- int rc;
- lnet_md_t md;
-
- md.user_ptr = ev;
- md.start = buf;
- md.length = len;
- md.eq_handle = srpc_data.rpc_lnet_eq;
- md.threshold = ((options & LNET_MD_OP_GET) != 0) ? 2 : 1;
- md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET);
-
- rc = LNetMDBind(md, LNET_UNLINK, mdh);
- if (rc != 0) {
- CERROR ("LNetMDBind failed: %d\n", rc);
- LASSERT (rc == -ENOMEM);
- return -ENOMEM;
- }
-
- /* this is kind of an abuse of the LNET_MD_OP_{PUT,GET} options.
- * they're only meaningful for MDs attached to an ME (i.e. passive
- * buffers... */
- if ((options & LNET_MD_OP_PUT) != 0) {
- rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer,
- portal, matchbits, 0, 0);
- } else {
- LASSERT ((options & LNET_MD_OP_GET) != 0);
-
- rc = LNetGet(self, *mdh, peer, portal, matchbits, 0);
- }
-
- if (rc != 0) {
- CERROR ("LNet%s(%s, %d, "LPD64") failed: %d\n",
- ((options & LNET_MD_OP_PUT) != 0) ? "Put" : "Get",
- libcfs_id2str(peer), portal, matchbits, rc);
-
- /* The forthcoming unlink event will complete this operation
- * with failure, so fall through and return success here.
- */
- rc = LNetMDUnlink(*mdh);
- LASSERT (rc == 0);
- } else {
- CDEBUG (D_NET,
- "Posted active RDMA: peer %s, portal %u, matchbits "LPX64"\n",
- libcfs_id2str(peer), portal, matchbits);
- }
- return 0;
-}
-
-int
-srpc_post_active_rqtbuf(lnet_process_id_t peer, int service, void *buf,
- int len, lnet_handle_md_t *mdh, srpc_event_t *ev)
-{
- int rc;
- int portal;
-
- if (service > SRPC_FRAMEWORK_SERVICE_MAX_ID)
- portal = SRPC_REQUEST_PORTAL;
- else
- portal = SRPC_FRAMEWORK_REQUEST_PORTAL;
-
- rc = srpc_post_active_rdma(portal, service, buf, len,
- LNET_MD_OP_PUT, peer,
- LNET_NID_ANY, mdh, ev);
- return rc;
-}
-
-int
-srpc_post_passive_rqtbuf(int service, void *buf, int len,
- lnet_handle_md_t *mdh, srpc_event_t *ev)
-{
- int rc;
- int portal;
- lnet_process_id_t any = {.nid = LNET_NID_ANY,
- .pid = LNET_PID_ANY};
-
- if (service > SRPC_FRAMEWORK_SERVICE_MAX_ID)
- portal = SRPC_REQUEST_PORTAL;
- else
- portal = SRPC_FRAMEWORK_REQUEST_PORTAL;
-
- rc = srpc_post_passive_rdma(portal, service, buf, len,
- LNET_MD_OP_PUT, any, mdh, ev);
- return rc;
-}
-
-int
-srpc_service_post_buffer (srpc_service_t *sv, srpc_buffer_t *buf)
-{
- srpc_msg_t *msg = &buf->buf_msg;
- int rc;
-
- LASSERT (!sv->sv_shuttingdown);
-
- buf->buf_mdh = LNET_INVALID_HANDLE;
- list_add(&buf->buf_list, &sv->sv_posted_msgq);
- sv->sv_nposted_msg++;
- spin_unlock(&sv->sv_lock);
-
- rc = srpc_post_passive_rqtbuf(sv->sv_id, msg, sizeof(*msg),
- &buf->buf_mdh, &sv->sv_ev);
-
- /* At this point, a RPC (new or delayed) may have arrived in
- * msg and its event handler has been called. So we must add
- * buf to sv_posted_msgq _before_ dropping sv_lock */
-
- spin_lock(&sv->sv_lock);
-
- if (rc == 0) {
- if (sv->sv_shuttingdown) {
- spin_unlock(&sv->sv_lock);
-
- /* srpc_shutdown_service might have tried to unlink me
- * when my buf_mdh was still invalid */
- LNetMDUnlink(buf->buf_mdh);
-
- spin_lock(&sv->sv_lock);
- }
- return 0;
- }
-
- sv->sv_nposted_msg--;
- if (sv->sv_shuttingdown) return rc;
-
- list_del(&buf->buf_list);
-
- spin_unlock(&sv->sv_lock);
- LIBCFS_FREE(buf, sizeof(*buf));
- spin_lock(&sv->sv_lock);
- return rc;
-}
-
-int
-srpc_service_add_buffers (srpc_service_t *sv, int nbuffer)
-{
- int rc;
- int posted;
- srpc_buffer_t *buf;
-
- LASSERTF (nbuffer > 0,
- "nbuffer must be positive: %d\n", nbuffer);
-
- for (posted = 0; posted < nbuffer; posted++) {
- LIBCFS_ALLOC(buf, sizeof(*buf));
- if (buf == NULL) break;
-
- spin_lock(&sv->sv_lock);
- rc = srpc_service_post_buffer(sv, buf);
- spin_unlock(&sv->sv_lock);
-
- if (rc != 0) break;
- }
-
- return posted;
-}
-
-void
-srpc_service_remove_buffers (srpc_service_t *sv, int nbuffer)
-{
- LASSERTF (nbuffer > 0,
- "nbuffer must be positive: %d\n", nbuffer);
-
- spin_lock(&sv->sv_lock);
-
- LASSERT (sv->sv_nprune >= 0);
- LASSERT (!sv->sv_shuttingdown);
-
- sv->sv_nprune += nbuffer;
-
- spin_unlock(&sv->sv_lock);
- return;
-}
-
-/* returns 1 if sv has finished, otherwise 0 */
-int
-srpc_finish_service (srpc_service_t *sv)
-{
- srpc_server_rpc_t *rpc;
- srpc_buffer_t *buf;
-
- spin_lock(&sv->sv_lock);
-
- LASSERT (sv->sv_shuttingdown); /* srpc_shutdown_service called */
-
- if (sv->sv_nposted_msg != 0 || !list_empty(&sv->sv_active_rpcq)) {
- CDEBUG (D_NET,
- "waiting for %d posted buffers to unlink and "
- "in-flight RPCs to die.\n",
- sv->sv_nposted_msg);
-
- if (!list_empty(&sv->sv_active_rpcq)) {
- rpc = list_entry(sv->sv_active_rpcq.next,
- srpc_server_rpc_t, srpc_list);
- CDEBUG (D_NETERROR,
- "Active RPC on shutdown: sv %s, peer %s, "
- "wi %s scheduled %d running %d, "
- "ev fired %d type %d status %d lnet %d\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer),
- swi_state2str(rpc->srpc_wi.wi_state),
- rpc->srpc_wi.wi_scheduled,
- rpc->srpc_wi.wi_running,
- rpc->srpc_ev.ev_fired,
- rpc->srpc_ev.ev_type,
- rpc->srpc_ev.ev_status,
- rpc->srpc_ev.ev_lnet);
- }
-
- spin_unlock(&sv->sv_lock);
- return 0;
- }
-
- spin_unlock(&sv->sv_lock); /* no lock needed from now on */
-
- for (;;) {
- struct list_head *q;
-
- if (!list_empty(&sv->sv_posted_msgq))
- q = &sv->sv_posted_msgq;
- else if (!list_empty(&sv->sv_blocked_msgq))
- q = &sv->sv_blocked_msgq;
- else
- break;
-
- buf = list_entry(q->next, srpc_buffer_t, buf_list);
- list_del(&buf->buf_list);
-
- LIBCFS_FREE(buf, sizeof(*buf));
- }
-
- while (!list_empty(&sv->sv_free_rpcq)) {
- rpc = list_entry(sv->sv_free_rpcq.next,
- srpc_server_rpc_t, srpc_list);
- list_del(&rpc->srpc_list);
- LIBCFS_FREE(rpc, sizeof(*rpc));
- }
-
- return 1;
-}
-
-/* called with sv->sv_lock held */
-void
-srpc_service_recycle_buffer (srpc_service_t *sv, srpc_buffer_t *buf)
-{
- if (sv->sv_shuttingdown) goto free;
-
- if (sv->sv_nprune == 0) {
- if (srpc_service_post_buffer(sv, buf) != 0)
- CWARN ("Failed to post %s buffer\n", sv->sv_name);
- return;
- }
-
- sv->sv_nprune--;
-free:
- spin_unlock(&sv->sv_lock);
- LIBCFS_FREE(buf, sizeof(*buf));
- spin_lock(&sv->sv_lock);
-}
-
-void
-srpc_shutdown_service (srpc_service_t *sv)
-{
- srpc_server_rpc_t *rpc;
- srpc_buffer_t *buf;
-
- spin_lock(&sv->sv_lock);
-
- CDEBUG (D_NET, "Shutting down service: id %d, name %s\n",
- sv->sv_id, sv->sv_name);
-
- sv->sv_shuttingdown = 1; /* i.e. no new active RPC */
-
- /* schedule in-flight RPCs to notice the shutdown */
- list_for_each_entry (rpc, &sv->sv_active_rpcq, srpc_list) {
- swi_schedule_workitem(&rpc->srpc_wi);
- }
-
- spin_unlock(&sv->sv_lock);
-
- /* OK to traverse sv_posted_msgq without lock, since no one
- * touches sv_posted_msgq now */
- list_for_each_entry (buf, &sv->sv_posted_msgq, buf_list)
- LNetMDUnlink(buf->buf_mdh);
-
- return;
-}
-
-int
-srpc_send_request (srpc_client_rpc_t *rpc)
-{
- srpc_event_t *ev = &rpc->crpc_reqstev;
- int rc;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_REQUEST_SENT;
-
- rc = srpc_post_active_rqtbuf(rpc->crpc_dest, rpc->crpc_service,
- &rpc->crpc_reqstmsg, sizeof(srpc_msg_t),
- &rpc->crpc_reqstmdh, ev);
- if (rc != 0) {
- LASSERT (rc == -ENOMEM);
- ev->ev_fired = 1; /* no more event expected */
- }
- return rc;
-}
-
-int
-srpc_prepare_reply (srpc_client_rpc_t *rpc)
-{
- srpc_event_t *ev = &rpc->crpc_replyev;
- __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid;
- int rc;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_REPLY_RCVD;
-
- *id = srpc_next_id();
-
- rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, *id,
- &rpc->crpc_replymsg, sizeof(srpc_msg_t),
- LNET_MD_OP_PUT, rpc->crpc_dest,
- &rpc->crpc_replymdh, ev);
- if (rc != 0) {
- LASSERT (rc == -ENOMEM);
- ev->ev_fired = 1; /* no more event expected */
- }
- return rc;
-}
-
-int
-srpc_prepare_bulk (srpc_client_rpc_t *rpc)
-{
- srpc_bulk_t *bk = &rpc->crpc_bulk;
- srpc_event_t *ev = &rpc->crpc_bulkev;
- __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid;
- int rc;
- int opt;
-
- LASSERT (bk->bk_niov <= LNET_MAX_IOV);
-
- if (bk->bk_niov == 0) return 0; /* nothing to do */
-
- opt = bk->bk_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET;
-#ifdef __KERNEL__
- opt |= LNET_MD_KIOV;
-#else
- opt |= LNET_MD_IOVEC;
-#endif
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_BULK_REQ_RCVD;
-
- *id = srpc_next_id();
-
- rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, *id,
- &bk->bk_iovs[0], bk->bk_niov, opt,
- rpc->crpc_dest, &bk->bk_mdh, ev);
- if (rc != 0) {
- LASSERT (rc == -ENOMEM);
- ev->ev_fired = 1; /* no more event expected */
- }
- return rc;
-}
-
-int
-srpc_do_bulk (srpc_server_rpc_t *rpc)
-{
- srpc_event_t *ev = &rpc->srpc_ev;
- srpc_bulk_t *bk = rpc->srpc_bulk;
- __u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid;
- int rc;
- int opt;
-
- LASSERT (bk != NULL);
-
- opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT;
-#ifdef __KERNEL__
- opt |= LNET_MD_KIOV;
-#else
- opt |= LNET_MD_IOVEC;
-#endif
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT;
-
- rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id,
- &bk->bk_iovs[0], bk->bk_niov, opt,
- rpc->srpc_peer, rpc->srpc_self,
- &bk->bk_mdh, ev);
- if (rc != 0)
- ev->ev_fired = 1; /* no more event expected */
- return rc;
-}
-
-/* called with srpc_service_t::sv_lock held */
-inline void
-srpc_schedule_server_rpc (srpc_server_rpc_t *rpc)
-{
- srpc_service_t *sv = rpc->srpc_service;
-
- if (sv->sv_id > SRPC_FRAMEWORK_SERVICE_MAX_ID)
- swi_schedule_workitem(&rpc->srpc_wi);
- else /* framework RPCs are handled one by one */
- swi_schedule_serial_workitem(&rpc->srpc_wi);
-
- return;
-}
-
-/* only called from srpc_handle_rpc */
-void
-srpc_server_rpc_done (srpc_server_rpc_t *rpc, int status)
-{
- srpc_service_t *sv = rpc->srpc_service;
- srpc_buffer_t *buffer;
-
- LASSERT (status != 0 || rpc->srpc_wi.wi_state == SWI_STATE_DONE);
-
- rpc->srpc_status = status;
-
- CDEBUG (status == 0 ? D_NET : D_NETERROR,
- "Server RPC done: service %s, peer %s, status %s:%d\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer),
- swi_state2str(rpc->srpc_wi.wi_state), status);
-
- if (status != 0) {
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_dropped++;
- spin_unlock(&srpc_data.rpc_glock);
- }
-
- if (rpc->srpc_done != NULL)
- (*rpc->srpc_done) (rpc);
- LASSERT (rpc->srpc_bulk == NULL);
-
- spin_lock(&sv->sv_lock);
-
- if (rpc->srpc_reqstbuf != NULL) {
- /* NB might drop sv_lock in srpc_service_recycle_buffer, but
- * sv won't go away for sv_active_rpcq must not be empty */
- srpc_service_recycle_buffer(sv, rpc->srpc_reqstbuf);
- rpc->srpc_reqstbuf = NULL;
- }
-
- list_del(&rpc->srpc_list); /* from sv->sv_active_rpcq */
-
- /*
- * No one can schedule me now since:
- * - I'm not on sv_active_rpcq.
- * - all LNet events have been fired.
- * Cancel pending schedules and prevent future schedule attempts:
- */
- LASSERT (rpc->srpc_ev.ev_fired);
- swi_kill_workitem(&rpc->srpc_wi);
-
- if (!sv->sv_shuttingdown && !list_empty(&sv->sv_blocked_msgq)) {
- buffer = list_entry(sv->sv_blocked_msgq.next,
- srpc_buffer_t, buf_list);
- list_del(&buffer->buf_list);
-
- srpc_init_server_rpc(rpc, sv, buffer);
- list_add_tail(&rpc->srpc_list, &sv->sv_active_rpcq);
- srpc_schedule_server_rpc(rpc);
- } else {
- list_add(&rpc->srpc_list, &sv->sv_free_rpcq);
- }
-
- spin_unlock(&sv->sv_lock);
- return;
-}
-
-/* handles an incoming RPC */
-int
-srpc_handle_rpc (swi_workitem_t *wi)
-{
- srpc_server_rpc_t *rpc = wi->wi_data;
- srpc_service_t *sv = rpc->srpc_service;
- srpc_event_t *ev = &rpc->srpc_ev;
- int rc = 0;
-
- LASSERT (wi == &rpc->srpc_wi);
-
- spin_lock(&sv->sv_lock);
-
- if (sv->sv_shuttingdown) {
- spin_unlock(&sv->sv_lock);
-
- if (rpc->srpc_bulk != NULL)
- LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
- LNetMDUnlink(rpc->srpc_replymdh);
-
- if (ev->ev_fired) { /* no more event, OK to finish */
- srpc_server_rpc_done(rpc, -ESHUTDOWN);
- return 1;
- }
- return 0;
- }
-
- spin_unlock(&sv->sv_lock);
-
- switch (wi->wi_state) {
- default:
- LBUG ();
- case SWI_STATE_NEWBORN: {
- srpc_msg_t *msg;
- srpc_generic_reply_t *reply;
-
- msg = &rpc->srpc_reqstbuf->buf_msg;
- reply = &rpc->srpc_replymsg.msg_body.reply;
-
- if (msg->msg_version != SRPC_MSG_VERSION &&
- msg->msg_version != __swab32(SRPC_MSG_VERSION)) {
- CWARN ("Version mismatch: %u, %u expected, from %s\n",
- msg->msg_version, SRPC_MSG_VERSION,
- libcfs_id2str(rpc->srpc_peer));
- reply->status = EPROTO;
- } else {
- reply->status = 0;
- rc = (*sv->sv_handler) (rpc);
- LASSERT (reply->status == 0 || !rpc->srpc_bulk);
- }
-
- if (rc != 0) {
- srpc_server_rpc_done(rpc, rc);
- return 1;
- }
-
- wi->wi_state = SWI_STATE_BULK_STARTED;
-
- if (rpc->srpc_bulk != NULL) {
- rc = srpc_do_bulk(rpc);
- if (rc == 0)
- return 0; /* wait for bulk */
-
- LASSERT (ev->ev_fired);
- ev->ev_status = rc;
- }
- }
- case SWI_STATE_BULK_STARTED:
- LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired);
-
- if (rpc->srpc_bulk != NULL) {
- rc = ev->ev_status;
-
- if (sv->sv_bulk_ready != NULL)
- rc = (*sv->sv_bulk_ready) (rpc, rc);
-
- if (rc != 0) {
- srpc_server_rpc_done(rpc, rc);
- return 1;
- }
- }
-
- wi->wi_state = SWI_STATE_REPLY_SUBMITTED;
- rc = srpc_send_reply(rpc);
- if (rc == 0)
- return 0; /* wait for reply */
- srpc_server_rpc_done(rpc, rc);
- return 1;
-
- case SWI_STATE_REPLY_SUBMITTED:
- LASSERT (ev->ev_fired);
-
- wi->wi_state = SWI_STATE_DONE;
- srpc_server_rpc_done(rpc, ev->ev_status);
- return 1;
- }
-
- return 0;
-}
-
-void
-srpc_client_rpc_expired (void *data)
-{
- srpc_client_rpc_t *rpc = data;
-
- CWARN ("Client RPC expired: service %d, peer %s, timeout %d.\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- rpc->crpc_timeout);
-
- spin_lock(&rpc->crpc_lock);
-
- rpc->crpc_timeout = 0;
- srpc_abort_rpc(rpc, -ETIMEDOUT);
-
- spin_unlock(&rpc->crpc_lock);
-
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_expired++;
- spin_unlock(&srpc_data.rpc_glock);
- return;
-}
-
-inline void
-srpc_add_client_rpc_timer (srpc_client_rpc_t *rpc)
-{
- stt_timer_t *timer = &rpc->crpc_timer;
-
- if (rpc->crpc_timeout == 0) return;
-
- CFS_INIT_LIST_HEAD(&timer->stt_list);
- timer->stt_data = rpc;
- timer->stt_func = srpc_client_rpc_expired;
- timer->stt_expires = cfs_time_add(rpc->crpc_timeout,
- cfs_time_current_sec());
- stt_add_timer(timer);
- return;
-}
-
-/*
- * Called with rpc->crpc_lock held.
- *
- * Upon exit the RPC expiry timer is not queued and the handler is not
- * running on any CPU. */
-void
-srpc_del_client_rpc_timer (srpc_client_rpc_t *rpc)
-{
- /* timer not planted or already exploded */
- if (rpc->crpc_timeout == 0) return;
-
- /* timer sucessfully defused */
- if (stt_del_timer(&rpc->crpc_timer)) return;
-
-#ifdef __KERNEL__
- /* timer detonated, wait for it to explode */
- while (rpc->crpc_timeout != 0) {
- spin_unlock(&rpc->crpc_lock);
-
- cfs_schedule();
-
- spin_lock(&rpc->crpc_lock);
- }
-#else
- LBUG(); /* impossible in single-threaded runtime */
-#endif
- return;
-}
-
-void
-srpc_check_sends (srpc_peer_t *peer, int credits)
-{
- struct list_head *q;
- srpc_client_rpc_t *rpc;
-
- LASSERT (credits >= 0);
- LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
- spin_lock(&peer->stp_lock);
- peer->stp_credits += credits;
-
- while (peer->stp_credits) {
- if (!list_empty(&peer->stp_ctl_rpcq))
- q = &peer->stp_ctl_rpcq;
- else if (!list_empty(&peer->stp_rpcq))
- q = &peer->stp_rpcq;
- else
- break;
-
- peer->stp_credits--;
-
- rpc = list_entry(q->next, srpc_client_rpc_t, crpc_privl);
- list_del_init(&rpc->crpc_privl);
- srpc_client_rpc_decref(rpc); /* --ref for peer->*rpcq */
-
- swi_schedule_workitem(&rpc->crpc_wi);
- }
-
- spin_unlock(&peer->stp_lock);
- return;
-}
-
-void
-srpc_client_rpc_done (srpc_client_rpc_t *rpc, int status)
-{
- swi_workitem_t *wi = &rpc->crpc_wi;
- srpc_peer_t *peer = rpc->crpc_peer;
-
- LASSERT (status != 0 || wi->wi_state == SWI_STATE_DONE);
-
- spin_lock(&rpc->crpc_lock);
-
- rpc->crpc_closed = 1;
- if (rpc->crpc_status == 0)
- rpc->crpc_status = status;
-
- srpc_del_client_rpc_timer(rpc);
-
- CDEBUG ((status == 0) ? D_NET : D_NETERROR,
- "Client RPC done: service %d, peer %s, status %s:%d:%d\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- swi_state2str(wi->wi_state), rpc->crpc_aborted, status);
-
- /*
- * No one can schedule me now since:
- * - RPC timer has been defused.
- * - all LNet events have been fired.
- * - crpc_closed has been set, preventing srpc_abort_rpc from
- * scheduling me.
- * Cancel pending schedules and prevent future schedule attempts:
- */
- LASSERT (!srpc_event_pending(rpc));
- swi_kill_workitem(wi);
-
- spin_unlock(&rpc->crpc_lock);
-
- (*rpc->crpc_done) (rpc);
-
- if (peer != NULL)
- srpc_check_sends(peer, 1);
- return;
-}
-
-/* sends an outgoing RPC */
-int
-srpc_send_rpc (swi_workitem_t *wi)
-{
- int rc = 0;
- srpc_client_rpc_t *rpc = wi->wi_data;
- srpc_msg_t *reply = &rpc->crpc_replymsg;
- int do_bulk = rpc->crpc_bulk.bk_niov > 0;
-
- LASSERT (rpc != NULL);
- LASSERT (wi == &rpc->crpc_wi);
-
- spin_lock(&rpc->crpc_lock);
-
- if (rpc->crpc_aborted) {
- spin_unlock(&rpc->crpc_lock);
- goto abort;
- }
-
- spin_unlock(&rpc->crpc_lock);
-
- switch (wi->wi_state) {
- default:
- LBUG ();
- case SWI_STATE_NEWBORN:
- LASSERT (!srpc_event_pending(rpc));
-
- rc = srpc_prepare_reply(rpc);
- if (rc != 0) {
- srpc_client_rpc_done(rpc, rc);
- return 1;
- }
-
- rc = srpc_prepare_bulk(rpc);
- if (rc != 0) break;
-
- wi->wi_state = SWI_STATE_REQUEST_SUBMITTED;
- rc = srpc_send_request(rpc);
- break;
-
- case SWI_STATE_REQUEST_SUBMITTED:
- /* CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
- * order; however, they're processed in a strict order:
- * rqt, rpy, and bulk. */
- if (!rpc->crpc_reqstev.ev_fired) break;
-
- rc = rpc->crpc_reqstev.ev_status;
- if (rc != 0) break;
-
- wi->wi_state = SWI_STATE_REQUEST_SENT;
- /* perhaps more events, fall thru */
- case SWI_STATE_REQUEST_SENT: {
- srpc_msg_type_t type = srpc_service2reply(rpc->crpc_service);
-
- if (!rpc->crpc_replyev.ev_fired) break;
-
- rc = rpc->crpc_replyev.ev_status;
- if (rc != 0) break;
-
- if ((reply->msg_type != type &&
- reply->msg_type != __swab32(type)) ||
- (reply->msg_magic != SRPC_MSG_MAGIC &&
- reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
- CWARN ("Bad message from %s: type %u (%d expected),"
- " magic %u (%d expected).\n",
- libcfs_id2str(rpc->crpc_dest),
- reply->msg_type, type,
- reply->msg_magic, SRPC_MSG_MAGIC);
- rc = -EBADMSG;
- break;
- }
-
- if (do_bulk && reply->msg_body.reply.status != 0) {
- CWARN ("Remote error %d at %s, unlink bulk buffer in "
- "case peer didn't initiate bulk transfer\n",
- reply->msg_body.reply.status,
- libcfs_id2str(rpc->crpc_dest));
- LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
- }
-
- wi->wi_state = SWI_STATE_REPLY_RECEIVED;
- }
- case SWI_STATE_REPLY_RECEIVED:
- if (do_bulk && !rpc->crpc_bulkev.ev_fired) break;
-
- rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0;
-
- /* Bulk buffer was unlinked due to remote error. Clear error
- * since reply buffer still contains valid data.
- * NB rpc->crpc_done shouldn't look into bulk data in case of
- * remote error. */
- if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK &&
- rpc->crpc_status == 0 && reply->msg_body.reply.status != 0)
- rc = 0;
-
- wi->wi_state = SWI_STATE_DONE;
- srpc_client_rpc_done(rpc, rc);
- return 1;
- }
-
- if (rc != 0) {
- spin_lock(&rpc->crpc_lock);
- srpc_abort_rpc(rpc, rc);
- spin_unlock(&rpc->crpc_lock);
- }
-
-abort:
- if (rpc->crpc_aborted) {
- LNetMDUnlink(rpc->crpc_reqstmdh);
- LNetMDUnlink(rpc->crpc_replymdh);
- LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
-
- if (!srpc_event_pending(rpc)) {
- srpc_client_rpc_done(rpc, -EINTR);
- return 1;
- }
- }
- return 0;
-}
-
-srpc_client_rpc_t *
-srpc_create_client_rpc (lnet_process_id_t peer, int service,
- int nbulkiov, int bulklen,
- void (*rpc_done)(srpc_client_rpc_t *),
- void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
-{
- srpc_client_rpc_t *rpc;
-
- LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t,
- crpc_bulk.bk_iovs[nbulkiov]));
- if (rpc == NULL)
- return NULL;
-
- srpc_init_client_rpc(rpc, peer, service, nbulkiov,
- bulklen, rpc_done, rpc_fini, priv);
- return rpc;
-}
-
-/* called with rpc->crpc_lock held */
-static inline void
-srpc_queue_rpc (srpc_peer_t *peer, srpc_client_rpc_t *rpc)
-{
- int service = rpc->crpc_service;
-
- LASSERT (peer->stp_nid == rpc->crpc_dest.nid);
- LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
- rpc->crpc_peer = peer;
-
- spin_lock(&peer->stp_lock);
-
- /* Framework RPCs that alter session state shall take precedence
- * over test RPCs and framework query RPCs */
- if (service <= SRPC_FRAMEWORK_SERVICE_MAX_ID &&
- service != SRPC_SERVICE_DEBUG &&
- service != SRPC_SERVICE_QUERY_STAT)
- list_add_tail(&rpc->crpc_privl, &peer->stp_ctl_rpcq);
- else
- list_add_tail(&rpc->crpc_privl, &peer->stp_rpcq);
-
- srpc_client_rpc_addref(rpc); /* ++ref for peer->*rpcq */
- spin_unlock(&peer->stp_lock);
- return;
-}
-
-/* called with rpc->crpc_lock held */
-void
-srpc_abort_rpc (srpc_client_rpc_t *rpc, int why)
-{
- srpc_peer_t *peer = rpc->crpc_peer;
-
- LASSERT (why != 0);
-
- if (rpc->crpc_aborted || /* already aborted */
- rpc->crpc_closed) /* callback imminent */
- return;
-
- CDEBUG (D_NET,
- "Aborting RPC: service %d, peer %s, state %s, why %d\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- swi_state2str(rpc->crpc_wi.wi_state), why);
-
- rpc->crpc_aborted = 1;
- rpc->crpc_status = why;
-
- if (peer != NULL) {
- spin_lock(&peer->stp_lock);
-
- if (!list_empty(&rpc->crpc_privl)) { /* still queued */
- list_del_init(&rpc->crpc_privl);
- srpc_client_rpc_decref(rpc); /* --ref for peer->*rpcq */
- rpc->crpc_peer = NULL; /* no credit taken */
- }
-
- spin_unlock(&peer->stp_lock);
- }
-
- swi_schedule_workitem(&rpc->crpc_wi);
- return;
-}
-
-/* called with rpc->crpc_lock held */
-void
-srpc_post_rpc (srpc_client_rpc_t *rpc)
-{
- srpc_peer_t *peer;
-
- LASSERT (!rpc->crpc_aborted);
- LASSERT (rpc->crpc_peer == NULL);
- LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING);
- LASSERT ((rpc->crpc_bulk.bk_len & ~CFS_PAGE_MASK) == 0);
-
- CDEBUG (D_NET, "Posting RPC: peer %s, service %d, timeout %d\n",
- libcfs_id2str(rpc->crpc_dest), rpc->crpc_service,
- rpc->crpc_timeout);
-
- srpc_add_client_rpc_timer(rpc);
-
- peer = srpc_nid2peer(rpc->crpc_dest.nid);
- if (peer == NULL) {
- srpc_abort_rpc(rpc, -ENOMEM);
- return;
- }
-
- srpc_queue_rpc(peer, rpc);
-
- spin_unlock(&rpc->crpc_lock);
- srpc_check_sends(peer, 0);
- spin_lock(&rpc->crpc_lock);
- return;
-}
-
-
-int
-srpc_send_reply (srpc_server_rpc_t *rpc)
-{
- srpc_event_t *ev = &rpc->srpc_ev;
- srpc_msg_t *msg = &rpc->srpc_replymsg;
- srpc_buffer_t *buffer = rpc->srpc_reqstbuf;
- srpc_service_t *sv = rpc->srpc_service;
- __u64 rpyid;
- int rc;
-
- LASSERT (buffer != NULL);
- rpyid = buffer->buf_msg.msg_body.reqst.rpyid;
-
- spin_lock(&sv->sv_lock);
-
- if (!sv->sv_shuttingdown &&
- sv->sv_id > SRPC_FRAMEWORK_SERVICE_MAX_ID) {
- /* Repost buffer before replying since test client
- * might send me another RPC once it gets the reply */
- if (srpc_service_post_buffer(sv, buffer) != 0)
- CWARN ("Failed to repost %s buffer\n", sv->sv_name);
- rpc->srpc_reqstbuf = NULL;
- }
-
- spin_unlock(&sv->sv_lock);
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_REPLY_SENT;
-
- msg->msg_magic = SRPC_MSG_MAGIC;
- msg->msg_version = SRPC_MSG_VERSION;
- msg->msg_type = srpc_service2reply(sv->sv_id);
-
- rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg,
- sizeof(*msg), LNET_MD_OP_PUT,
- rpc->srpc_peer, rpc->srpc_self,
- &rpc->srpc_replymdh, ev);
- if (rc != 0)
- ev->ev_fired = 1; /* no more event expected */
- return rc;
-}
-
-/* when in kernel always called with LNET_LOCK() held, and in thread context */
-void
-srpc_lnet_ev_handler (lnet_event_t *ev)
-{
- srpc_event_t *rpcev = ev->md.user_ptr;
- srpc_client_rpc_t *crpc;
- srpc_server_rpc_t *srpc;
- srpc_buffer_t *buffer;
- srpc_service_t *sv;
- srpc_msg_t *msg;
- srpc_msg_type_t type;
-
- LASSERT (!in_interrupt());
-
- if (ev->status != 0) {
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.errors++;
- spin_unlock(&srpc_data.rpc_glock);
- }
-
- rpcev->ev_lnet = ev->type;
-
- switch (rpcev->ev_type) {
- default:
- LBUG ();
- case SRPC_REQUEST_SENT:
- if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_sent++;
- spin_unlock(&srpc_data.rpc_glock);
- }
- case SRPC_REPLY_RCVD:
- case SRPC_BULK_REQ_RCVD:
- crpc = rpcev->ev_data;
-
- LASSERT (rpcev == &crpc->crpc_reqstev ||
- rpcev == &crpc->crpc_replyev ||
- rpcev == &crpc->crpc_bulkev);
-
- spin_lock(&crpc->crpc_lock);
-
- LASSERT (rpcev->ev_fired == 0);
- rpcev->ev_fired = 1;
- rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
- -EINTR : ev->status;
- swi_schedule_workitem(&crpc->crpc_wi);
-
- spin_unlock(&crpc->crpc_lock);
- break;
-
- case SRPC_REQUEST_RCVD:
- sv = rpcev->ev_data;
-
- LASSERT (rpcev == &sv->sv_ev);
-
- spin_lock(&sv->sv_lock);
-
- LASSERT (ev->unlinked);
- LASSERT (ev->type == LNET_EVENT_PUT ||
- ev->type == LNET_EVENT_UNLINK);
- LASSERT (ev->type != LNET_EVENT_UNLINK ||
- sv->sv_shuttingdown);
-
- buffer = container_of(ev->md.start, srpc_buffer_t, buf_msg);
- buffer->buf_peer = ev->initiator;
- buffer->buf_self = ev->target.nid;
-
- sv->sv_nposted_msg--;
- LASSERT (sv->sv_nposted_msg >= 0);
-
- if (sv->sv_shuttingdown) {
- /* Leave buffer on sv->sv_posted_msgq since
- * srpc_finish_service needs to traverse it. */
- spin_unlock(&sv->sv_lock);
- break;
- }
-
- list_del(&buffer->buf_list); /* from sv->sv_posted_msgq */
- msg = &buffer->buf_msg;
- type = srpc_service2request(sv->sv_id);
-
- if (ev->status != 0 || ev->mlength != sizeof(*msg) ||
- (msg->msg_type != type &&
- msg->msg_type != __swab32(type)) ||
- (msg->msg_magic != SRPC_MSG_MAGIC &&
- msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
- CERROR ("Dropping RPC (%s) from %s: "
- "status %d mlength %d type %u magic %u.\n",
- sv->sv_name, libcfs_id2str(ev->initiator),
- ev->status, ev->mlength,
- msg->msg_type, msg->msg_magic);
-
- /* NB might drop sv_lock in srpc_service_recycle_buffer,
- * sv_nposted_msg++ as an implicit reference to prevent
- * sv from disappearing under me */
- sv->sv_nposted_msg++;
- srpc_service_recycle_buffer(sv, buffer);
- sv->sv_nposted_msg--;
- spin_unlock(&sv->sv_lock);
-
- if (ev->status == 0) { /* status!=0 counted already */
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.errors++;
- spin_unlock(&srpc_data.rpc_glock);
- }
- break;
- }
-
- if (!list_empty(&sv->sv_free_rpcq)) {
- srpc = list_entry(sv->sv_free_rpcq.next,
- srpc_server_rpc_t, srpc_list);
- list_del(&srpc->srpc_list);
-
- srpc_init_server_rpc(srpc, sv, buffer);
- list_add_tail(&srpc->srpc_list, &sv->sv_active_rpcq);
- srpc_schedule_server_rpc(srpc);
- } else {
- list_add_tail(&buffer->buf_list, &sv->sv_blocked_msgq);
- }
-
- spin_unlock(&sv->sv_lock);
-
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_rcvd++;
- spin_unlock(&srpc_data.rpc_glock);
- break;
-
- case SRPC_BULK_GET_RPLD:
- LASSERT (ev->type == LNET_EVENT_SEND ||
- ev->type == LNET_EVENT_REPLY ||
- ev->type == LNET_EVENT_UNLINK);
-
- if (ev->type == LNET_EVENT_SEND &&
- ev->status == 0 && !ev->unlinked)
- break; /* wait for the final LNET_EVENT_REPLY */
-
- case SRPC_BULK_PUT_SENT:
- if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
- spin_lock(&srpc_data.rpc_glock);
-
- if (rpcev->ev_type == SRPC_BULK_GET_RPLD)
- srpc_data.rpc_counters.bulk_get += ev->mlength;
- else
- srpc_data.rpc_counters.bulk_put += ev->mlength;
-
- spin_unlock(&srpc_data.rpc_glock);
- }
- case SRPC_REPLY_SENT:
- srpc = rpcev->ev_data;
- sv = srpc->srpc_service;
-
- LASSERT (rpcev == &srpc->srpc_ev);
-
- spin_lock(&sv->sv_lock);
- rpcev->ev_fired = 1;
- rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
- -EINTR : ev->status;
- srpc_schedule_server_rpc(srpc);
- spin_unlock(&sv->sv_lock);
- break;
- }
-
- return;
-}
-
-#ifndef __KERNEL__
-
-int
-srpc_check_event (int timeout)
-{
- lnet_event_t ev;
- int rc;
- int i;
-
- rc = LNetEQPoll(&srpc_data.rpc_lnet_eq, 1,
- timeout * 1000, &ev, &i);
- if (rc == 0) return 0;
-
- LASSERT (rc == -EOVERFLOW || rc == 1);
-
- /* We can't affort to miss any events... */
- if (rc == -EOVERFLOW) {
- CERROR ("Dropped an event!!!\n");
- abort();
- }
-
- srpc_lnet_ev_handler(&ev);
- return 1;
-}
-
-#endif
-
-int
-srpc_startup (void)
-{
- int i;
- int rc;
-
-#ifndef __KERNEL__
- char *s;
-
- s = getenv("SRPC_PEER_CREDITS");
- srpc_peer_credits = (s != NULL) ? atoi(s) : srpc_peer_credits;
-#endif
-
- if (srpc_peer_credits <= 0) {
- CERROR("Peer credits must be positive: %d\n", srpc_peer_credits);
- return -EINVAL;
- }
-
- memset(&srpc_data, 0, sizeof(struct smoketest_rpc));
- spin_lock_init(&srpc_data.rpc_glock);
-
- /* 1 second pause to avoid timestamp reuse */
- cfs_pause(cfs_time_seconds(1));
- srpc_data.rpc_matchbits = ((__u64) cfs_time_current_sec()) << 48;
-
- srpc_data.rpc_state = SRPC_STATE_NONE;
-
- LIBCFS_ALLOC(srpc_data.rpc_peers,
- sizeof(struct list_head) * SRPC_PEER_HASH_SIZE);
- if (srpc_data.rpc_peers == NULL) {
- CERROR ("Failed to alloc peer hash.\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < SRPC_PEER_HASH_SIZE; i++)
- CFS_INIT_LIST_HEAD(&srpc_data.rpc_peers[i]);
-
-#ifdef __KERNEL__
- rc = LNetNIInit(LUSTRE_SRV_LNET_PID);
-#else
- if (the_lnet.ln_server_mode_flag)
- rc = LNetNIInit(LUSTRE_SRV_LNET_PID);
- else
- rc = LNetNIInit(getpid() | LNET_PID_USERFLAG);
-#endif
- if (rc < 0) {
- CERROR ("LNetNIInit() has failed: %d\n", rc);
- LIBCFS_FREE(srpc_data.rpc_peers,
- sizeof(struct list_head) * SRPC_PEER_HASH_SIZE);
- return rc;
- }
-
- srpc_data.rpc_state = SRPC_STATE_NI_INIT;
-
- srpc_data.rpc_lnet_eq = LNET_EQ_NONE;
-#ifdef __KERNEL__
- rc = LNetEQAlloc(16, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq);
-#else
- rc = LNetEQAlloc(10240, LNET_EQ_HANDLER_NONE, &srpc_data.rpc_lnet_eq);
-#endif
- if (rc != 0) {
- CERROR("LNetEQAlloc() has failed: %d\n", rc);
- goto bail;
- }
-
- rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
- LASSERT (rc == 0);
-
- srpc_data.rpc_state = SRPC_STATE_EQ_INIT;
-
- rc = swi_startup();
- if (rc != 0)
- goto bail;
-
- srpc_data.rpc_state = SRPC_STATE_WI_INIT;
-
- rc = stt_startup();
-
-bail:
- if (rc != 0)
- srpc_shutdown();
- else
- srpc_data.rpc_state = SRPC_STATE_RUNNING;
-
- return rc;
-}
-
-void
-srpc_shutdown (void)
-{
- int i;
- int rc;
- int state;
-
- state = srpc_data.rpc_state;
- srpc_data.rpc_state = SRPC_STATE_STOPPING;
-
- switch (state) {
- default:
- LBUG ();
- case SRPC_STATE_RUNNING:
- spin_lock(&srpc_data.rpc_glock);
-
- for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) {
- srpc_service_t *sv = srpc_data.rpc_services[i];
-
- LASSERTF (sv == NULL,
- "service not empty: id %d, name %s\n",
- i, sv->sv_name);
- }
-
- spin_unlock(&srpc_data.rpc_glock);
-
- stt_shutdown();
-
- case SRPC_STATE_WI_INIT:
- swi_shutdown();
-
- case SRPC_STATE_EQ_INIT:
- rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
- LASSERT (rc == 0);
- rc = LNetEQFree(srpc_data.rpc_lnet_eq);
- LASSERT (rc == 0); /* the EQ should have no user by now */
-
- case SRPC_STATE_NI_INIT:
- LNetNIFini();
- break;
- }
-
- /* srpc_peer_t's are kept in hash until shutdown */
- for (i = 0; i < SRPC_PEER_HASH_SIZE; i++) {
- srpc_peer_t *peer;
-
- while (!list_empty(&srpc_data.rpc_peers[i])) {
- peer = list_entry(srpc_data.rpc_peers[i].next,
- srpc_peer_t, stp_list);
- list_del(&peer->stp_list);
-
- LASSERT (list_empty(&peer->stp_rpcq));
- LASSERT (list_empty(&peer->stp_ctl_rpcq));
- LASSERT (peer->stp_credits == srpc_peer_credits);
-
- LIBCFS_FREE(peer, sizeof(srpc_peer_t));
- }
- }
-
- LIBCFS_FREE(srpc_data.rpc_peers,
- sizeof(struct list_head) * SRPC_PEER_HASH_SIZE);
- return;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef __SELFTEST_RPC_H__
-#define __SELFTEST_RPC_H__
-
-#include <lnet/lnetst.h>
-
-/*
- * LST wired structures
- *
- * XXX: *REPLY == *REQST + 1
- */
-typedef enum {
- SRPC_MSG_MKSN_REQST = 0,
- SRPC_MSG_MKSN_REPLY = 1,
- SRPC_MSG_RMSN_REQST = 2,
- SRPC_MSG_RMSN_REPLY = 3,
- SRPC_MSG_BATCH_REQST = 4,
- SRPC_MSG_BATCH_REPLY = 5,
- SRPC_MSG_STAT_REQST = 6,
- SRPC_MSG_STAT_REPLY = 7,
- SRPC_MSG_TEST_REQST = 8,
- SRPC_MSG_TEST_REPLY = 9,
- SRPC_MSG_DEBUG_REQST = 10,
- SRPC_MSG_DEBUG_REPLY = 11,
- SRPC_MSG_BRW_REQST = 12,
- SRPC_MSG_BRW_REPLY = 13,
- SRPC_MSG_PING_REQST = 14,
- SRPC_MSG_PING_REPLY = 15,
- SRPC_MSG_JOIN_REQST = 16,
- SRPC_MSG_JOIN_REPLY = 17,
-} srpc_msg_type_t;
-
-/* CAVEAT EMPTOR:
- * All srpc_*_reqst_t's 1st field must be matchbits of reply buffer,
- * and 2nd field matchbits of bulk buffer if any.
- *
- * All srpc_*_reply_t's 1st field must be a __u32 status, and 2nd field
- * session id if needed.
- */
-typedef struct {
- __u64 rpyid; /* reply buffer matchbits */
- __u64 bulkid; /* bulk buffer matchbits */
-} WIRE_ATTR srpc_generic_reqst_t;
-
-typedef struct {
- __u32 status;
- lst_sid_t sid;
-} WIRE_ATTR srpc_generic_reply_t;
-
-/* FRAMEWORK RPCs */
-typedef struct {
- __u64 mksn_rpyid; /* reply buffer matchbits */
- lst_sid_t mksn_sid; /* session id */
- __u32 mksn_force; /* use brute force */
- char mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR srpc_mksn_reqst_t; /* make session request */
-
-typedef struct {
- __u32 mksn_status; /* session status */
- lst_sid_t mksn_sid; /* session id */
- __u32 mksn_timeout; /* session timeout */
- char mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR srpc_mksn_reply_t; /* make session reply */
-
-typedef struct {
- __u64 rmsn_rpyid; /* reply buffer matchbits */
- lst_sid_t rmsn_sid; /* session id */
-} WIRE_ATTR srpc_rmsn_reqst_t; /* remove session request */
-
-typedef struct {
- __u32 rmsn_status;
- lst_sid_t rmsn_sid; /* session id */
-} WIRE_ATTR srpc_rmsn_reply_t; /* remove session reply */
-
-typedef struct {
- __u64 join_rpyid; /* reply buffer matchbits */
- lst_sid_t join_sid; /* session id to join */
- char join_group[LST_NAME_SIZE]; /* group name */
-} WIRE_ATTR srpc_join_reqst_t;
-
-typedef struct {
- __u32 join_status; /* returned status */
- lst_sid_t join_sid; /* session id */
- __u32 join_timeout; /* # seconds' inactivity to expire */
- char join_session[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR srpc_join_reply_t;
-
-typedef struct {
- __u64 dbg_rpyid; /* reply buffer matchbits */
- lst_sid_t dbg_sid; /* session id */
- __u32 dbg_flags; /* bitmap of debug */
-} WIRE_ATTR srpc_debug_reqst_t;
-
-typedef struct {
- __u32 dbg_status; /* returned code */
- lst_sid_t dbg_sid; /* session id */
- __u32 dbg_timeout; /* session timeout */
- __u32 dbg_nbatch; /* # of batches in the node */
- char dbg_name[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR srpc_debug_reply_t;
-
-#define SRPC_BATCH_OPC_RUN 1
-#define SRPC_BATCH_OPC_STOP 2
-#define SRPC_BATCH_OPC_QUERY 3
-
-typedef struct {
- __u64 bar_rpyid; /* reply buffer matchbits */
- lst_sid_t bar_sid; /* session id */
- lst_bid_t bar_bid; /* batch id */
- __u32 bar_opc; /* create/start/stop batch */
- __u32 bar_testidx; /* index of test */
- __u32 bar_arg; /* parameters */
-} WIRE_ATTR srpc_batch_reqst_t;
-
-typedef struct {
- __u32 bar_status; /* status of request */
- lst_sid_t bar_sid; /* session id */
- __u32 bar_active; /* # of active tests in batch/test */
- __u32 bar_time; /* remained time */
-} WIRE_ATTR srpc_batch_reply_t;
-
-typedef struct {
- __u64 str_rpyid; /* reply buffer matchbits */
- lst_sid_t str_sid; /* session id */
- __u32 str_type; /* type of stat */
-} WIRE_ATTR srpc_stat_reqst_t;
-
-typedef struct {
- __u32 str_status;
- lst_sid_t str_sid;
- sfw_counters_t str_fw;
- srpc_counters_t str_rpc;
- lnet_counters_t str_lnet;
-} WIRE_ATTR srpc_stat_reply_t;
-
-typedef struct {
- __u32 blk_opc; /* bulk operation code */
- __u32 blk_npg; /* # of pages */
- __u32 blk_flags; /* reserved flags */
-} WIRE_ATTR test_bulk_req_t;
-
-typedef struct {
- __u32 png_size; /* size of ping message */
- __u32 png_flags; /* reserved flags */
-} WIRE_ATTR test_ping_req_t;
-
-typedef struct {
- __u64 tsr_rpyid; /* reply buffer matchbits */
- __u64 tsr_bulkid; /* bulk buffer matchbits */
- lst_sid_t tsr_sid; /* session id */
- lst_bid_t tsr_bid; /* batch id */
- __u32 tsr_service; /* test type: bulk|ping|... */
- /* test client loop count or # server buffers needed */
- __u32 tsr_loop;
- __u32 tsr_concur; /* concurrency of test */
- __u8 tsr_is_client; /* is test client or not */
- __u8 tsr_stop_onerr; /* stop on error */
- __u32 tsr_ndest; /* # of dest nodes */
-
- union {
- test_bulk_req_t bulk;
- test_ping_req_t ping;
- } tsr_u;
-} WIRE_ATTR srpc_test_reqst_t;
-
-typedef struct {
- __u32 tsr_status; /* returned code */
- lst_sid_t tsr_sid;
-} WIRE_ATTR srpc_test_reply_t;
-
-/* TEST RPCs */
-typedef struct {
- __u64 pnr_rpyid;
- __u32 pnr_magic;
- __u32 pnr_seq;
- __u64 pnr_time_sec;
- __u64 pnr_time_usec;
-} WIRE_ATTR srpc_ping_reqst_t;
-
-typedef struct {
- __u32 pnr_status;
- __u32 pnr_magic;
- __u32 pnr_seq;
-} WIRE_ATTR srpc_ping_reply_t;
-
-typedef struct {
- __u64 brw_rpyid; /* reply buffer matchbits */
- __u64 brw_bulkid; /* bulk buffer matchbits */
- __u32 brw_rw; /* read or write */
- __u32 brw_len; /* bulk data len */
- __u32 brw_flags; /* bulk data patterns */
-} WIRE_ATTR srpc_brw_reqst_t; /* bulk r/w request */
-
-typedef struct {
- __u32 brw_status;
-} WIRE_ATTR srpc_brw_reply_t; /* bulk r/w reply */
-
-#define SRPC_MSG_MAGIC 0xeeb0f00d
-#define SRPC_MSG_VERSION 1
-typedef struct {
- __u32 msg_magic; /* magic */
- __u32 msg_version; /* # version */
- __u32 msg_type; /* what's in msg_body? srpc_msg_type_t */
- __u32 msg_reserved0; /* reserved seats */
- __u32 msg_reserved1;
- __u32 msg_reserved2;
- union {
- srpc_generic_reqst_t reqst;
- srpc_generic_reply_t reply;
-
- srpc_mksn_reqst_t mksn_reqst;
- srpc_mksn_reply_t mksn_reply;
- srpc_rmsn_reqst_t rmsn_reqst;
- srpc_rmsn_reply_t rmsn_reply;
- srpc_debug_reqst_t dbg_reqst;
- srpc_debug_reply_t dbg_reply;
- srpc_batch_reqst_t bat_reqst;
- srpc_batch_reply_t bat_reply;
- srpc_stat_reqst_t stat_reqst;
- srpc_stat_reply_t stat_reply;
- srpc_test_reqst_t tes_reqst;
- srpc_test_reply_t tes_reply;
- srpc_join_reqst_t join_reqst;
- srpc_join_reply_t join_reply;
-
- srpc_ping_reqst_t ping_reqst;
- srpc_ping_reply_t ping_reply;
- srpc_brw_reqst_t brw_reqst;
- srpc_brw_reply_t brw_reply;
- } msg_body;
-} WIRE_ATTR srpc_msg_t;
-
-#endif /* __SELFTEST_RPC_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- */
-#ifndef __SELFTEST_SELFTEST_H__
-#define __SELFTEST_SELFTEST_H__
-
-#define LNET_ONLY
-
-#ifndef __KERNEL__
-
-/* XXX workaround XXX */
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-/* TODO: remove these when libcfs provides proper primitives for userspace
- *
- * Dummy implementations of spinlock_t and atomic_t work since userspace
- * selftest is completely single-threaded, even using multi-threaded usocklnd.
- */
-typedef struct { } spinlock_t;
-static inline void spin_lock(spinlock_t *l) {return;}
-static inline void spin_unlock(spinlock_t *l) {return;}
-static inline void spin_lock_init(spinlock_t *l) {return;}
-
-typedef struct { volatile int counter; } atomic_t;
-#define atomic_read(a) ((a)->counter)
-#define atomic_set(a,b) do {(a)->counter = b; } while (0)
-#define atomic_dec_and_test(a) ((--((a)->counter)) == 0)
-#define atomic_inc(a) (((a)->counter)++)
-#define atomic_dec(a) do { (a)->counter--; } while (0)
-
-#endif
-
-#include <libcfs/kp30.h>
-#include <libcfs/libcfs.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-#include <lnet/lib-types.h>
-#include <lnet/lnetst.h>
-
-#include "rpc.h"
-#include "timer.h"
-
-#ifndef MADE_WITHOUT_COMPROMISE
-#define MADE_WITHOUT_COMPROMISE
-#endif
-
-
-#define SWI_STATE_NEWBORN 0
-#define SWI_STATE_REPLY_SUBMITTED 1
-#define SWI_STATE_REPLY_SENT 2
-#define SWI_STATE_REQUEST_SUBMITTED 3
-#define SWI_STATE_REQUEST_SENT 4
-#define SWI_STATE_REPLY_RECEIVED 5
-#define SWI_STATE_BULK_STARTED 6
-#define SWI_STATE_DONE 10
-
-/* forward refs */
-struct swi_workitem;
-struct srpc_service;
-struct sfw_test_unit;
-struct sfw_test_instance;
-
-/*
- * A workitems is deferred work with these semantics:
- * - a workitem always runs in thread context.
- * - a workitem can be concurrent with other workitems but is strictly
- * serialized with respect to itself.
- * - no CPU affinity, a workitem does not necessarily run on the same CPU
- * that schedules it. However, this might change in the future.
- * - if a workitem is scheduled again before it has a chance to run, it
- * runs only once.
- * - if a workitem is scheduled while it runs, it runs again after it
- * completes; this ensures that events occurring while other events are
- * being processed receive due attention. This behavior also allows a
- * workitem to reschedule itself.
- *
- * Usage notes:
- * - a workitem can sleep but it should be aware of how that sleep might
- * affect others.
- * - a workitem runs inside a kernel thread so there's no user space to access.
- * - do not use a workitem if the scheduling latency can't be tolerated.
- *
- * When wi_action returns non-zero, it means the workitem has either been
- * freed or reused and workitem scheduler won't touch it any more.
- */
-typedef int (*swi_action_t) (struct swi_workitem *);
-typedef struct swi_workitem {
- struct list_head wi_list; /* chain on runq */
- int wi_state;
- swi_action_t wi_action;
- void *wi_data;
- unsigned int wi_running:1;
- unsigned int wi_scheduled:1;
-} swi_workitem_t;
-
-static inline void
-swi_init_workitem (swi_workitem_t *wi, void *data, swi_action_t action)
-{
- CFS_INIT_LIST_HEAD(&wi->wi_list);
-
- wi->wi_running = 0;
- wi->wi_scheduled = 0;
- wi->wi_data = data;
- wi->wi_action = action;
- wi->wi_state = SWI_STATE_NEWBORN;
-}
-
-#define SWI_RESCHED 128 /* # workitem scheduler loops before reschedule */
-
-/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework
- * services, e.g. create/modify session.
- */
-#define SRPC_SERVICE_DEBUG 0
-#define SRPC_SERVICE_MAKE_SESSION 1
-#define SRPC_SERVICE_REMOVE_SESSION 2
-#define SRPC_SERVICE_BATCH 3
-#define SRPC_SERVICE_TEST 4
-#define SRPC_SERVICE_QUERY_STAT 5
-#define SRPC_SERVICE_JOIN 6
-#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10
-/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */
-#define SRPC_SERVICE_BRW 11
-#define SRPC_SERVICE_PING 12
-#define SRPC_SERVICE_MAX_ID 12
-
-#define SRPC_REQUEST_PORTAL 50
-/* a lazy portal for framework RPC requests */
-#define SRPC_FRAMEWORK_REQUEST_PORTAL 51
-/* all reply/bulk RDMAs go to this portal */
-#define SRPC_RDMA_PORTAL 52
-
-static inline srpc_msg_type_t
-srpc_service2request (int service)
-{
- switch (service) {
- default:
- LBUG ();
- case SRPC_SERVICE_DEBUG:
- return SRPC_MSG_DEBUG_REQST;
-
- case SRPC_SERVICE_MAKE_SESSION:
- return SRPC_MSG_MKSN_REQST;
-
- case SRPC_SERVICE_REMOVE_SESSION:
- return SRPC_MSG_RMSN_REQST;
-
- case SRPC_SERVICE_BATCH:
- return SRPC_MSG_BATCH_REQST;
-
- case SRPC_SERVICE_TEST:
- return SRPC_MSG_TEST_REQST;
-
- case SRPC_SERVICE_QUERY_STAT:
- return SRPC_MSG_STAT_REQST;
-
- case SRPC_SERVICE_BRW:
- return SRPC_MSG_BRW_REQST;
-
- case SRPC_SERVICE_PING:
- return SRPC_MSG_PING_REQST;
-
- case SRPC_SERVICE_JOIN:
- return SRPC_MSG_JOIN_REQST;
- }
-}
-
-static inline srpc_msg_type_t
-srpc_service2reply (int service)
-{
- return srpc_service2request(service) + 1;
-}
-
-typedef enum {
- SRPC_BULK_REQ_RCVD = 0, /* passive bulk request(PUT sink/GET source) received */
- SRPC_BULK_PUT_SENT = 1, /* active bulk PUT sent (source) */
- SRPC_BULK_GET_RPLD = 2, /* active bulk GET replied (sink) */
- SRPC_REPLY_RCVD = 3, /* incoming reply received */
- SRPC_REPLY_SENT = 4, /* outgoing reply sent */
- SRPC_REQUEST_RCVD = 5, /* incoming request received */
- SRPC_REQUEST_SENT = 6, /* outgoing request sent */
-} srpc_event_type_t;
-
-/* RPC event */
-typedef struct {
- srpc_event_type_t ev_type; /* what's up */
- lnet_event_kind_t ev_lnet; /* LNet event type */
- int ev_fired; /* LNet event fired? */
- int ev_status; /* LNet event status */
- void *ev_data; /* owning server/client RPC */
-} srpc_event_t;
-
-typedef struct {
- int bk_len; /* len of bulk data */
- lnet_handle_md_t bk_mdh;
- int bk_sink; /* sink/source */
- int bk_niov; /* # iov in bk_iovs */
-#ifdef __KERNEL__
- lnet_kiov_t bk_iovs[0];
-#else
- cfs_page_t **bk_pages;
- lnet_md_iovec_t bk_iovs[0];
-#endif
-} srpc_bulk_t; /* bulk descriptor */
-
-typedef struct srpc_peer {
- struct list_head stp_list; /* chain on peer hash */
- struct list_head stp_rpcq; /* q of non-control RPCs */
- struct list_head stp_ctl_rpcq; /* q of control RPCs */
- spinlock_t stp_lock; /* serialize */
- lnet_nid_t stp_nid;
- int stp_credits; /* available credits */
-} srpc_peer_t;
-
-/* message buffer descriptor */
-typedef struct {
- struct list_head buf_list; /* chain on srpc_service::*_msgq */
- srpc_msg_t buf_msg;
- lnet_handle_md_t buf_mdh;
- lnet_nid_t buf_self;
- lnet_process_id_t buf_peer;
-} srpc_buffer_t;
-
-/* server-side state of a RPC */
-typedef struct srpc_server_rpc {
- struct list_head srpc_list; /* chain on srpc_service::*_rpcq */
- struct srpc_service *srpc_service;
- swi_workitem_t srpc_wi;
- srpc_event_t srpc_ev; /* bulk/reply event */
- lnet_nid_t srpc_self;
- lnet_process_id_t srpc_peer;
- srpc_msg_t srpc_replymsg;
- lnet_handle_md_t srpc_replymdh;
- srpc_buffer_t *srpc_reqstbuf;
- srpc_bulk_t *srpc_bulk;
-
- int srpc_status;
- void (*srpc_done)(struct srpc_server_rpc *);
-} srpc_server_rpc_t;
-
-/* client-side state of a RPC */
-typedef struct srpc_client_rpc {
- struct list_head crpc_list; /* chain on user's lists */
- struct list_head crpc_privl; /* chain on srpc_peer_t::*rpcq */
- spinlock_t crpc_lock; /* serialize */
- int crpc_service;
- atomic_t crpc_refcount;
- int crpc_timeout; /* # seconds to wait for reply */
- stt_timer_t crpc_timer;
- swi_workitem_t crpc_wi;
- lnet_process_id_t crpc_dest;
- srpc_peer_t *crpc_peer;
-
- void (*crpc_done)(struct srpc_client_rpc *);
- void (*crpc_fini)(struct srpc_client_rpc *);
- int crpc_status; /* completion status */
- void *crpc_priv; /* caller data */
-
- /* state flags */
- unsigned int crpc_aborted:1; /* being given up */
- unsigned int crpc_closed:1; /* completed */
-
- /* RPC events */
- srpc_event_t crpc_bulkev; /* bulk event */
- srpc_event_t crpc_reqstev; /* request event */
- srpc_event_t crpc_replyev; /* reply event */
-
- /* bulk, request(reqst), and reply exchanged on wire */
- srpc_msg_t crpc_reqstmsg;
- srpc_msg_t crpc_replymsg;
- lnet_handle_md_t crpc_reqstmdh;
- lnet_handle_md_t crpc_replymdh;
- srpc_bulk_t crpc_bulk;
-} srpc_client_rpc_t;
-
-#define srpc_client_rpc_size(rpc) \
-offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
-
-#define srpc_client_rpc_addref(rpc) \
-do { \
- CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \
- (rpc), libcfs_id2str((rpc)->crpc_dest), \
- atomic_read(&(rpc)->crpc_refcount)); \
- LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
- atomic_inc(&(rpc)->crpc_refcount); \
-} while (0)
-
-#define srpc_client_rpc_decref(rpc) \
-do { \
- CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \
- (rpc), libcfs_id2str((rpc)->crpc_dest), \
- atomic_read(&(rpc)->crpc_refcount)); \
- LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
- if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \
- srpc_destroy_client_rpc(rpc); \
-} while (0)
-
-#define srpc_event_pending(rpc) ((rpc)->crpc_bulkev.ev_fired == 0 || \
- (rpc)->crpc_reqstev.ev_fired == 0 || \
- (rpc)->crpc_replyev.ev_fired == 0)
-
-typedef struct srpc_service {
- int sv_id; /* service id */
- const char *sv_name; /* human readable name */
- int sv_nprune; /* # posted RPC to be pruned */
- int sv_concur; /* max # concurrent RPCs */
-
- spinlock_t sv_lock;
- int sv_shuttingdown;
- srpc_event_t sv_ev; /* LNet event */
- int sv_nposted_msg; /* # posted message buffers */
- struct list_head sv_free_rpcq; /* free RPC descriptors */
- struct list_head sv_active_rpcq; /* in-flight RPCs */
- struct list_head sv_posted_msgq; /* posted message buffers */
- struct list_head sv_blocked_msgq; /* blocked for RPC descriptor */
-
- /* Service callbacks:
- * - sv_handler: process incoming RPC request
- * - sv_bulk_ready: notify bulk data
- */
- int (*sv_handler) (srpc_server_rpc_t *);
- int (*sv_bulk_ready) (srpc_server_rpc_t *, int);
-} srpc_service_t;
-
-#define SFW_POST_BUFFERS 8
-#define SFW_SERVICE_CONCURRENCY (SFW_POST_BUFFERS/2)
-
-typedef struct {
- struct list_head sn_list; /* chain on fw_zombie_sessions */
- lst_sid_t sn_id; /* unique identifier */
- unsigned int sn_timeout; /* # seconds' inactivity to expire */
- int sn_timer_active;
- stt_timer_t sn_timer;
- struct list_head sn_batches; /* list of batches */
- char sn_name[LST_NAME_SIZE];
- atomic_t sn_brw_errors;
- atomic_t sn_ping_errors;
-} sfw_session_t;
-
-#define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \
- (sid0).ses_stamp == (sid1).ses_stamp)
-
-typedef struct {
- struct list_head bat_list; /* chain on sn_batches */
- lst_bid_t bat_id; /* batch id */
- int bat_error; /* error code of batch */
- sfw_session_t *bat_session; /* batch's session */
- atomic_t bat_nactive; /* # of active tests */
- struct list_head bat_tests; /* test instances */
-} sfw_batch_t;
-
-typedef struct {
- int (*tso_init)(struct sfw_test_instance *tsi); /* intialize test client */
- void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test client */
- int (*tso_prep_rpc)(struct sfw_test_unit *tsu,
- lnet_process_id_t dest,
- srpc_client_rpc_t **rpc); /* prep a tests rpc */
- void (*tso_done_rpc)(struct sfw_test_unit *tsu,
- srpc_client_rpc_t *rpc); /* done a test rpc */
-} sfw_test_client_ops_t;
-
-typedef struct sfw_test_instance {
- struct list_head tsi_list; /* chain on batch */
- int tsi_service; /* test type */
- sfw_batch_t *tsi_batch; /* batch */
- sfw_test_client_ops_t *tsi_ops; /* test client operations */
-
- /* public parameter for all test units */
- int tsi_is_client:1; /* is test client */
- int tsi_stoptsu_onerr:1; /* stop tsu on error */
- int tsi_concur; /* concurrency */
- int tsi_loop; /* loop count */
-
- /* status of test instance */
- spinlock_t tsi_lock; /* serialize */
- int tsi_stopping:1; /* test is stopping */
- atomic_t tsi_nactive; /* # of active test unit */
- struct list_head tsi_units; /* test units */
- struct list_head tsi_free_rpcs; /* free rpcs */
- struct list_head tsi_active_rpcs; /* active rpcs */
-
- union {
- test_bulk_req_t bulk; /* bulk parameter */
- test_ping_req_t ping; /* ping parameter */
- } tsi_u;
-} sfw_test_instance_t;
-
-/* XXX: trailing (CFS_PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at
- * the end of pages are not used */
-#define SFW_MAX_CONCUR LST_MAX_CONCUR
-#define SFW_ID_PER_PAGE (CFS_PAGE_SIZE / sizeof(lnet_process_id_t))
-#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE)
-#define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
-
-typedef struct sfw_test_unit {
- struct list_head tsu_list; /* chain on lst_test_instance */
- lnet_process_id_t tsu_dest; /* id of dest node */
- int tsu_loop; /* loop count of the test */
- sfw_test_instance_t *tsu_instance; /* pointer to test instance */
- void *tsu_private; /* private data */
- swi_workitem_t tsu_worker; /* workitem of the test unit */
-} sfw_test_unit_t;
-
-typedef struct {
- struct list_head tsc_list; /* chain on fw_tests */
- srpc_service_t *tsc_srv_service; /* test service */
- sfw_test_client_ops_t *tsc_cli_ops; /* ops of test client */
-} sfw_test_case_t;
-
-
-srpc_client_rpc_t *
-sfw_create_rpc(lnet_process_id_t peer, int service, int nbulkiov, int bulklen,
- void (*done) (srpc_client_rpc_t *), void *priv);
-int sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer,
- int nblk, int blklen, srpc_client_rpc_t **rpc);
-void sfw_abort_rpc(srpc_client_rpc_t *rpc);
-void sfw_post_rpc(srpc_client_rpc_t *rpc);
-void sfw_client_rpc_done(srpc_client_rpc_t *rpc);
-void sfw_unpack_message(srpc_msg_t *msg);
-void sfw_free_pages(srpc_server_rpc_t *rpc);
-void sfw_add_bulk_page(srpc_bulk_t *bk, cfs_page_t *pg, int i);
-int sfw_alloc_pages(srpc_server_rpc_t *rpc, int npages, int sink);
-
-srpc_client_rpc_t *
-srpc_create_client_rpc(lnet_process_id_t peer, int service,
- int nbulkiov, int bulklen,
- void (*rpc_done)(srpc_client_rpc_t *),
- void (*rpc_fini)(srpc_client_rpc_t *), void *priv);
-void srpc_post_rpc(srpc_client_rpc_t *rpc);
-void srpc_abort_rpc(srpc_client_rpc_t *rpc, int why);
-void srpc_free_bulk(srpc_bulk_t *bk);
-srpc_bulk_t *srpc_alloc_bulk(int npages, int sink);
-int srpc_send_rpc(swi_workitem_t *wi);
-int srpc_send_reply(srpc_server_rpc_t *rpc);
-int srpc_add_service(srpc_service_t *sv);
-int srpc_remove_service(srpc_service_t *sv);
-void srpc_shutdown_service(srpc_service_t *sv);
-int srpc_finish_service(srpc_service_t *sv);
-int srpc_service_add_buffers(srpc_service_t *sv, int nbuffer);
-void srpc_service_remove_buffers(srpc_service_t *sv, int nbuffer);
-void srpc_get_counters(srpc_counters_t *cnt);
-void srpc_set_counters(const srpc_counters_t *cnt);
-
-void swi_kill_workitem(swi_workitem_t *wi);
-void swi_schedule_workitem(swi_workitem_t *wi);
-void swi_schedule_serial_workitem(swi_workitem_t *wi);
-int swi_startup(void);
-int sfw_startup(void);
-int srpc_startup(void);
-void swi_shutdown(void);
-void sfw_shutdown(void);
-void srpc_shutdown(void);
-
-static inline void
-srpc_destroy_client_rpc (srpc_client_rpc_t *rpc)
-{
- LASSERT (rpc != NULL);
- LASSERT (!srpc_event_pending(rpc));
- LASSERT (list_empty(&rpc->crpc_privl));
- LASSERT (atomic_read(&rpc->crpc_refcount) == 0);
-#ifndef __KERNEL__
- LASSERT (rpc->crpc_bulk.bk_pages == NULL);
-#endif
-
- if (rpc->crpc_fini == NULL) {
- LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
- } else {
- (*rpc->crpc_fini) (rpc);
- }
-
- return;
-}
-
-static inline void
-srpc_init_client_rpc (srpc_client_rpc_t *rpc, lnet_process_id_t peer,
- int service, int nbulkiov, int bulklen,
- void (*rpc_done)(srpc_client_rpc_t *),
- void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
-{
- LASSERT (nbulkiov <= LNET_MAX_IOV);
-
- memset(rpc, 0, offsetof(srpc_client_rpc_t,
- crpc_bulk.bk_iovs[nbulkiov]));
-
- CFS_INIT_LIST_HEAD(&rpc->crpc_list);
- CFS_INIT_LIST_HEAD(&rpc->crpc_privl);
- swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc);
- spin_lock_init(&rpc->crpc_lock);
- atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
-
- rpc->crpc_dest = peer;
- rpc->crpc_priv = priv;
- rpc->crpc_service = service;
- rpc->crpc_bulk.bk_len = bulklen;
- rpc->crpc_bulk.bk_niov = nbulkiov;
- rpc->crpc_done = rpc_done;
- rpc->crpc_fini = rpc_fini;
- rpc->crpc_reqstmdh =
- rpc->crpc_replymdh =
- rpc->crpc_bulk.bk_mdh = LNET_INVALID_HANDLE;
-
- /* no event is expected at this point */
- rpc->crpc_bulkev.ev_fired =
- rpc->crpc_reqstev.ev_fired =
- rpc->crpc_replyev.ev_fired = 1;
-
- rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC;
- rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION;
- rpc->crpc_reqstmsg.msg_type = srpc_service2request(service);
- return;
-}
-
-static inline const char *
-swi_state2str (int state)
-{
-#define STATE2STR(x) case x: return #x
- switch(state) {
- default:
- LBUG();
- STATE2STR(SWI_STATE_NEWBORN);
- STATE2STR(SWI_STATE_REPLY_SUBMITTED);
- STATE2STR(SWI_STATE_REPLY_SENT);
- STATE2STR(SWI_STATE_REQUEST_SUBMITTED);
- STATE2STR(SWI_STATE_REQUEST_SENT);
- STATE2STR(SWI_STATE_REPLY_RECEIVED);
- STATE2STR(SWI_STATE_BULK_STARTED);
- STATE2STR(SWI_STATE_DONE);
- }
-#undef STATE2STR
-}
-
-#define UNUSED(x) ( (void)(x) )
-
-#ifndef __KERNEL__
-
-int stt_poll_interval(void);
-int sfw_session_removed(void);
-
-int stt_check_events(void);
-int swi_check_events(void);
-int srpc_check_event(int timeout);
-
-int lnet_selftest_init(void);
-void lnet_selftest_fini(void);
-int selftest_wait_events(void);
-
-#else
-
-#define selftest_wait_events() cfs_pause(cfs_time_seconds(1))
-
-#endif
-
-#define lst_wait_until(cond, lock, fmt, a...) \
-do { \
- int __I = 2; \
- while (!(cond)) { \
- __I++; \
- CDEBUG(((__I & (-__I)) == __I) ? D_WARNING : \
- D_NET, /* 2**n? */ \
- fmt, ## a); \
- spin_unlock(&(lock)); \
- \
- selftest_wait_events(); \
- \
- spin_lock(&(lock)); \
- } \
-} while (0)
-
-static inline void
-srpc_wait_service_shutdown (srpc_service_t *sv)
-{
- int i = 2;
-
- spin_lock(&sv->sv_lock);
- LASSERT (sv->sv_shuttingdown);
- spin_unlock(&sv->sv_lock);
-
- while (srpc_finish_service(sv) == 0) {
- i++;
- CDEBUG (((i & -i) == i) ? D_WARNING : D_NET,
- "Waiting for %s service to shutdown...\n",
- sv->sv_name);
- selftest_wait_events();
- }
-}
-
-#endif /* __SELFTEST_SELFTEST_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-
-/*
- * Timers are implemented as a sorted queue of expiry times. The queue
- * is slotted, with each slot holding timers which expire in a
- * 2**STTIMER_MINPOLL (8) second period. The timers in each slot are
- * sorted by increasing expiry time. The number of slots is 2**7 (128),
- * to cover a time period of 1024 seconds into the future before wrapping.
- */
-#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */
-#define STTIMER_SLOTTIME (1 << STTIMER_MINPOLL)
-#define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1))
-#define STTIMER_NSLOTS (1 << 7)
-#define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \
- (STTIMER_NSLOTS - 1))])
-
-struct st_timer_data {
- spinlock_t stt_lock;
- /* start time of the slot processed previously */
- cfs_time_t stt_prev_slot;
- struct list_head stt_hash[STTIMER_NSLOTS];
- int stt_shuttingdown;
-#ifdef __KERNEL__
- cfs_waitq_t stt_waitq;
- int stt_nthreads;
-#endif
-} stt_data;
-
-void
-stt_add_timer (stt_timer_t *timer)
-{
- struct list_head *pos;
-
- spin_lock(&stt_data.stt_lock);
-
-#ifdef __KERNEL__
- LASSERT (stt_data.stt_nthreads > 0);
-#endif
- LASSERT (!stt_data.stt_shuttingdown);
- LASSERT (timer->stt_func != NULL);
- LASSERT (list_empty(&timer->stt_list));
- LASSERT (cfs_time_after(timer->stt_expires, cfs_time_current_sec()));
-
- /* a simple insertion sort */
- list_for_each_prev (pos, STTIMER_SLOT(timer->stt_expires)) {
- stt_timer_t *old = list_entry(pos, stt_timer_t, stt_list);
-
- if (cfs_time_aftereq(timer->stt_expires, old->stt_expires))
- break;
- }
- list_add(&timer->stt_list, pos);
-
- spin_unlock(&stt_data.stt_lock);
-}
-
-/*
- * The function returns whether it has deactivated a pending timer or not.
- * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
- * active timer returns 1.)
- *
- * CAVEAT EMPTOR:
- * When 0 is returned, it is possible that timer->stt_func _is_ running on
- * another CPU.
- */
-int
-stt_del_timer (stt_timer_t *timer)
-{
- int ret = 0;
-
- spin_lock(&stt_data.stt_lock);
-
-#ifdef __KERNEL__
- LASSERT (stt_data.stt_nthreads > 0);
-#endif
- LASSERT (!stt_data.stt_shuttingdown);
-
- if (!list_empty(&timer->stt_list)) {
- ret = 1;
- list_del_init(&timer->stt_list);
- }
-
- spin_unlock(&stt_data.stt_lock);
- return ret;
-}
-
-/* called with stt_data.stt_lock held */
-int
-stt_expire_list (struct list_head *slot, cfs_time_t now)
-{
- int expired = 0;
- stt_timer_t *timer;
-
- while (!list_empty(slot)) {
- timer = list_entry(slot->next, stt_timer_t, stt_list);
-
- if (cfs_time_after(timer->stt_expires, now))
- break;
-
- list_del_init(&timer->stt_list);
- spin_unlock(&stt_data.stt_lock);
-
- expired++;
- (*timer->stt_func) (timer->stt_data);
-
- spin_lock(&stt_data.stt_lock);
- }
-
- return expired;
-}
-
-int
-stt_check_timers (cfs_time_t *last)
-{
- int expired = 0;
- cfs_time_t now;
- cfs_time_t this_slot;
-
- now = cfs_time_current_sec();
- this_slot = now & STTIMER_SLOTTIMEMASK;
-
- spin_lock(&stt_data.stt_lock);
-
- while (cfs_time_aftereq(this_slot, *last)) {
- expired += stt_expire_list(STTIMER_SLOT(this_slot), now);
- this_slot = cfs_time_sub(this_slot, STTIMER_SLOTTIME);
- }
-
- *last = now & STTIMER_SLOTTIMEMASK;
- spin_unlock(&stt_data.stt_lock);
- return expired;
-}
-
-#ifdef __KERNEL__
-
-int
-stt_timer_main (void *arg)
-{
- UNUSED(arg);
-
- cfs_daemonize("st_timer");
- cfs_block_allsigs();
-
- while (!stt_data.stt_shuttingdown) {
- stt_check_timers(&stt_data.stt_prev_slot);
-
- cfs_waitq_wait_event_timeout(stt_data.stt_waitq,
- stt_data.stt_shuttingdown,
- cfs_time_seconds(STTIMER_SLOTTIME));
- }
-
- spin_lock(&stt_data.stt_lock);
- stt_data.stt_nthreads--;
- spin_unlock(&stt_data.stt_lock);
- return 0;
-}
-
-int
-stt_start_timer_thread (void)
-{
- long pid;
-
- LASSERT (!stt_data.stt_shuttingdown);
-
- pid = cfs_kernel_thread(stt_timer_main, NULL, 0);
- if (pid < 0)
- return (int)pid;
-
- spin_lock(&stt_data.stt_lock);
- stt_data.stt_nthreads++;
- spin_unlock(&stt_data.stt_lock);
- return 0;
-}
-
-#else /* !__KERNEL__ */
-
-int
-stt_check_events (void)
-{
- return stt_check_timers(&stt_data.stt_prev_slot);
-}
-
-int
-stt_poll_interval (void)
-{
- return STTIMER_SLOTTIME;
-}
-
-#endif
-
-int
-stt_startup (void)
-{
- int rc = 0;
- int i;
-
- stt_data.stt_shuttingdown = 0;
- stt_data.stt_prev_slot = cfs_time_current_sec() & STTIMER_SLOTTIMEMASK;
-
- spin_lock_init(&stt_data.stt_lock);
- for (i = 0; i < STTIMER_NSLOTS; i++)
- CFS_INIT_LIST_HEAD(&stt_data.stt_hash[i]);
-
-#ifdef __KERNEL__
- stt_data.stt_nthreads = 0;
- cfs_waitq_init(&stt_data.stt_waitq);
- rc = stt_start_timer_thread();
- if (rc != 0)
- CERROR ("Can't spawn timer thread: %d\n", rc);
-#endif
-
- return rc;
-}
-
-void
-stt_shutdown (void)
-{
- int i;
-
- spin_lock(&stt_data.stt_lock);
-
- for (i = 0; i < STTIMER_NSLOTS; i++)
- LASSERT (list_empty(&stt_data.stt_hash[i]));
-
- stt_data.stt_shuttingdown = 1;
-
-#ifdef __KERNEL__
- cfs_waitq_signal(&stt_data.stt_waitq);
- lst_wait_until(stt_data.stt_nthreads == 0, stt_data.stt_lock,
- "waiting for %d threads to terminate\n",
- stt_data.stt_nthreads);
-#endif
-
- spin_unlock(&stt_data.stt_lock);
- return;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- */
-#ifndef __SELFTEST_TIMER_H__
-#define __SELFTEST_TIMER_H__
-
-typedef struct {
- struct list_head stt_list;
- cfs_time_t stt_expires;
- void (*stt_func) (void *);
- void *stt_data;
-} stt_timer_t;
-
-void stt_add_timer (stt_timer_t *timer);
-int stt_del_timer (stt_timer_t *timer);
-int stt_startup (void);
-void stt_shutdown (void);
-
-#endif /* __SELFTEST_TIMER_H__ */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-
-struct smoketest_workitem {
- struct list_head wi_runq; /* concurrent workitems */
- struct list_head wi_serial_runq; /* serialised workitems */
- cfs_waitq_t wi_waitq; /* where schedulers sleep */
- cfs_waitq_t wi_serial_waitq; /* where serial scheduler sleep */
- spinlock_t wi_lock; /* serialize */
- int wi_shuttingdown;
- int wi_nthreads;
-} swi_data;
-
-static inline int
-swi_sched_cansleep (struct list_head *q)
-{
- int rc;
-
- spin_lock(&swi_data.wi_lock);
-
- rc = !swi_data.wi_shuttingdown && list_empty(q);
-
- spin_unlock(&swi_data.wi_lock);
- return rc;
-}
-
-/* XXX:
- * 0. it only works when called from wi->wi_action.
- * 1. when it returns no one shall try to schedule the workitem.
- */
-void
-swi_kill_workitem (swi_workitem_t *wi)
-{
- LASSERT (!in_interrupt()); /* because we use plain spinlock */
- LASSERT (!swi_data.wi_shuttingdown);
-
- spin_lock(&swi_data.wi_lock);
-
-#ifdef __KERNEL__
- LASSERT (wi->wi_running);
-#endif
-
- if (wi->wi_scheduled) { /* cancel pending schedules */
- LASSERT (!list_empty(&wi->wi_list));
- list_del_init(&wi->wi_list);
- }
-
- LASSERT (list_empty(&wi->wi_list));
- wi->wi_scheduled = 1; /* LBUG future schedule attempts */
-
- spin_unlock(&swi_data.wi_lock);
- return;
-}
-
-void
-swi_schedule_workitem (swi_workitem_t *wi)
-{
- LASSERT (!in_interrupt()); /* because we use plain spinlock */
- LASSERT (!swi_data.wi_shuttingdown);
-
- spin_lock(&swi_data.wi_lock);
-
- if (!wi->wi_scheduled) {
- LASSERT (list_empty(&wi->wi_list));
-
- wi->wi_scheduled = 1;
- list_add_tail(&wi->wi_list, &swi_data.wi_runq);
- cfs_waitq_signal(&swi_data.wi_waitq);
- }
-
- LASSERT (!list_empty(&wi->wi_list));
- spin_unlock(&swi_data.wi_lock);
- return;
-}
-
-/*
- * Workitem scheduled by this function is strictly serialised not only with
- * itself, but also with others scheduled this way.
- *
- * Now there's only one static serialised queue, but in the future more might
- * be added, and even dynamic creation of serialised queues might be supported.
- */
-void
-swi_schedule_serial_workitem (swi_workitem_t *wi)
-{
- LASSERT (!in_interrupt()); /* because we use plain spinlock */
- LASSERT (!swi_data.wi_shuttingdown);
-
- spin_lock(&swi_data.wi_lock);
-
- if (!wi->wi_scheduled) {
- LASSERT (list_empty(&wi->wi_list));
-
- wi->wi_scheduled = 1;
- list_add_tail(&wi->wi_list, &swi_data.wi_serial_runq);
- cfs_waitq_signal(&swi_data.wi_serial_waitq);
- }
-
- LASSERT (!list_empty(&wi->wi_list));
- spin_unlock(&swi_data.wi_lock);
- return;
-}
-
-#ifdef __KERNEL__
-
-int
-swi_scheduler_main (void *arg)
-{
- int id = (long) arg;
- char name[16];
-
- snprintf(name, sizeof(name), "swi_sd%03d", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- spin_lock(&swi_data.wi_lock);
-
- while (!swi_data.wi_shuttingdown) {
- int nloops = 0;
- int rc;
- swi_workitem_t *wi;
-
- while (!list_empty(&swi_data.wi_runq) &&
- nloops < SWI_RESCHED) {
- wi = list_entry(swi_data.wi_runq.next,
- swi_workitem_t, wi_list);
- list_del_init(&wi->wi_list);
-
- LASSERT (wi->wi_scheduled);
-
- nloops++;
- if (wi->wi_running) {
- list_add_tail(&wi->wi_list, &swi_data.wi_runq);
- continue;
- }
-
- wi->wi_running = 1;
- wi->wi_scheduled = 0;
- spin_unlock(&swi_data.wi_lock);
-
- rc = (*wi->wi_action) (wi);
-
- spin_lock(&swi_data.wi_lock);
- if (rc == 0) /* wi still active */
- wi->wi_running = 0;
- }
-
- spin_unlock(&swi_data.wi_lock);
-
- if (nloops < SWI_RESCHED)
- wait_event_interruptible_exclusive(
- swi_data.wi_waitq,
- !swi_sched_cansleep(&swi_data.wi_runq));
- else
- our_cond_resched();
-
- spin_lock(&swi_data.wi_lock);
- }
-
- swi_data.wi_nthreads--;
- spin_unlock(&swi_data.wi_lock);
- return 0;
-}
-
-int
-swi_serial_scheduler_main (void *arg)
-{
- UNUSED (arg);
-
- cfs_daemonize("swi_serial_sd");
- cfs_block_allsigs();
-
- spin_lock(&swi_data.wi_lock);
-
- while (!swi_data.wi_shuttingdown) {
- int nloops = 0;
- int rc;
- swi_workitem_t *wi;
-
- while (!list_empty(&swi_data.wi_serial_runq) &&
- nloops < SWI_RESCHED) {
- wi = list_entry(swi_data.wi_serial_runq.next,
- swi_workitem_t, wi_list);
- list_del_init(&wi->wi_list);
-
- LASSERT (!wi->wi_running);
- LASSERT (wi->wi_scheduled);
-
- nloops++;
- wi->wi_running = 1;
- wi->wi_scheduled = 0;
- spin_unlock(&swi_data.wi_lock);
-
- rc = (*wi->wi_action) (wi);
-
- spin_lock(&swi_data.wi_lock);
- if (rc == 0) /* wi still active */
- wi->wi_running = 0;
- }
-
- spin_unlock(&swi_data.wi_lock);
-
- if (nloops < SWI_RESCHED)
- wait_event_interruptible_exclusive(
- swi_data.wi_serial_waitq,
- !swi_sched_cansleep(&swi_data.wi_serial_runq));
- else
- our_cond_resched();
-
- spin_lock(&swi_data.wi_lock);
- }
-
- swi_data.wi_nthreads--;
- spin_unlock(&swi_data.wi_lock);
- return 0;
-}
-
-int
-swi_start_thread (int (*func) (void*), void *arg)
-{
- long pid;
-
- LASSERT (!swi_data.wi_shuttingdown);
-
- pid = cfs_kernel_thread(func, arg, 0);
- if (pid < 0)
- return (int)pid;
-
- spin_lock(&swi_data.wi_lock);
- swi_data.wi_nthreads++;
- spin_unlock(&swi_data.wi_lock);
- return 0;
-}
-
-#else /* __KERNEL__ */
-
-int
-swi_check_events (void)
-{
- int n = 0;
- swi_workitem_t *wi;
- struct list_head *q;
-
- spin_lock(&swi_data.wi_lock);
-
- for (;;) {
- if (!list_empty(&swi_data.wi_serial_runq))
- q = &swi_data.wi_serial_runq;
- else if (!list_empty(&swi_data.wi_runq))
- q = &swi_data.wi_runq;
- else
- break;
-
- wi = list_entry(q->next, swi_workitem_t, wi_list);
- list_del_init(&wi->wi_list);
-
- LASSERT (wi->wi_scheduled);
- wi->wi_scheduled = 0;
- spin_unlock(&swi_data.wi_lock);
-
- n++;
- (*wi->wi_action) (wi);
-
- spin_lock(&swi_data.wi_lock);
- }
-
- spin_unlock(&swi_data.wi_lock);
- return n;
-}
-
-#endif
-
-int
-swi_startup (void)
-{
- int i;
- int rc;
-
- swi_data.wi_nthreads = 0;
- swi_data.wi_shuttingdown = 0;
- spin_lock_init(&swi_data.wi_lock);
- cfs_waitq_init(&swi_data.wi_waitq);
- cfs_waitq_init(&swi_data.wi_serial_waitq);
- CFS_INIT_LIST_HEAD(&swi_data.wi_runq);
- CFS_INIT_LIST_HEAD(&swi_data.wi_serial_runq);
-
-#ifdef __KERNEL__
- rc = swi_start_thread(swi_serial_scheduler_main, NULL);
- if (rc != 0) {
- LASSERT (swi_data.wi_nthreads == 0);
- CERROR ("Can't spawn serial workitem scheduler: %d\n", rc);
- return rc;
- }
-
- for (i = 0; i < num_online_cpus(); i++) {
- rc = swi_start_thread(swi_scheduler_main, (void *) (long) i);
- if (rc != 0) {
- CERROR ("Can't spawn workitem scheduler: %d\n", rc);
- swi_shutdown();
- return rc;
- }
- }
-#else
- UNUSED(i);
- UNUSED(rc);
-#endif
-
- return 0;
-}
-
-void
-swi_shutdown (void)
-{
- spin_lock(&swi_data.wi_lock);
-
- LASSERT (list_empty(&swi_data.wi_runq));
- LASSERT (list_empty(&swi_data.wi_serial_runq));
-
- swi_data.wi_shuttingdown = 1;
-
-#ifdef __KERNEL__
- cfs_waitq_broadcast(&swi_data.wi_waitq);
- cfs_waitq_broadcast(&swi_data.wi_serial_waitq);
- lst_wait_until(swi_data.wi_nthreads == 0, swi_data.wi_lock,
- "waiting for %d threads to terminate\n",
- swi_data.wi_nthreads);
-#endif
-
- spin_unlock(&swi_data.wi_lock);
- return;
-}
+++ /dev/null
-.deps
-Makefile
-autoMakefile
-autoMakefile.in
+++ /dev/null
-@BUILD_USOCKLND_TRUE@subdir-m += socklnd
-@BUILD_UPTLLND_TRUE@subdir-m += ptllnd
-
-@INCLUDE_RULES@
-
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-SUBDIRS = socklnd ptllnd
+++ /dev/null
-.deps
-Makefile
-Makefile.in
+++ /dev/null
-
-if BUILD_UPTLLND
-if LIBLUSTRE
-noinst_LIBRARIES = libptllnd.a
-noinst_HEADERS = ptllnd.h
-libptllnd_a_SOURCES = ptllnd.h ptllnd.c ptllnd_cb.c
-libptllnd_a_CPPFLAGS= $(LLCPPFLAGS)
-# I need $(PTLNDCPPLFLAGS) to be AFTER $(CPPFLAGS)
-# Adding them into $(AM_CFLAGS) seems wrong, but lets me get on..
-libptllnd_a_CFLAGS= $(PTLLNDCPPFLAGS) $(LLCFLAGS)
-endif
-endif
+++ /dev/null
-
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: Eric Barton <eeb@bartonsoftware.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-#include "ptllnd.h"
-
-lnd_t the_ptllnd = {
- .lnd_type = PTLLND,
- .lnd_startup = ptllnd_startup,
- .lnd_shutdown = ptllnd_shutdown,
- .lnd_ctl = ptllnd_ctl,
- .lnd_send = ptllnd_send,
- .lnd_recv = ptllnd_recv,
- .lnd_eager_recv = ptllnd_eager_recv,
- .lnd_notify = ptllnd_notify,
- .lnd_wait = ptllnd_wait,
- .lnd_setasync = ptllnd_setasync,
-};
-
-static int ptllnd_ni_count = 0;
-
-static struct list_head ptllnd_idle_history;
-static struct list_head ptllnd_history_list;
-
-void
-ptllnd_history_fini(void)
-{
- ptllnd_he_t *he;
-
- while (!list_empty(&ptllnd_idle_history)) {
- he = list_entry(ptllnd_idle_history.next,
- ptllnd_he_t, he_list);
-
- list_del(&he->he_list);
- LIBCFS_FREE(he, sizeof(*he));
- }
-
- while (!list_empty(&ptllnd_history_list)) {
- he = list_entry(ptllnd_history_list.next,
- ptllnd_he_t, he_list);
-
- list_del(&he->he_list);
- LIBCFS_FREE(he, sizeof(*he));
- }
-}
-
-int
-ptllnd_history_init(void)
-{
- int i;
- ptllnd_he_t *he;
- int n;
- int rc;
-
- CFS_INIT_LIST_HEAD(&ptllnd_idle_history);
- CFS_INIT_LIST_HEAD(&ptllnd_history_list);
-
- rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0);
- if (rc != 0)
- return rc;
-
- for (i = 0; i < n; i++) {
- LIBCFS_ALLOC(he, sizeof(*he));
- if (he == NULL) {
- ptllnd_history_fini();
- return -ENOMEM;
- }
-
- list_add(&he->he_list, &ptllnd_idle_history);
- }
-
- PTLLND_HISTORY("Init");
-
- return 0;
-}
-
-void
-ptllnd_history(const char *fn, const char *file, const int line,
- const char *fmt, ...)
-{
- static int seq;
-
- va_list ap;
- ptllnd_he_t *he;
-
- if (!list_empty(&ptllnd_idle_history)) {
- he = list_entry(ptllnd_idle_history.next,
- ptllnd_he_t, he_list);
- } else if (!list_empty(&ptllnd_history_list)) {
- he = list_entry(ptllnd_history_list.next,
- ptllnd_he_t, he_list);
- } else {
- return;
- }
-
- list_del(&he->he_list);
- list_add_tail(&he->he_list, &ptllnd_history_list);
-
- he->he_seq = seq++;
- he->he_fn = fn;
- he->he_file = file;
- he->he_line = line;
- gettimeofday(&he->he_time, NULL);
-
- va_start(ap, fmt);
- vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap);
- va_end(ap);
-}
-
-void
-ptllnd_dump_history(void)
-{
- ptllnd_he_t *he;
-
- PTLLND_HISTORY("dumping...");
-
- while (!list_empty(&ptllnd_history_list)) {
- he = list_entry(ptllnd_history_list.next,
- ptllnd_he_t, he_list);
-
- list_del(&he->he_list);
-
- CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq,
- (int)he->he_time.tv_sec, (int)he->he_time.tv_usec,
- he->he_file, he->he_line, he->he_fn, he->he_msg);
-
- list_add_tail(&he->he_list, &ptllnd_idle_history);
- }
-
- PTLLND_HISTORY("complete");
-}
-
-void
-ptllnd_assert_wire_constants (void)
-{
- /* Wire protocol assertions generated by 'wirecheck'
- * running on Linux fedora 2.6.11-co-0.6.4 #1 Mon Jun 19 05:36:13 UTC 2006 i686 i686 i386 GNU
- * with gcc version 4.1.1 20060525 (Red Hat 4.1.1-1) */
-
-
- /* Constants... */
- CLASSERT (PTL_RESERVED_MATCHBITS == 0x100);
- CLASSERT (LNET_MSG_MATCHBITS == 0);
- CLASSERT (PTLLND_MSG_MAGIC == 0x50746C4E);
- CLASSERT (PTLLND_MSG_VERSION == 0x04);
- CLASSERT (PTLLND_RDMA_OK == 0x00);
- CLASSERT (PTLLND_RDMA_FAIL == 0x01);
- CLASSERT (PTLLND_MSG_TYPE_INVALID == 0x00);
- CLASSERT (PTLLND_MSG_TYPE_PUT == 0x01);
- CLASSERT (PTLLND_MSG_TYPE_GET == 0x02);
- CLASSERT (PTLLND_MSG_TYPE_IMMEDIATE == 0x03);
- CLASSERT (PTLLND_MSG_TYPE_NOOP == 0x04);
- CLASSERT (PTLLND_MSG_TYPE_HELLO == 0x05);
- CLASSERT (PTLLND_MSG_TYPE_NAK == 0x06);
-
- /* Checks for struct kptl_msg_t */
- CLASSERT ((int)sizeof(kptl_msg_t) == 136);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_magic) == 0);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_magic) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_version) == 4);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_version) == 2);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_type) == 6);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_type) == 1);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_credits) == 7);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_credits) == 1);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_nob) == 8);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_nob) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_cksum) == 12);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_cksum) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcnid) == 16);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcnid) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcstamp) == 24);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcstamp) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstnid) == 32);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstnid) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dststamp) == 40);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dststamp) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcpid) == 48);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcpid) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstpid) == 52);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstpid) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.immediate) == 56);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.immediate) == 72);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.rdma) == 56);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.rdma) == 80);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.hello) == 56);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.hello) == 12);
-
- /* Checks for struct kptl_immediate_msg_t */
- CLASSERT ((int)sizeof(kptl_immediate_msg_t) == 72);
- CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_hdr) == 0);
- CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_hdr) == 72);
- CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_payload[13]) == 85);
- CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_payload[13]) == 1);
-
- /* Checks for struct kptl_rdma_msg_t */
- CLASSERT ((int)sizeof(kptl_rdma_msg_t) == 80);
- CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_hdr) == 0);
- CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_hdr) == 72);
- CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_matchbits) == 72);
- CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_matchbits) == 8);
-
- /* Checks for struct kptl_hello_msg_t */
- CLASSERT ((int)sizeof(kptl_hello_msg_t) == 12);
- CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_matchbits) == 0);
- CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_matchbits) == 8);
- CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_max_msg_size) == 8);
- CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_max_msg_size) == 4);
-}
-
-int
-ptllnd_parse_int_tunable(int *value, char *name, int dflt)
-{
- char *env = getenv(name);
- char *end;
-
- if (env == NULL) {
- *value = dflt;
- return 0;
- }
-
- *value = strtoull(env, &end, 0);
- if (*end == 0)
- return 0;
-
- CERROR("Can't parse tunable %s=%s\n", name, env);
- return -EINVAL;
-}
-
-int
-ptllnd_get_tunables(lnet_ni_t *ni)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- int max_msg_size;
- int msgs_per_buffer;
- int rc;
- int temp;
-
- /* Other tunable defaults depend on this */
- rc = ptllnd_parse_int_tunable(&plni->plni_debug, "PTLLND_DEBUG", 0);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_portal,
- "PTLLND_PORTAL", PTLLND_PORTAL);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&temp,
- "PTLLND_PID", PTLLND_PID);
- if (rc != 0)
- return rc;
- plni->plni_ptllnd_pid = (ptl_pid_t)temp;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_peer_credits,
- "PTLLND_PEERCREDITS", PTLLND_PEERCREDITS);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&max_msg_size,
- "PTLLND_MAX_MSG_SIZE",
- PTLLND_MAX_ULND_MSG_SIZE);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&msgs_per_buffer,
- "PTLLND_MSGS_PER_BUFFER", 64);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_msgs_spare,
- "PTLLND_MSGS_SPARE", 256);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_peer_hash_size,
- "PTLLND_PEER_HASH_SIZE", 101);
- if (rc != 0)
- return rc;
-
-
- rc = ptllnd_parse_int_tunable(&plni->plni_eq_size,
- "PTLLND_EQ_SIZE", 1024);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_checksum,
- "PTLLND_CHECKSUM", 0);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history,
- "PTLLND_TX_HISTORY",
- plni->plni_debug ? 1024 : 0);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_protocol_mismatch,
- "PTLLND_ABORT_ON_PROTOCOL_MISMATCH", 1);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak,
- "PTLLND_ABORT_ON_NAK", 0);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak,
- "PTLLND_DUMP_ON_NAK", plni->plni_debug);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_watchdog_interval,
- "PTLLND_WATCHDOG_INTERVAL", 1);
- if (rc != 0)
- return rc;
- if (plni->plni_watchdog_interval <= 0)
- plni->plni_watchdog_interval = 1;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_timeout,
- "PTLLND_TIMEOUT", 50);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_long_wait,
- "PTLLND_LONG_WAIT",
- plni->plni_debug ? 5 : plni->plni_timeout);
- if (rc != 0)
- return rc;
- plni->plni_long_wait *= 1000; /* convert to mS */
-
- plni->plni_max_msg_size = max_msg_size & ~7;
- if (plni->plni_max_msg_size < PTLLND_MIN_BUFFER_SIZE)
- plni->plni_max_msg_size = PTLLND_MIN_BUFFER_SIZE;
- CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
- CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
-
- plni->plni_buffer_size = plni->plni_max_msg_size * msgs_per_buffer;
-
- CDEBUG(D_NET, "portal = %d\n",plni->plni_portal);
- CDEBUG(D_NET, "ptllnd_pid = %d\n",plni->plni_ptllnd_pid);
- CDEBUG(D_NET, "max_msg_size = %d\n",max_msg_size);
- CDEBUG(D_NET, "msgs_per_buffer = %d\n",msgs_per_buffer);
- CDEBUG(D_NET, "msgs_spare = %d\n",plni->plni_msgs_spare);
- CDEBUG(D_NET, "peer_hash_size = %d\n",plni->plni_peer_hash_size);
- CDEBUG(D_NET, "eq_size = %d\n",plni->plni_eq_size);
- CDEBUG(D_NET, "max_msg_size = %d\n",plni->plni_max_msg_size);
- CDEBUG(D_NET, "buffer_size = %d\n",plni->plni_buffer_size);
-
- return 0;
-}
-
-ptllnd_buffer_t *
-ptllnd_create_buffer (lnet_ni_t *ni)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_buffer_t *buf;
-
- LIBCFS_ALLOC(buf, sizeof(*buf));
- if (buf == NULL) {
- CERROR("Can't allocate buffer descriptor\n");
- return NULL;
- }
-
- buf->plb_ni = ni;
- buf->plb_posted = 0;
- CFS_INIT_LIST_HEAD(&buf->plb_list);
-
- LIBCFS_ALLOC(buf->plb_buffer, plni->plni_buffer_size);
- if (buf->plb_buffer == NULL) {
- CERROR("Can't allocate buffer size %d\n",
- plni->plni_buffer_size);
- LIBCFS_FREE(buf, sizeof(*buf));
- return NULL;
- }
-
- list_add(&buf->plb_list, &plni->plni_buffers);
- plni->plni_nbuffers++;
-
- return buf;
-}
-
-void
-ptllnd_destroy_buffer (ptllnd_buffer_t *buf)
-{
- ptllnd_ni_t *plni = buf->plb_ni->ni_data;
-
- LASSERT (!buf->plb_posted);
-
- plni->plni_nbuffers--;
- list_del(&buf->plb_list);
- LIBCFS_FREE(buf->plb_buffer, plni->plni_buffer_size);
- LIBCFS_FREE(buf, sizeof(*buf));
-}
-
-int
-ptllnd_size_buffers (lnet_ni_t *ni, int delta)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_buffer_t *buf;
- int nmsgs;
- int nbufs;
- int rc;
-
- CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
- CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers);
-
- plni->plni_nmsgs += delta;
- LASSERT(plni->plni_nmsgs >= 0);
-
- nmsgs = plni->plni_nmsgs + plni->plni_msgs_spare;
-
- nbufs = (nmsgs * plni->plni_max_msg_size + plni->plni_buffer_size - 1) /
- plni->plni_buffer_size;
-
- while (nbufs > plni->plni_nbuffers) {
- buf = ptllnd_create_buffer(ni);
-
- if (buf == NULL)
- return -ENOMEM;
-
- rc = ptllnd_post_buffer(buf);
- if (rc != 0) {
- /* TODO - this path seems to orpahn the buffer
- * in a state where its not posted and will never be
- * However it does not leak the buffer as it's
- * already been put onto the global buffer list
- * and will be cleaned up
- */
- return rc;
- }
- }
-
- CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers);
- CDEBUG(D_NET, "nbuffers = %d (after)\n",plni->plni_nbuffers);
- return 0;
-}
-
-void
-ptllnd_destroy_buffers (lnet_ni_t *ni)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_buffer_t *buf;
- struct list_head *tmp;
- struct list_head *nxt;
-
- CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
- CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers);
-
- list_for_each_safe(tmp, nxt, &plni->plni_buffers) {
- buf = list_entry(tmp, ptllnd_buffer_t, plb_list);
-
- //CDEBUG(D_NET, "buf=%p posted=%d\n",buf,buf->plb_posted);
-
- LASSERT (plni->plni_nbuffers > 0);
- if (buf->plb_posted) {
- time_t start = cfs_time_current_sec();
- int w = plni->plni_long_wait;
-
- LASSERT (plni->plni_nposted_buffers > 0);
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- (void) PtlMDUnlink(buf->plb_md);
-
- while (buf->plb_posted) {
- if (w > 0 && cfs_time_current_sec() > start + w/1000) {
- CWARN("Waited %ds to unlink buffer\n",
- (int)(cfs_time_current_sec() - start));
- w *= 2;
- }
- ptllnd_wait(ni, w);
- }
-#else
- while (buf->plb_posted) {
- rc = PtlMDUnlink(buf->plb_md);
- if (rc == PTL_OK) {
- buf->plb_posted = 0;
- plni->plni_nposted_buffers--;
- break;
- }
- LASSERT (rc == PTL_MD_IN_USE);
- if (w > 0 && cfs_time_current_sec() > start + w/1000) {
- CWARN("Waited %ds to unlink buffer\n",
- cfs_time_current_sec() - start);
- w *= 2;
- }
- ptllnd_wait(ni, w);
- }
-#endif
- }
- ptllnd_destroy_buffer(buf);
- }
-
- CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers);
- CDEBUG(D_NET, "nbuffers = %d (after)\n",plni->plni_nbuffers);
-
- LASSERT (plni->plni_nposted_buffers == 0);
- LASSERT (plni->plni_nbuffers == 0);
-}
-
-int
-ptllnd_create_peer_hash (lnet_ni_t *ni)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- int i;
-
- plni->plni_npeers = 0;
-
- LIBCFS_ALLOC(plni->plni_peer_hash,
- plni->plni_peer_hash_size * sizeof(*plni->plni_peer_hash));
- if (plni->plni_peer_hash == NULL) {
- CERROR("Can't allocate ptllnd peer hash (size %d)\n",
- plni->plni_peer_hash_size);
- return -ENOMEM;
- }
-
- for (i = 0; i < plni->plni_peer_hash_size; i++)
- CFS_INIT_LIST_HEAD(&plni->plni_peer_hash[i]);
-
- return 0;
-}
-
-void
-ptllnd_destroy_peer_hash (lnet_ni_t *ni)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- int i;
-
- LASSERT( plni->plni_npeers == 0);
-
- for (i = 0; i < plni->plni_peer_hash_size; i++)
- LASSERT (list_empty(&plni->plni_peer_hash[i]));
-
- LIBCFS_FREE(plni->plni_peer_hash,
- plni->plni_peer_hash_size * sizeof(*plni->plni_peer_hash));
-}
-
-void
-ptllnd_close_peers (lnet_ni_t *ni)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_peer_t *plp;
- int i;
-
- for (i = 0; i < plni->plni_peer_hash_size; i++)
- while (!list_empty(&plni->plni_peer_hash[i])) {
- plp = list_entry(plni->plni_peer_hash[i].next,
- ptllnd_peer_t, plp_list);
-
- ptllnd_close_peer(plp, 0);
- }
-}
-
-int
-ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- switch (cmd) {
- case IOC_LIBCFS_DEBUG_PEER:
- ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg));
- return 0;
-
- default:
- return -EINVAL;
- }
-}
-
-__u64
-ptllnd_get_timestamp(void)
-{
- struct timeval tv;
- int rc = gettimeofday(&tv, NULL);
-
- LASSERT (rc == 0);
- return ((__u64)tv.tv_sec) * 1000000 + tv.tv_usec;
-}
-
-void
-ptllnd_shutdown (lnet_ni_t *ni)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- int rc;
- time_t start = cfs_time_current_sec();
- int w = plni->plni_long_wait;
-
- LASSERT (ptllnd_ni_count == 1);
- plni->plni_max_tx_history = 0;
-
- ptllnd_cull_tx_history(plni);
-
- ptllnd_close_peers(ni);
- ptllnd_destroy_buffers(ni);
-
- while (plni->plni_npeers > 0) {
- if (w > 0 && cfs_time_current_sec() > start + w/1000) {
- CWARN("Waited %ds for peers to shutdown\n",
- (int)(cfs_time_current_sec() - start));
- w *= 2;
- }
- ptllnd_wait(ni, w);
- }
-
- LASSERT (plni->plni_ntxs == 0);
- LASSERT (plni->plni_nrxs == 0);
-
- rc = PtlEQFree(plni->plni_eqh);
- LASSERT (rc == PTL_OK);
-
- rc = PtlNIFini(plni->plni_nih);
- LASSERT (rc == PTL_OK);
-
- ptllnd_destroy_peer_hash(ni);
- LIBCFS_FREE(plni, sizeof(*plni));
- ptllnd_ni_count--;
-}
-
-int
-ptllnd_startup (lnet_ni_t *ni)
-{
- ptllnd_ni_t *plni;
- int rc;
-
- /* could get limits from portals I guess... */
- ni->ni_maxtxcredits =
- ni->ni_peertxcredits = 1000;
-
- if (ptllnd_ni_count != 0) {
- CERROR("Can't have > 1 instance of ptllnd\n");
- return -EPERM;
- }
-
- ptllnd_ni_count++;
-
- rc = ptllnd_history_init();
- if (rc != 0) {
- CERROR("Can't init history\n");
- goto failed0;
- }
-
- LIBCFS_ALLOC(plni, sizeof(*plni));
- if (plni == NULL) {
- CERROR("Can't allocate ptllnd state\n");
- rc = -ENOMEM;
- goto failed0;
- }
-
- ni->ni_data = plni;
-
- plni->plni_stamp = ptllnd_get_timestamp();
- plni->plni_nrxs = 0;
- plni->plni_ntxs = 0;
- plni->plni_ntx_history = 0;
- plni->plni_watchdog_peeridx = 0;
- plni->plni_watchdog_nextt = cfs_time_current_sec();
- CFS_INIT_LIST_HEAD(&plni->plni_zombie_txs);
- CFS_INIT_LIST_HEAD(&plni->plni_tx_history);
-
- /*
- * Initilize buffer related data structures
- */
- CFS_INIT_LIST_HEAD(&plni->plni_buffers);
- plni->plni_nbuffers = 0;
- plni->plni_nposted_buffers = 0;
-
- rc = ptllnd_get_tunables(ni);
- if (rc != 0)
- goto failed1;
-
- rc = ptllnd_create_peer_hash(ni);
- if (rc != 0)
- goto failed1;
-
- /* NB I most probably won't get the PID I requested here. It doesn't
- * matter because I don't need a fixed PID (only connection acceptors
- * need a "well known" PID). */
-
- rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_ptllnd_pid,
- NULL, NULL, &plni->plni_nih);
- if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
- CERROR("PtlNIInit failed: %s(%d)\n",
- ptllnd_errtype2str(rc), rc);
- rc = -ENODEV;
- goto failed2;
- }
-
- rc = PtlEQAlloc(plni->plni_nih, plni->plni_eq_size,
- PTL_EQ_HANDLER_NONE, &plni->plni_eqh);
- if (rc != PTL_OK) {
- CERROR("PtlEQAlloc failed: %s(%d)\n",
- ptllnd_errtype2str(rc), rc);
- rc = -ENODEV;
- goto failed3;
- }
-
- /*
- * Fetch the Portals NID
- */
- rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id);
- if (rc != PTL_OK) {
- CERROR ("PtlGetID failed : %s(%d)\n",
- ptllnd_errtype2str(rc), rc);
- rc = -EINVAL;
- goto failed4;
- }
-
- /*
- * Create the new NID. Based on the LND network type
- * and the lower ni's address data.
- */
- ni->ni_nid = ptllnd_ptl2lnetnid(ni, plni->plni_portals_id.nid);
-
- CDEBUG(D_NET, "ptl id =%s\n", ptllnd_ptlid2str(plni->plni_portals_id));
- CDEBUG(D_NET, "lnet id =%s (passed back)\n",
- libcfs_id2str((lnet_process_id_t) {
- .nid = ni->ni_nid, .pid = the_lnet.ln_pid}));
-
- rc = ptllnd_size_buffers(ni, 0);
- if (rc != 0)
- goto failed4;
-
- return 0;
-
- failed4:
- ptllnd_destroy_buffers(ni);
- PtlEQFree(plni->plni_eqh);
- failed3:
- PtlNIFini(plni->plni_nih);
- failed2:
- ptllnd_destroy_peer_hash(ni);
- failed1:
- LIBCFS_FREE(plni, sizeof(*plni));
- failed0:
- ptllnd_history_fini();
- ptllnd_ni_count--;
- CDEBUG(D_NET, "<<< rc=%d\n",rc);
- return rc;
-}
-
-const char *ptllnd_evtype2str(int type)
-{
-#define DO_TYPE(x) case x: return #x;
- switch(type)
- {
- DO_TYPE(PTL_EVENT_GET_START);
- DO_TYPE(PTL_EVENT_GET_END);
- DO_TYPE(PTL_EVENT_PUT_START);
- DO_TYPE(PTL_EVENT_PUT_END);
- DO_TYPE(PTL_EVENT_REPLY_START);
- DO_TYPE(PTL_EVENT_REPLY_END);
- DO_TYPE(PTL_EVENT_ACK);
- DO_TYPE(PTL_EVENT_SEND_START);
- DO_TYPE(PTL_EVENT_SEND_END);
- DO_TYPE(PTL_EVENT_UNLINK);
- default:
- return "<unknown event type>";
- }
-#undef DO_TYPE
-}
-
-const char *ptllnd_msgtype2str(int type)
-{
-#define DO_TYPE(x) case x: return #x;
- switch(type)
- {
- DO_TYPE(PTLLND_MSG_TYPE_INVALID);
- DO_TYPE(PTLLND_MSG_TYPE_PUT);
- DO_TYPE(PTLLND_MSG_TYPE_GET);
- DO_TYPE(PTLLND_MSG_TYPE_IMMEDIATE);
- DO_TYPE(PTLLND_MSG_TYPE_HELLO);
- DO_TYPE(PTLLND_MSG_TYPE_NOOP);
- DO_TYPE(PTLLND_MSG_TYPE_NAK);
- default:
- return "<unknown msg type>";
- }
-#undef DO_TYPE
-}
-
-const char *ptllnd_errtype2str(int type)
-{
-#define DO_TYPE(x) case x: return #x;
- switch(type)
- {
- DO_TYPE(PTL_OK);
- DO_TYPE(PTL_SEGV);
- DO_TYPE(PTL_NO_SPACE);
- DO_TYPE(PTL_ME_IN_USE);
- DO_TYPE(PTL_NAL_FAILED);
- DO_TYPE(PTL_NO_INIT);
- DO_TYPE(PTL_IFACE_DUP);
- DO_TYPE(PTL_IFACE_INVALID);
- DO_TYPE(PTL_HANDLE_INVALID);
- DO_TYPE(PTL_MD_INVALID);
- DO_TYPE(PTL_ME_INVALID);
- DO_TYPE(PTL_PROCESS_INVALID);
- DO_TYPE(PTL_PT_INDEX_INVALID);
- DO_TYPE(PTL_SR_INDEX_INVALID);
- DO_TYPE(PTL_EQ_INVALID);
- DO_TYPE(PTL_EQ_DROPPED);
- DO_TYPE(PTL_EQ_EMPTY);
- DO_TYPE(PTL_MD_NO_UPDATE);
- DO_TYPE(PTL_FAIL);
- DO_TYPE(PTL_AC_INDEX_INVALID);
- DO_TYPE(PTL_MD_ILLEGAL);
- DO_TYPE(PTL_ME_LIST_TOO_LONG);
- DO_TYPE(PTL_MD_IN_USE);
- DO_TYPE(PTL_NI_INVALID);
- DO_TYPE(PTL_PID_INVALID);
- DO_TYPE(PTL_PT_FULL);
- DO_TYPE(PTL_VAL_FAILED);
- DO_TYPE(PTL_NOT_IMPLEMENTED);
- DO_TYPE(PTL_NO_ACK);
- DO_TYPE(PTL_EQ_IN_USE);
- DO_TYPE(PTL_PID_IN_USE);
- DO_TYPE(PTL_INV_EQ_SIZE);
- DO_TYPE(PTL_AGAIN);
- default:
- return "<unknown error type>";
- }
-#undef DO_TYPE
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: Eric Barton <eeb@bartonsoftware.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <lnet/lib-lnet.h>
-#include <lnet/ptllnd_wire.h>
-
-#include <portals/p30.h>
-#include <lnet/ptllnd.h> /* Depends on portals/p30.h */
-#include <stdarg.h>
-
-/* Hack to record history
- * This should really be done by CDEBUG(D_NETTRACE... */
-
-typedef struct {
- struct list_head he_list;
- struct timeval he_time;
- const char *he_fn;
- const char *he_file;
- int he_seq;
- int he_line;
- char he_msg[80];
-} ptllnd_he_t;
-
-void ptllnd_dump_history();
-void ptllnd_history(const char *fn, const char *file, const int line,
- const char *fmt, ...);
-#define PTLLND_HISTORY(fmt, a...) \
- ptllnd_history(__FUNCTION__, __FILE__, __LINE__, fmt, ## a)
-
-
-#define PTLLND_MD_OPTIONS (PTL_MD_LUSTRE_COMPLETION_SEMANTICS |\
- PTL_MD_EVENT_START_DISABLE)
-typedef struct
-{
- int plni_portal;
- ptl_pid_t plni_ptllnd_pid; /* Portals PID of peers I may connect to */
- int plni_peer_credits;
- int plni_max_msg_size;
- int plni_buffer_size;
- int plni_msgs_spare;
- int plni_peer_hash_size;
- int plni_eq_size;
- int plni_checksum;
- int plni_max_tx_history;
- int plni_abort_on_protocol_mismatch;
- int plni_abort_on_nak;
- int plni_dump_on_nak;
- int plni_debug;
- int plni_long_wait;
- int plni_watchdog_interval;
- int plni_timeout;
-
- __u64 plni_stamp;
- struct list_head plni_active_txs;
- struct list_head plni_zombie_txs;
- int plni_ntxs;
- int plni_nrxs;
-
- ptl_handle_ni_t plni_nih;
- ptl_handle_eq_t plni_eqh;
- ptl_process_id_t plni_portals_id; /* Portals ID of interface */
-
- struct list_head *plni_peer_hash;
- int plni_npeers;
-
- int plni_watchdog_nextt;
- int plni_watchdog_peeridx;
-
- struct list_head plni_tx_history;
- int plni_ntx_history;
-
- struct list_head plni_buffers;
- int plni_nbuffers;
- int plni_nposted_buffers;
- int plni_nmsgs;
-} ptllnd_ni_t;
-
-#define PTLLND_CREDIT_HIGHWATER(plni) ((plni)->plni_peer_credits - 1)
-
-typedef struct
-{
- struct list_head plp_list;
- lnet_ni_t *plp_ni;
- lnet_process_id_t plp_id;
- ptl_process_id_t plp_ptlid;
- int plp_credits; /* # msg buffers reserved for me at peer */
-
- /* credits for msg buffers I've posted for this peer...
- * outstanding - free buffers I've still to inform my peer about
- * sent - free buffers I've told my peer about
- * lazy - additional buffers (over and above plni_peer_credits)
- * posted to prevent peer blocking on sending a non-RDMA
- * messages to me when LNET isn't eagerly responsive to
- * the network (i.e. liblustre doesn't have control).
- * extra_lazy - lazy credits not required any more. */
- int plp_outstanding_credits;
- int plp_sent_credits;
- int plp_lazy_credits;
- int plp_extra_lazy_credits;
-
- int plp_max_msg_size;
- int plp_refcount;
- int plp_recvd_hello:1;
- int plp_closing:1;
- __u64 plp_match;
- __u64 plp_stamp;
- struct list_head plp_txq;
- struct list_head plp_activeq;
-} ptllnd_peer_t;
-
-typedef struct
-{
- struct list_head plb_list;
- lnet_ni_t *plb_ni;
- int plb_posted;
- ptl_handle_md_t plb_md;
- char *plb_buffer;
-} ptllnd_buffer_t;
-
-typedef struct
-{
- ptllnd_peer_t *rx_peer;
- kptl_msg_t *rx_msg;
- int rx_nob;
-} ptllnd_rx_t;
-
-typedef struct
-{
- struct list_head tx_list;
- int tx_type;
- int tx_status;
- ptllnd_peer_t *tx_peer;
- lnet_msg_t *tx_lnetmsg;
- lnet_msg_t *tx_lnetreplymsg;
- unsigned int tx_niov;
- ptl_md_iovec_t *tx_iov;
- ptl_handle_md_t tx_bulkmdh;
- ptl_handle_md_t tx_reqmdh;
- struct timeval tx_bulk_posted;
- struct timeval tx_bulk_done;
- struct timeval tx_req_posted;
- struct timeval tx_req_done;
- int tx_completing; /* someone already completing */
- int tx_msgsize; /* # bytes in tx_msg */
- time_t tx_deadline; /* time to complete by */
- kptl_msg_t tx_msg; /* message to send */
-} ptllnd_tx_t;
-
-#define PTLLND_RDMA_WRITE 0x100 /* pseudo message type */
-#define PTLLND_RDMA_READ 0x101 /* (no msg actually sent) */
-
-/* Hack to extract object type from event's user_ptr relies on (and checks)
- * that structs are somewhat aligned. */
-#define PTLLND_EVENTARG_TYPE_TX 0x1
-#define PTLLND_EVENTARG_TYPE_BUF 0x2
-#define PTLLND_EVENTARG_TYPE_MASK 0x3
-
-static inline void *
-ptllnd_obj2eventarg (void *obj, int type)
-{
- unsigned long ptr = (unsigned long)obj;
-
- LASSERT ((ptr & PTLLND_EVENTARG_TYPE_MASK) == 0);
- LASSERT ((type & ~PTLLND_EVENTARG_TYPE_MASK) == 0);
-
- return (void *)(ptr | type);
-}
-
-static inline int
-ptllnd_eventarg2type (void *arg)
-{
- unsigned long ptr = (unsigned long)arg;
-
- return (ptr & PTLLND_EVENTARG_TYPE_MASK);
-}
-
-static inline void *
-ptllnd_eventarg2obj (void *arg)
-{
- unsigned long ptr = (unsigned long)arg;
-
- return (void *)(ptr & ~PTLLND_EVENTARG_TYPE_MASK);
-}
-
-int ptllnd_parse_int_tunable(int *value, char *name, int dflt);
-void ptllnd_cull_tx_history(ptllnd_ni_t *plni);
-int ptllnd_startup(lnet_ni_t *ni);
-void ptllnd_shutdown(lnet_ni_t *ni);
-int ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg);
-int ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-int ptllnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
- void **new_privatep);
-
-ptllnd_tx_t *ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob);
-void ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive);
-int ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int n);
-void ptllnd_wait(lnet_ni_t *ni, int milliseconds);
-void ptllnd_check_sends(ptllnd_peer_t *peer);
-void ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id);
-void ptllnd_destroy_peer(ptllnd_peer_t *peer);
-void ptllnd_close_peer(ptllnd_peer_t *peer, int error);
-int ptllnd_post_buffer(ptllnd_buffer_t *buf);
-int ptllnd_size_buffers (lnet_ni_t *ni, int delta);
-const char *ptllnd_evtype2str(int type);
-const char *ptllnd_msgtype2str(int type);
-const char *ptllnd_errtype2str(int type);
-char *ptllnd_ptlid2str(ptl_process_id_t id);
-void ptllnd_dump_debug(lnet_ni_t *ni, lnet_process_id_t id);
-
-
-static inline void
-ptllnd_peer_addref (ptllnd_peer_t *peer)
-{
- LASSERT (peer->plp_refcount > 0);
- peer->plp_refcount++;
-}
-
-static inline void
-ptllnd_peer_decref (ptllnd_peer_t *peer)
-{
- LASSERT (peer->plp_refcount > 0);
- peer->plp_refcount--;
- if (peer->plp_refcount == 0)
- ptllnd_destroy_peer(peer);
-}
-
-static inline lnet_nid_t
-ptllnd_ptl2lnetnid(lnet_ni_t *ni, ptl_nid_t portals_nid)
-{
- return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid);
-}
-
-static inline ptl_nid_t
-ptllnd_lnet2ptlnid(lnet_nid_t lnet_nid)
-{
- return LNET_NIDADDR(lnet_nid);
-}
-
-/*
- * A note about lprintf():
- * Normally printf() is redirected to stdout of the console
- * from which yod launched the catamount application. However
- * there is a lot of initilziation code that runs before this
- * redirection is hooked up, and printf() seems to go to the bit bucket
- *
- * To get any kind of debug output and init time lprintf() can
- * be used to output to the console from which bookqk was used to
- * boot the catamount node. This works for debugging some simple
- * cases.
- */
-
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: Eric Barton <eeb@bartonsoftware.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
- *
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
- *
- */
-
-#include "ptllnd.h"
-
-void
-ptllnd_set_tx_deadline(ptllnd_tx_t *tx)
-{
- ptllnd_peer_t *peer = tx->tx_peer;
- lnet_ni_t *ni = peer->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
-
- tx->tx_deadline = cfs_time_current_sec() + plni->plni_timeout;
-}
-
-void
-ptllnd_post_tx(ptllnd_tx_t *tx)
-{
- ptllnd_peer_t *peer = tx->tx_peer;
-
- ptllnd_set_tx_deadline(tx);
- list_add_tail(&tx->tx_list, &peer->plp_txq);
- ptllnd_check_sends(peer);
-}
-
-char *
-ptllnd_ptlid2str(ptl_process_id_t id)
-{
- static char strs[8][32];
- static int idx = 0;
-
- char *str = strs[idx++];
-
- if (idx >= sizeof(strs)/sizeof(strs[0]))
- idx = 0;
-
- snprintf(str, sizeof(strs[0]), FMT_PTLID, id.pid, id.nid);
- return str;
-}
-
-void
-ptllnd_destroy_peer(ptllnd_peer_t *peer)
-{
- lnet_ni_t *ni = peer->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
- int nmsg = peer->plp_lazy_credits +
- plni->plni_peer_credits;
-
- ptllnd_size_buffers(ni, -nmsg);
-
- LASSERT (peer->plp_closing);
- LASSERT (plni->plni_npeers > 0);
- LASSERT (list_empty(&peer->plp_txq));
- LASSERT (list_empty(&peer->plp_activeq));
- plni->plni_npeers--;
- LIBCFS_FREE(peer, sizeof(*peer));
-}
-
-void
-ptllnd_abort_txs(ptllnd_ni_t *plni, struct list_head *q)
-{
- while (!list_empty(q)) {
- ptllnd_tx_t *tx = list_entry(q->next, ptllnd_tx_t, tx_list);
-
- tx->tx_status = -ESHUTDOWN;
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &plni->plni_zombie_txs);
- }
-}
-
-void
-ptllnd_close_peer(ptllnd_peer_t *peer, int error)
-{
- lnet_ni_t *ni = peer->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
-
- if (peer->plp_closing)
- return;
-
- peer->plp_closing = 1;
-
- if (!list_empty(&peer->plp_txq) ||
- !list_empty(&peer->plp_activeq) ||
- error != 0) {
- CWARN("Closing %s\n", libcfs_id2str(peer->plp_id));
- if (plni->plni_debug)
- ptllnd_dump_debug(ni, peer->plp_id);
- }
-
- ptllnd_abort_txs(plni, &peer->plp_txq);
- ptllnd_abort_txs(plni, &peer->plp_activeq);
-
- list_del(&peer->plp_list);
- ptllnd_peer_decref(peer);
-}
-
-ptllnd_peer_t *
-ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- unsigned int hash = LNET_NIDADDR(id.nid) % plni->plni_peer_hash_size;
- struct list_head *tmp;
- ptllnd_peer_t *plp;
- ptllnd_tx_t *tx;
- int rc;
-
- LASSERT (LNET_NIDNET(id.nid) == LNET_NIDNET(ni->ni_nid));
-
- list_for_each(tmp, &plni->plni_peer_hash[hash]) {
- plp = list_entry(tmp, ptllnd_peer_t, plp_list);
-
- if (plp->plp_id.nid == id.nid &&
- plp->plp_id.pid == id.pid) {
- ptllnd_peer_addref(plp);
- return plp;
- }
- }
-
- if (!create)
- return NULL;
-
- /* New peer: check first for enough posted buffers */
- plni->plni_npeers++;
- rc = ptllnd_size_buffers(ni, plni->plni_peer_credits);
- if (rc != 0) {
- plni->plni_npeers--;
- return NULL;
- }
-
- LIBCFS_ALLOC(plp, sizeof(*plp));
- if (plp == NULL) {
- CERROR("Can't allocate new peer %s\n", libcfs_id2str(id));
- plni->plni_npeers--;
- ptllnd_size_buffers(ni, -plni->plni_peer_credits);
- return NULL;
- }
-
- plp->plp_ni = ni;
- plp->plp_id = id;
- plp->plp_ptlid.nid = LNET_NIDADDR(id.nid);
- plp->plp_ptlid.pid = plni->plni_ptllnd_pid;
- plp->plp_credits = 1; /* add more later when she gives me credits */
- plp->plp_max_msg_size = plni->plni_max_msg_size; /* until I hear from her */
- plp->plp_sent_credits = 1; /* Implicit credit for HELLO */
- plp->plp_outstanding_credits = plni->plni_peer_credits - 1;
- plp->plp_lazy_credits = 0;
- plp->plp_extra_lazy_credits = 0;
- plp->plp_match = 0;
- plp->plp_stamp = 0;
- plp->plp_recvd_hello = 0;
- plp->plp_closing = 0;
- plp->plp_refcount = 1;
- CFS_INIT_LIST_HEAD(&plp->plp_list);
- CFS_INIT_LIST_HEAD(&plp->plp_txq);
- CFS_INIT_LIST_HEAD(&plp->plp_activeq);
-
- ptllnd_peer_addref(plp);
- list_add_tail(&plp->plp_list, &plni->plni_peer_hash[hash]);
-
- tx = ptllnd_new_tx(plp, PTLLND_MSG_TYPE_HELLO, 0);
- if (tx == NULL) {
- CERROR("Can't send HELLO to %s\n", libcfs_id2str(id));
- ptllnd_close_peer(plp, -ENOMEM);
- ptllnd_peer_decref(plp);
- return NULL;
- }
-
- tx->tx_msg.ptlm_u.hello.kptlhm_matchbits = PTL_RESERVED_MATCHBITS;
- tx->tx_msg.ptlm_u.hello.kptlhm_max_msg_size = plni->plni_max_msg_size;
-
- PTLLND_HISTORY("%s[%d/%d+%d(%d)]: post hello %p", libcfs_id2str(id),
- tx->tx_peer->plp_credits,
- tx->tx_peer->plp_outstanding_credits,
- tx->tx_peer->plp_sent_credits,
- plni->plni_peer_credits +
- tx->tx_peer->plp_lazy_credits, tx);
- ptllnd_post_tx(tx);
-
- return plp;
-}
-
-int
-ptllnd_count_q(struct list_head *q)
-{
- struct list_head *e;
- int n = 0;
-
- list_for_each(e, q) {
- n++;
- }
-
- return n;
-}
-
-const char *
-ptllnd_tx_typestr(int type)
-{
- switch (type) {
- case PTLLND_RDMA_WRITE:
- return "rdma_write";
-
- case PTLLND_RDMA_READ:
- return "rdma_read";
-
- case PTLLND_MSG_TYPE_PUT:
- return "put_req";
-
- case PTLLND_MSG_TYPE_GET:
- return "get_req";
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- return "immediate";
-
- case PTLLND_MSG_TYPE_NOOP:
- return "noop";
-
- case PTLLND_MSG_TYPE_HELLO:
- return "hello";
-
- default:
- return "<unknown>";
- }
-}
-
-void
-ptllnd_debug_tx(ptllnd_tx_t *tx)
-{
- CDEBUG(D_WARNING, "%s %s b %ld.%06ld/%ld.%06ld"
- " r %ld.%06ld/%ld.%06ld status %d\n",
- ptllnd_tx_typestr(tx->tx_type),
- libcfs_id2str(tx->tx_peer->plp_id),
- tx->tx_bulk_posted.tv_sec, tx->tx_bulk_posted.tv_usec,
- tx->tx_bulk_done.tv_sec, tx->tx_bulk_done.tv_usec,
- tx->tx_req_posted.tv_sec, tx->tx_req_posted.tv_usec,
- tx->tx_req_done.tv_sec, tx->tx_req_done.tv_usec,
- tx->tx_status);
-}
-
-void
-ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id)
-{
- ptllnd_peer_t *plp = ptllnd_find_peer(ni, id, 0);
- struct list_head *tmp;
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_tx_t *tx;
-
- if (plp == NULL) {
- CDEBUG(D_WARNING, "No peer %s\n", libcfs_id2str(id));
- return;
- }
-
- CDEBUG(D_WARNING, "%s %s%s [%d] "LPU64".%06d m "LPU64" q %d/%d c %d/%d+%d(%d)\n",
- libcfs_id2str(id),
- plp->plp_recvd_hello ? "H" : "_",
- plp->plp_closing ? "C" : "_",
- plp->plp_refcount,
- plp->plp_stamp / 1000000, (int)(plp->plp_stamp % 1000000),
- plp->plp_match,
- ptllnd_count_q(&plp->plp_txq),
- ptllnd_count_q(&plp->plp_activeq),
- plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits,
- plni->plni_peer_credits + plp->plp_lazy_credits);
-
- CDEBUG(D_WARNING, "txq:\n");
- list_for_each (tmp, &plp->plp_txq) {
- tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
- ptllnd_debug_tx(tx);
- }
-
- CDEBUG(D_WARNING, "activeq:\n");
- list_for_each (tmp, &plp->plp_activeq) {
- tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
- ptllnd_debug_tx(tx);
- }
-
- CDEBUG(D_WARNING, "zombies:\n");
- list_for_each (tmp, &plni->plni_zombie_txs) {
- tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
- if (tx->tx_peer->plp_id.nid == id.nid &&
- tx->tx_peer->plp_id.pid == id.pid)
- ptllnd_debug_tx(tx);
- }
-
- CDEBUG(D_WARNING, "history:\n");
- list_for_each (tmp, &plni->plni_tx_history) {
- tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
- if (tx->tx_peer->plp_id.nid == id.nid &&
- tx->tx_peer->plp_id.pid == id.pid)
- ptllnd_debug_tx(tx);
- }
-
- ptllnd_peer_decref(plp);
-}
-
-void
-ptllnd_dump_debug(lnet_ni_t *ni, lnet_process_id_t id)
-{
- ptllnd_debug_peer(ni, id);
- ptllnd_dump_history();
-}
-
-void
-ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive)
-{
- lnet_process_id_t id;
- ptllnd_peer_t *peer;
- time_t start = cfs_time_current_sec();
- ptllnd_ni_t *plni = ni->ni_data;
- int w = plni->plni_long_wait;
-
- /* This is only actually used to connect to routers at startup! */
- LASSERT(alive);
-
- id.nid = nid;
- id.pid = LUSTRE_SRV_LNET_PID;
-
- peer = ptllnd_find_peer(ni, id, 1);
- if (peer == NULL)
- return;
-
- /* wait for the peer to reply */
- while (!peer->plp_recvd_hello) {
- if (w > 0 && cfs_time_current_sec() > start + w/1000) {
- CWARN("Waited %ds to connect to %s\n",
- (int)(cfs_time_current_sec() - start),
- libcfs_id2str(id));
- w *= 2;
- }
-
- ptllnd_wait(ni, w);
- }
-
- ptllnd_peer_decref(peer);
-}
-
-int
-ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int nasync)
-{
- ptllnd_peer_t *peer = ptllnd_find_peer(ni, id, nasync > 0);
- int rc;
-
- if (peer == NULL)
- return -ENOMEM;
-
- LASSERT (peer->plp_lazy_credits >= 0);
- LASSERT (peer->plp_extra_lazy_credits >= 0);
-
- /* If nasync < 0, we're being told we can reduce the total message
- * headroom. We can't do this right now because our peer might already
- * have credits for the extra buffers, so we just account the extra
- * headroom in case we need it later and only destroy buffers when the
- * peer closes.
- *
- * Note that the following condition handles this case, where it
- * actually increases the extra lazy credit counter. */
-
- if (nasync <= peer->plp_extra_lazy_credits) {
- peer->plp_extra_lazy_credits -= nasync;
- return 0;
- }
-
- LASSERT (nasync > 0);
-
- nasync -= peer->plp_extra_lazy_credits;
- peer->plp_extra_lazy_credits = 0;
-
- rc = ptllnd_size_buffers(ni, nasync);
- if (rc == 0) {
- peer->plp_lazy_credits += nasync;
- peer->plp_outstanding_credits += nasync;
- }
-
- return rc;
-}
-
-__u32
-ptllnd_cksum (void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return (sum == 0) ? 1 : sum;
-}
-
-ptllnd_tx_t *
-ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob)
-{
- lnet_ni_t *ni = peer->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_tx_t *tx;
- int msgsize;
-
- CDEBUG(D_NET, "peer=%p type=%d payload=%d\n", peer, type, payload_nob);
-
- switch (type) {
- default:
- LBUG();
-
- case PTLLND_RDMA_WRITE:
- case PTLLND_RDMA_READ:
- LASSERT (payload_nob == 0);
- msgsize = 0;
- break;
-
- case PTLLND_MSG_TYPE_PUT:
- case PTLLND_MSG_TYPE_GET:
- LASSERT (payload_nob == 0);
- msgsize = offsetof(kptl_msg_t, ptlm_u) +
- sizeof(kptl_rdma_msg_t);
- break;
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- msgsize = offsetof(kptl_msg_t,
- ptlm_u.immediate.kptlim_payload[payload_nob]);
- break;
-
- case PTLLND_MSG_TYPE_NOOP:
- LASSERT (payload_nob == 0);
- msgsize = offsetof(kptl_msg_t, ptlm_u);
- break;
-
- case PTLLND_MSG_TYPE_HELLO:
- LASSERT (payload_nob == 0);
- msgsize = offsetof(kptl_msg_t, ptlm_u) +
- sizeof(kptl_hello_msg_t);
- break;
- }
-
- msgsize = (msgsize + 7) & ~7;
- LASSERT (msgsize <= peer->plp_max_msg_size);
-
- LIBCFS_ALLOC(tx, offsetof(ptllnd_tx_t, tx_msg) + msgsize);
-
- if (tx == NULL) {
- CERROR("Can't allocate msg type %d for %s\n",
- type, libcfs_id2str(peer->plp_id));
- return NULL;
- }
-
- CFS_INIT_LIST_HEAD(&tx->tx_list);
- tx->tx_peer = peer;
- tx->tx_type = type;
- tx->tx_lnetmsg = tx->tx_lnetreplymsg = NULL;
- tx->tx_niov = 0;
- tx->tx_iov = NULL;
- tx->tx_reqmdh = PTL_INVALID_HANDLE;
- tx->tx_bulkmdh = PTL_INVALID_HANDLE;
- tx->tx_msgsize = msgsize;
- tx->tx_completing = 0;
- tx->tx_status = 0;
-
- memset(&tx->tx_bulk_posted, 0, sizeof(tx->tx_bulk_posted));
- memset(&tx->tx_bulk_done, 0, sizeof(tx->tx_bulk_done));
- memset(&tx->tx_req_posted, 0, sizeof(tx->tx_req_posted));
- memset(&tx->tx_req_done, 0, sizeof(tx->tx_req_done));
-
- if (msgsize != 0) {
- tx->tx_msg.ptlm_magic = PTLLND_MSG_MAGIC;
- tx->tx_msg.ptlm_version = PTLLND_MSG_VERSION;
- tx->tx_msg.ptlm_type = type;
- tx->tx_msg.ptlm_credits = 0;
- tx->tx_msg.ptlm_nob = msgsize;
- tx->tx_msg.ptlm_cksum = 0;
- tx->tx_msg.ptlm_srcnid = ni->ni_nid;
- tx->tx_msg.ptlm_srcstamp = plni->plni_stamp;
- tx->tx_msg.ptlm_dstnid = peer->plp_id.nid;
- tx->tx_msg.ptlm_dststamp = peer->plp_stamp;
- tx->tx_msg.ptlm_srcpid = the_lnet.ln_pid;
- tx->tx_msg.ptlm_dstpid = peer->plp_id.pid;
- }
-
- ptllnd_peer_addref(peer);
- plni->plni_ntxs++;
-
- CDEBUG(D_NET, "tx=%p\n",tx);
-
- return tx;
-}
-
-void
-ptllnd_abort_tx(ptllnd_tx_t *tx, ptl_handle_md_t *mdh)
-{
- ptllnd_peer_t *peer = tx->tx_peer;
- lnet_ni_t *ni = peer->plp_ni;
- int rc;
- time_t start = cfs_time_current_sec();
- ptllnd_ni_t *plni = ni->ni_data;
- int w = plni->plni_long_wait;
-
- while (!PtlHandleIsEqual(*mdh, PTL_INVALID_HANDLE)) {
- rc = PtlMDUnlink(*mdh);
-#ifndef LUSTRE_PORTALS_UNLINK_SEMANTICS
- if (rc == PTL_OK) /* unlink successful => no unlinked event */
- return;
- LASSERT (rc == PTL_MD_IN_USE);
-#endif
- if (w > 0 && cfs_time_current_sec() > start + w/1000) {
- CWARN("Waited %ds to abort tx to %s\n",
- (int)(cfs_time_current_sec() - start),
- libcfs_id2str(peer->plp_id));
- w *= 2;
- }
- /* Wait for ptllnd_tx_event() to invalidate */
- ptllnd_wait(ni, w);
- }
-}
-
-void
-ptllnd_cull_tx_history(ptllnd_ni_t *plni)
-{
- int max = plni->plni_max_tx_history;
-
- while (plni->plni_ntx_history > max) {
- ptllnd_tx_t *tx = list_entry(plni->plni_tx_history.next,
- ptllnd_tx_t, tx_list);
- list_del(&tx->tx_list);
-
- ptllnd_peer_decref(tx->tx_peer);
-
- LIBCFS_FREE(tx, offsetof(ptllnd_tx_t, tx_msg) + tx->tx_msgsize);
-
- LASSERT (plni->plni_ntxs > 0);
- plni->plni_ntxs--;
- plni->plni_ntx_history--;
- }
-}
-
-void
-ptllnd_tx_done(ptllnd_tx_t *tx)
-{
- ptllnd_peer_t *peer = tx->tx_peer;
- lnet_ni_t *ni = peer->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
-
- /* CAVEAT EMPTOR: If this tx is being aborted, I'll continue to get
- * events for this tx until it's unlinked. So I set tx_completing to
- * flag the tx is getting handled */
-
- if (tx->tx_completing)
- return;
-
- tx->tx_completing = 1;
-
- if (!list_empty(&tx->tx_list))
- list_del_init(&tx->tx_list);
-
- if (tx->tx_status != 0) {
- if (plni->plni_debug) {
- CERROR("Completing tx for %s with error %d\n",
- libcfs_id2str(peer->plp_id), tx->tx_status);
- ptllnd_debug_tx(tx);
- }
- ptllnd_close_peer(peer, tx->tx_status);
- }
-
- ptllnd_abort_tx(tx, &tx->tx_reqmdh);
- ptllnd_abort_tx(tx, &tx->tx_bulkmdh);
-
- if (tx->tx_niov > 0) {
- LIBCFS_FREE(tx->tx_iov, tx->tx_niov * sizeof(*tx->tx_iov));
- tx->tx_niov = 0;
- }
-
- if (tx->tx_lnetreplymsg != NULL) {
- LASSERT (tx->tx_type == PTLLND_MSG_TYPE_GET);
- LASSERT (tx->tx_lnetmsg != NULL);
- /* Simulate GET success always */
- lnet_finalize(ni, tx->tx_lnetmsg, 0);
- CDEBUG(D_NET, "lnet_finalize(tx_lnetreplymsg=%p)\n",tx->tx_lnetreplymsg);
- lnet_finalize(ni, tx->tx_lnetreplymsg, tx->tx_status);
- } else if (tx->tx_lnetmsg != NULL) {
- lnet_finalize(ni, tx->tx_lnetmsg, tx->tx_status);
- }
-
- plni->plni_ntx_history++;
- list_add_tail(&tx->tx_list, &plni->plni_tx_history);
-
- ptllnd_cull_tx_history(plni);
-}
-
-int
-ptllnd_set_txiov(ptllnd_tx_t *tx,
- unsigned int niov, struct iovec *iov,
- unsigned int offset, unsigned int len)
-{
- ptl_md_iovec_t *piov;
- int npiov;
-
- if (len == 0) {
- tx->tx_niov = 0;
- return 0;
- }
-
- /*
- * Remove iovec's at the beginning that
- * are skipped because of the offset.
- * Adjust the offset accordingly
- */
- for (;;) {
- LASSERT (niov > 0);
- if (offset < iov->iov_len)
- break;
- offset -= iov->iov_len;
- niov--;
- iov++;
- }
-
- for (;;) {
- int temp_offset = offset;
- int resid = len;
- LIBCFS_ALLOC(piov, niov * sizeof(*piov));
- if (piov == NULL)
- return -ENOMEM;
-
- for (npiov = 0;; npiov++) {
- LASSERT (npiov < niov);
- LASSERT (iov->iov_len >= temp_offset);
-
- piov[npiov].iov_base = iov[npiov].iov_base + temp_offset;
- piov[npiov].iov_len = iov[npiov].iov_len - temp_offset;
-
- if (piov[npiov].iov_len >= resid) {
- piov[npiov].iov_len = resid;
- npiov++;
- break;
- }
- resid -= piov[npiov].iov_len;
- temp_offset = 0;
- }
-
- if (npiov == niov) {
- tx->tx_niov = niov;
- tx->tx_iov = piov;
- return 0;
- }
-
- /* Dang! The piov I allocated was too big and it's a drag to
- * have to maintain separate 'allocated' and 'used' sizes, so
- * I'll just do it again; NB this doesn't happen normally... */
- LIBCFS_FREE(piov, niov * sizeof(*piov));
- niov = npiov;
- }
-}
-
-void
-ptllnd_set_md_buffer(ptl_md_t *md, ptllnd_tx_t *tx)
-{
- unsigned int niov = tx->tx_niov;
- ptl_md_iovec_t *iov = tx->tx_iov;
-
- LASSERT ((md->options & PTL_MD_IOVEC) == 0);
-
- if (niov == 0) {
- md->start = NULL;
- md->length = 0;
- } else if (niov == 1) {
- md->start = iov[0].iov_base;
- md->length = iov[0].iov_len;
- } else {
- md->start = iov;
- md->length = niov;
- md->options |= PTL_MD_IOVEC;
- }
-}
-
-int
-ptllnd_post_buffer(ptllnd_buffer_t *buf)
-{
- lnet_ni_t *ni = buf->plb_ni;
- ptllnd_ni_t *plni = ni->ni_data;
- ptl_process_id_t anyid = {
- .nid = PTL_NID_ANY,
- .pid = PTL_PID_ANY};
- ptl_md_t md = {
- .start = buf->plb_buffer,
- .length = plni->plni_buffer_size,
- .threshold = PTL_MD_THRESH_INF,
- .max_size = plni->plni_max_msg_size,
- .options = (PTLLND_MD_OPTIONS |
- PTL_MD_OP_PUT | PTL_MD_MAX_SIZE |
- PTL_MD_LOCAL_ALIGN8),
- .user_ptr = ptllnd_obj2eventarg(buf, PTLLND_EVENTARG_TYPE_BUF),
- .eq_handle = plni->plni_eqh};
- ptl_handle_me_t meh;
- int rc;
-
- LASSERT (!buf->plb_posted);
-
- rc = PtlMEAttach(plni->plni_nih, plni->plni_portal,
- anyid, LNET_MSG_MATCHBITS, 0,
- PTL_UNLINK, PTL_INS_AFTER, &meh);
- if (rc != PTL_OK) {
- CERROR("PtlMEAttach failed: %s(%d)\n",
- ptllnd_errtype2str(rc), rc);
- return -ENOMEM;
- }
-
- buf->plb_posted = 1;
- plni->plni_nposted_buffers++;
-
- rc = PtlMDAttach(meh, md, LNET_UNLINK, &buf->plb_md);
- if (rc == PTL_OK)
- return 0;
-
- CERROR("PtlMDAttach failed: %s(%d)\n",
- ptllnd_errtype2str(rc), rc);
-
- buf->plb_posted = 0;
- plni->plni_nposted_buffers--;
-
- rc = PtlMEUnlink(meh);
- LASSERT (rc == PTL_OK);
-
- return -ENOMEM;
-}
-
-void
-ptllnd_check_sends(ptllnd_peer_t *peer)
-{
- lnet_ni_t *ni = peer->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_tx_t *tx;
- ptl_md_t md;
- ptl_handle_md_t mdh;
- int rc;
-
- CDEBUG(D_NET, "%s: [%d/%d+%d(%d)\n",
- libcfs_id2str(peer->plp_id), peer->plp_credits,
- peer->plp_outstanding_credits, peer->plp_sent_credits,
- plni->plni_peer_credits + peer->plp_lazy_credits);
-
- if (list_empty(&peer->plp_txq) &&
- peer->plp_outstanding_credits >= PTLLND_CREDIT_HIGHWATER(plni) &&
- peer->plp_credits != 0) {
-
- tx = ptllnd_new_tx(peer, PTLLND_MSG_TYPE_NOOP, 0);
- CDEBUG(D_NET, "NOOP tx=%p\n",tx);
- if (tx == NULL) {
- CERROR("Can't return credits to %s\n",
- libcfs_id2str(peer->plp_id));
- } else {
- ptllnd_set_tx_deadline(tx);
- list_add_tail(&tx->tx_list, &peer->plp_txq);
- }
- }
-
- while (!list_empty(&peer->plp_txq)) {
- tx = list_entry(peer->plp_txq.next, ptllnd_tx_t, tx_list);
-
- LASSERT (tx->tx_msgsize > 0);
-
- LASSERT (peer->plp_outstanding_credits >= 0);
- LASSERT (peer->plp_sent_credits >= 0);
- LASSERT (peer->plp_outstanding_credits + peer->plp_sent_credits
- <= plni->plni_peer_credits + peer->plp_lazy_credits);
- LASSERT (peer->plp_credits >= 0);
-
- if (peer->plp_credits == 0) { /* no credits */
- PTLLND_HISTORY("%s[%d/%d+%d(%d)]: no creds for %p",
- libcfs_id2str(peer->plp_id),
- peer->plp_credits,
- peer->plp_outstanding_credits,
- peer->plp_sent_credits,
- plni->plni_peer_credits +
- peer->plp_lazy_credits, tx);
- break;
- }
-
- if (peer->plp_credits == 1 && /* last credit reserved for */
- peer->plp_outstanding_credits == 0) { /* returning credits */
- PTLLND_HISTORY("%s[%d/%d+%d(%d)]: too few creds for %p",
- libcfs_id2str(peer->plp_id),
- peer->plp_credits,
- peer->plp_outstanding_credits,
- peer->plp_sent_credits,
- plni->plni_peer_credits +
- peer->plp_lazy_credits, tx);
- break;
- }
-
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &peer->plp_activeq);
-
- CDEBUG(D_NET, "Sending at TX=%p type=%s (%d)\n",tx,
- ptllnd_msgtype2str(tx->tx_type),tx->tx_type);
-
- if (tx->tx_type == PTLLND_MSG_TYPE_NOOP &&
- (!list_empty(&peer->plp_txq) ||
- peer->plp_outstanding_credits <
- PTLLND_CREDIT_HIGHWATER(plni))) {
- /* redundant NOOP */
- ptllnd_tx_done(tx);
- continue;
- }
-
- /* Set stamp at the last minute; on a new peer, I don't know it
- * until I receive the HELLO back */
- tx->tx_msg.ptlm_dststamp = peer->plp_stamp;
-
- /*
- * Return all the credits we have
- */
- tx->tx_msg.ptlm_credits = peer->plp_outstanding_credits;
- peer->plp_sent_credits += peer->plp_outstanding_credits;
- peer->plp_outstanding_credits = 0;
-
- /*
- * One less credit
- */
- peer->plp_credits--;
-
- if (plni->plni_checksum)
- tx->tx_msg.ptlm_cksum =
- ptllnd_cksum(&tx->tx_msg,
- offsetof(kptl_msg_t, ptlm_u));
-
- md.user_ptr = ptllnd_obj2eventarg(tx, PTLLND_EVENTARG_TYPE_TX);
- md.eq_handle = plni->plni_eqh;
- md.threshold = 1;
- md.options = PTLLND_MD_OPTIONS;
- md.start = &tx->tx_msg;
- md.length = tx->tx_msgsize;
-
- rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh);
- if (rc != PTL_OK) {
- CERROR("PtlMDBind for %s failed: %s(%d)\n",
- libcfs_id2str(peer->plp_id),
- ptllnd_errtype2str(rc), rc);
- tx->tx_status = -EIO;
- ptllnd_tx_done(tx);
- break;
- }
-
- LASSERT (tx->tx_type != PTLLND_RDMA_WRITE &&
- tx->tx_type != PTLLND_RDMA_READ);
-
- tx->tx_reqmdh = mdh;
- gettimeofday(&tx->tx_req_posted, NULL);
-
- PTLLND_HISTORY("%s[%d/%d+%d(%d)]: %s %p c %d",
- libcfs_id2str(peer->plp_id),
- peer->plp_credits,
- peer->plp_outstanding_credits,
- peer->plp_sent_credits,
- plni->plni_peer_credits +
- peer->plp_lazy_credits,
- ptllnd_msgtype2str(tx->tx_type), tx,
- tx->tx_msg.ptlm_credits);
-
- rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid,
- plni->plni_portal, 0, LNET_MSG_MATCHBITS, 0, 0);
- if (rc != PTL_OK) {
- CERROR("PtlPut for %s failed: %s(%d)\n",
- libcfs_id2str(peer->plp_id),
- ptllnd_errtype2str(rc), rc);
- tx->tx_status = -EIO;
- ptllnd_tx_done(tx);
- break;
- }
- }
-}
-
-int
-ptllnd_passive_rdma(ptllnd_peer_t *peer, int type, lnet_msg_t *msg,
- unsigned int niov, struct iovec *iov,
- unsigned int offset, unsigned int len)
-{
- lnet_ni_t *ni = peer->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_tx_t *tx = ptllnd_new_tx(peer, type, 0);
- __u64 matchbits;
- ptl_md_t md;
- ptl_handle_md_t mdh;
- ptl_handle_me_t meh;
- int rc;
- int rc2;
- time_t start;
- int w;
-
- CDEBUG(D_NET, "niov=%d offset=%d len=%d\n",niov,offset,len);
-
- LASSERT (type == PTLLND_MSG_TYPE_GET ||
- type == PTLLND_MSG_TYPE_PUT);
-
- if (tx == NULL) {
- CERROR("Can't allocate %s tx for %s\n",
- type == PTLLND_MSG_TYPE_GET ? "GET" : "PUT/REPLY",
- libcfs_id2str(peer->plp_id));
- return -ENOMEM;
- }
-
- rc = ptllnd_set_txiov(tx, niov, iov, offset, len);
- if (rc != 0) {
- CERROR ("Can't allocate iov %d for %s\n",
- niov, libcfs_id2str(peer->plp_id));
- rc = -ENOMEM;
- goto failed;
- }
-
- md.user_ptr = ptllnd_obj2eventarg(tx, PTLLND_EVENTARG_TYPE_TX);
- md.eq_handle = plni->plni_eqh;
- md.threshold = 1;
- md.max_size = 0;
- md.options = PTLLND_MD_OPTIONS;
- if(type == PTLLND_MSG_TYPE_GET)
- md.options |= PTL_MD_OP_PUT | PTL_MD_ACK_DISABLE;
- else
- md.options |= PTL_MD_OP_GET;
- ptllnd_set_md_buffer(&md, tx);
-
- start = cfs_time_current_sec();
- w = plni->plni_long_wait;
-
- while (!peer->plp_recvd_hello) { /* wait to validate plp_match */
- if (peer->plp_closing) {
- rc = -EIO;
- goto failed;
- }
- if (w > 0 && cfs_time_current_sec() > start + w/1000) {
- CWARN("Waited %ds to connect to %s\n",
- (int)(cfs_time_current_sec() - start),
- libcfs_id2str(peer->plp_id));
- w *= 2;
- }
- ptllnd_wait(ni, w);
- }
-
- if (peer->plp_match < PTL_RESERVED_MATCHBITS)
- peer->plp_match = PTL_RESERVED_MATCHBITS;
- matchbits = peer->plp_match++;
-
- rc = PtlMEAttach(plni->plni_nih, plni->plni_portal, peer->plp_ptlid,
- matchbits, 0, PTL_UNLINK, PTL_INS_BEFORE, &meh);
- if (rc != PTL_OK) {
- CERROR("PtlMEAttach for %s failed: %s(%d)\n",
- libcfs_id2str(peer->plp_id),
- ptllnd_errtype2str(rc), rc);
- rc = -EIO;
- goto failed;
- }
-
- gettimeofday(&tx->tx_bulk_posted, NULL);
-
- rc = PtlMDAttach(meh, md, LNET_UNLINK, &mdh);
- if (rc != PTL_OK) {
- CERROR("PtlMDAttach for %s failed: %s(%d)\n",
- libcfs_id2str(peer->plp_id),
- ptllnd_errtype2str(rc), rc);
- rc2 = PtlMEUnlink(meh);
- LASSERT (rc2 == PTL_OK);
- rc = -EIO;
- goto failed;
- }
- tx->tx_bulkmdh = mdh;
-
- /*
- * We need to set the stamp here because it
- * we could have received a HELLO above that set
- * peer->plp_stamp
- */
- tx->tx_msg.ptlm_dststamp = peer->plp_stamp;
-
- tx->tx_msg.ptlm_u.rdma.kptlrm_hdr = msg->msg_hdr;
- tx->tx_msg.ptlm_u.rdma.kptlrm_matchbits = matchbits;
-
- if (type == PTLLND_MSG_TYPE_GET) {
- tx->tx_lnetreplymsg = lnet_create_reply_msg(ni, msg);
- if (tx->tx_lnetreplymsg == NULL) {
- CERROR("Can't create reply for GET to %s\n",
- libcfs_id2str(msg->msg_target));
- rc = -ENOMEM;
- goto failed;
- }
- }
-
- tx->tx_lnetmsg = msg;
- PTLLND_HISTORY("%s[%d/%d+%d(%d)]: post passive %s p %d %p",
- libcfs_id2str(msg->msg_target),
- peer->plp_credits, peer->plp_outstanding_credits,
- peer->plp_sent_credits,
- plni->plni_peer_credits + peer->plp_lazy_credits,
- lnet_msgtyp2str(msg->msg_type),
- (le32_to_cpu(msg->msg_type) == LNET_MSG_PUT) ?
- le32_to_cpu(msg->msg_hdr.msg.put.ptl_index) :
- (le32_to_cpu(msg->msg_type) == LNET_MSG_GET) ?
- le32_to_cpu(msg->msg_hdr.msg.get.ptl_index) : -1,
- tx);
- ptllnd_post_tx(tx);
- return 0;
-
- failed:
- ptllnd_tx_done(tx);
- return rc;
-}
-
-int
-ptllnd_active_rdma(ptllnd_peer_t *peer, int type,
- lnet_msg_t *msg, __u64 matchbits,
- unsigned int niov, struct iovec *iov,
- unsigned int offset, unsigned int len)
-{
- lnet_ni_t *ni = peer->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_tx_t *tx = ptllnd_new_tx(peer, type, 0);
- ptl_md_t md;
- ptl_handle_md_t mdh;
- int rc;
-
- LASSERT (type == PTLLND_RDMA_READ ||
- type == PTLLND_RDMA_WRITE);
-
- if (tx == NULL) {
- CERROR("Can't allocate tx for RDMA %s with %s\n",
- (type == PTLLND_RDMA_WRITE) ? "write" : "read",
- libcfs_id2str(peer->plp_id));
- ptllnd_close_peer(peer, -ENOMEM);
- return -ENOMEM;
- }
-
- rc = ptllnd_set_txiov(tx, niov, iov, offset, len);
- if (rc != 0) {
- CERROR ("Can't allocate iov %d for %s\n",
- niov, libcfs_id2str(peer->plp_id));
- rc = -ENOMEM;
- goto failed;
- }
-
- md.user_ptr = ptllnd_obj2eventarg(tx, PTLLND_EVENTARG_TYPE_TX);
- md.eq_handle = plni->plni_eqh;
- md.max_size = 0;
- md.options = PTLLND_MD_OPTIONS;
- md.threshold = (type == PTLLND_RDMA_READ) ? 2 : 1;
-
- ptllnd_set_md_buffer(&md, tx);
-
- rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh);
- if (rc != PTL_OK) {
- CERROR("PtlMDBind for %s failed: %s(%d)\n",
- libcfs_id2str(peer->plp_id),
- ptllnd_errtype2str(rc), rc);
- rc = -EIO;
- goto failed;
- }
-
- tx->tx_bulkmdh = mdh;
- tx->tx_lnetmsg = msg;
-
- ptllnd_set_tx_deadline(tx);
- list_add_tail(&tx->tx_list, &peer->plp_activeq);
- gettimeofday(&tx->tx_bulk_posted, NULL);
-
- if (type == PTLLND_RDMA_READ)
- rc = PtlGet(mdh, peer->plp_ptlid,
- plni->plni_portal, 0, matchbits, 0);
- else
- rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid,
- plni->plni_portal, 0, matchbits, 0,
- (msg == NULL) ? PTLLND_RDMA_FAIL : PTLLND_RDMA_OK);
-
- if (rc == PTL_OK)
- return 0;
-
- CERROR("Can't initiate RDMA with %s: %s(%d)\n",
- libcfs_id2str(peer->plp_id),
- ptllnd_errtype2str(rc), rc);
-
- tx->tx_lnetmsg = NULL;
- failed:
- tx->tx_status = rc;
- ptllnd_tx_done(tx); /* this will close peer */
- return rc;
-}
-
-int
-ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_peer_t *plp;
- ptllnd_tx_t *tx;
- int nob;
- int rc;
-
- LASSERT (!msg->msg_routing);
- LASSERT (msg->msg_kiov == NULL);
-
- LASSERT (msg->msg_niov <= PTL_MD_MAX_IOV); /* !!! */
-
- CDEBUG(D_NET, "%s [%d]+%d,%d -> %s%s\n",
- lnet_msgtyp2str(msg->msg_type),
- msg->msg_niov, msg->msg_offset, msg->msg_len,
- libcfs_nid2str(msg->msg_target.nid),
- msg->msg_target_is_router ? "(rtr)" : "");
-
- if ((msg->msg_target.pid & LNET_PID_USERFLAG) != 0) {
- CERROR("Can't send to non-kernel peer %s\n",
- libcfs_id2str(msg->msg_target));
- return -EHOSTUNREACH;
- }
-
- plp = ptllnd_find_peer(ni, msg->msg_target, 1);
- if (plp == NULL)
- return -ENOMEM;
-
- switch (msg->msg_type) {
- default:
- LBUG();
-
- case LNET_MSG_ACK:
- LASSERT (msg->msg_len == 0);
- break; /* send IMMEDIATE */
-
- case LNET_MSG_GET:
- if (msg->msg_target_is_router)
- break; /* send IMMEDIATE */
-
- nob = msg->msg_md->md_length;
- nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[nob]);
- if (nob <= plni->plni_max_msg_size)
- break;
-
- LASSERT ((msg->msg_md->md_options & LNET_MD_KIOV) == 0);
- rc = ptllnd_passive_rdma(plp, PTLLND_MSG_TYPE_GET, msg,
- msg->msg_md->md_niov,
- msg->msg_md->md_iov.iov,
- 0, msg->msg_md->md_length);
- ptllnd_peer_decref(plp);
- return rc;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- nob = msg->msg_len;
- nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[nob]);
- if (nob <= plp->plp_max_msg_size)
- break; /* send IMMEDIATE */
-
- rc = ptllnd_passive_rdma(plp, PTLLND_MSG_TYPE_PUT, msg,
- msg->msg_niov, msg->msg_iov,
- msg->msg_offset, msg->msg_len);
- ptllnd_peer_decref(plp);
- return rc;
- }
-
- /* send IMMEDIATE
- * NB copy the payload so we don't have to do a fragmented send */
-
- tx = ptllnd_new_tx(plp, PTLLND_MSG_TYPE_IMMEDIATE, msg->msg_len);
- if (tx == NULL) {
- CERROR("Can't allocate tx for lnet type %d to %s\n",
- msg->msg_type, libcfs_id2str(msg->msg_target));
- ptllnd_peer_decref(plp);
- return -ENOMEM;
- }
-
- lnet_copy_iov2flat(tx->tx_msgsize, &tx->tx_msg,
- offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload),
- msg->msg_niov, msg->msg_iov, msg->msg_offset,
- msg->msg_len);
- tx->tx_msg.ptlm_u.immediate.kptlim_hdr = msg->msg_hdr;
-
- tx->tx_lnetmsg = msg;
- PTLLND_HISTORY("%s[%d/%d+%d(%d)]: post immediate %s p %d %p",
- libcfs_id2str(msg->msg_target),
- plp->plp_credits, plp->plp_outstanding_credits,
- plp->plp_sent_credits,
- plni->plni_peer_credits + plp->plp_lazy_credits,
- lnet_msgtyp2str(msg->msg_type),
- (le32_to_cpu(msg->msg_type) == LNET_MSG_PUT) ?
- le32_to_cpu(msg->msg_hdr.msg.put.ptl_index) :
- (le32_to_cpu(msg->msg_type) == LNET_MSG_GET) ?
- le32_to_cpu(msg->msg_hdr.msg.get.ptl_index) : -1,
- tx);
- ptllnd_post_tx(tx);
- ptllnd_peer_decref(plp);
- return 0;
-}
-
-void
-ptllnd_rx_done(ptllnd_rx_t *rx)
-{
- ptllnd_peer_t *plp = rx->rx_peer;
- lnet_ni_t *ni = plp->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
-
- plp->plp_outstanding_credits++;
-
- PTLLND_HISTORY("%s[%d/%d+%d(%d)]: rx=%p done\n",
- libcfs_id2str(plp->plp_id),
- plp->plp_credits, plp->plp_outstanding_credits,
- plp->plp_sent_credits,
- plni->plni_peer_credits + plp->plp_lazy_credits, rx);
-
- ptllnd_check_sends(rx->rx_peer);
-
- LASSERT (plni->plni_nrxs > 0);
- plni->plni_nrxs--;
-}
-
-int
-ptllnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
- void **new_privatep)
-{
- /* Shouldn't get here; recvs only block for router buffers */
- LBUG();
- return 0;
-}
-
-int
-ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- ptllnd_rx_t *rx = private;
- int rc = 0;
- int nob;
-
- LASSERT (kiov == NULL);
- LASSERT (niov <= PTL_MD_MAX_IOV); /* !!! */
-
- switch (rx->rx_msg->ptlm_type) {
- default:
- LBUG();
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[mlen]);
- if (nob > rx->rx_nob) {
- CERROR("Immediate message from %s too big: %d(%d)\n",
- libcfs_id2str(rx->rx_peer->plp_id),
- nob, rx->rx_nob);
- rc = -EPROTO;
- break;
- }
- lnet_copy_flat2iov(niov, iov, offset,
- rx->rx_nob, rx->rx_msg,
- offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload),
- mlen);
- lnet_finalize(ni, msg, 0);
- break;
-
- case PTLLND_MSG_TYPE_PUT:
- rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_READ, msg,
- rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits,
- niov, iov, offset, mlen);
- break;
-
- case PTLLND_MSG_TYPE_GET:
- if (msg != NULL)
- rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_WRITE, msg,
- rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits,
- msg->msg_niov, msg->msg_iov,
- msg->msg_offset, msg->msg_len);
- else
- rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_WRITE, NULL,
- rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits,
- 0, NULL, 0, 0);
- break;
- }
-
- ptllnd_rx_done(rx);
- return rc;
-}
-
-void
-ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
- kptl_msg_t *msg, unsigned int nob)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- const int basenob = offsetof(kptl_msg_t, ptlm_u);
- lnet_process_id_t srcid;
- ptllnd_rx_t rx;
- int flip;
- __u16 msg_version;
- __u32 msg_cksum;
- ptllnd_peer_t *plp;
- int rc;
-
- if (nob < 6) {
- CERROR("Very short receive from %s\n",
- ptllnd_ptlid2str(initiator));
- return;
- }
-
- /* I can at least read MAGIC/VERSION */
-
- flip = msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC);
- if (!flip && msg->ptlm_magic != PTLLND_MSG_MAGIC) {
- CERROR("Bad protocol magic %08x from %s\n",
- msg->ptlm_magic, ptllnd_ptlid2str(initiator));
- return;
- }
-
- msg_version = flip ? __swab16(msg->ptlm_version) : msg->ptlm_version;
-
- if (msg_version != PTLLND_MSG_VERSION) {
- CERROR("Bad protocol version %04x from %s: %04x expected\n",
- (__u32)msg_version, ptllnd_ptlid2str(initiator), PTLLND_MSG_VERSION);
-
- if (plni->plni_abort_on_protocol_mismatch)
- abort();
-
- return;
- }
-
- if (nob < basenob) {
- CERROR("Short receive from %s: got %d, wanted at least %d\n",
- ptllnd_ptlid2str(initiator), nob, basenob);
- return;
- }
-
- /* checksum must be computed with
- * 1) ptlm_cksum zero and
- * 2) BEFORE anything gets modified/flipped
- */
- msg_cksum = flip ? __swab32(msg->ptlm_cksum) : msg->ptlm_cksum;
- msg->ptlm_cksum = 0;
- if (msg_cksum != 0 &&
- msg_cksum != ptllnd_cksum(msg, offsetof(kptl_msg_t, ptlm_u))) {
- CERROR("Bad checksum from %s\n", ptllnd_ptlid2str(initiator));
- return;
- }
-
- msg->ptlm_version = msg_version;
- msg->ptlm_cksum = msg_cksum;
-
- if (flip) {
- /* NB stamps are opaque cookies */
- __swab32s(&msg->ptlm_nob);
- __swab64s(&msg->ptlm_srcnid);
- __swab64s(&msg->ptlm_dstnid);
- __swab32s(&msg->ptlm_srcpid);
- __swab32s(&msg->ptlm_dstpid);
- }
-
- srcid.nid = msg->ptlm_srcnid;
- srcid.pid = msg->ptlm_srcpid;
-
- if (LNET_NIDNET(msg->ptlm_srcnid) != LNET_NIDNET(ni->ni_nid)) {
- CERROR("Bad source id %s from %s\n",
- libcfs_id2str(srcid),
- ptllnd_ptlid2str(initiator));
- return;
- }
-
- if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) {
- CERROR("NAK from %s (%s)\n",
- libcfs_id2str(srcid),
- ptllnd_ptlid2str(initiator));
-
- if (plni->plni_dump_on_nak)
- ptllnd_dump_debug(ni, srcid);
-
- if (plni->plni_abort_on_nak)
- abort();
-
- return;
- }
-
- if (msg->ptlm_dstnid != ni->ni_nid ||
- msg->ptlm_dstpid != the_lnet.ln_pid) {
- CERROR("Bad dstid %s (%s expected) from %s\n",
- libcfs_id2str((lnet_process_id_t) {
- .nid = msg->ptlm_dstnid,
- .pid = msg->ptlm_dstpid}),
- libcfs_id2str((lnet_process_id_t) {
- .nid = ni->ni_nid,
- .pid = the_lnet.ln_pid}),
- libcfs_id2str(srcid));
- return;
- }
-
- if (msg->ptlm_dststamp != plni->plni_stamp) {
- CERROR("Bad dststamp "LPX64"("LPX64" expected) from %s\n",
- msg->ptlm_dststamp, plni->plni_stamp,
- libcfs_id2str(srcid));
- return;
- }
-
- PTLLND_HISTORY("RX %s: %s %d %p", libcfs_id2str(srcid),
- ptllnd_msgtype2str(msg->ptlm_type),
- msg->ptlm_credits, &rx);
-
- switch (msg->ptlm_type) {
- case PTLLND_MSG_TYPE_PUT:
- case PTLLND_MSG_TYPE_GET:
- if (nob < basenob + sizeof(kptl_rdma_msg_t)) {
- CERROR("Short rdma request from %s(%s)\n",
- libcfs_id2str(srcid),
- ptllnd_ptlid2str(initiator));
- return;
- }
- if (flip)
- __swab64s(&msg->ptlm_u.rdma.kptlrm_matchbits);
- break;
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- if (nob < offsetof(kptl_msg_t,
- ptlm_u.immediate.kptlim_payload)) {
- CERROR("Short immediate from %s(%s)\n",
- libcfs_id2str(srcid),
- ptllnd_ptlid2str(initiator));
- return;
- }
- break;
-
- case PTLLND_MSG_TYPE_HELLO:
- if (nob < basenob + sizeof(kptl_hello_msg_t)) {
- CERROR("Short hello from %s(%s)\n",
- libcfs_id2str(srcid),
- ptllnd_ptlid2str(initiator));
- return;
- }
- if(flip){
- __swab64s(&msg->ptlm_u.hello.kptlhm_matchbits);
- __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size);
- }
- break;
-
- case PTLLND_MSG_TYPE_NOOP:
- break;
-
- default:
- CERROR("Bad message type %d from %s(%s)\n", msg->ptlm_type,
- libcfs_id2str(srcid),
- ptllnd_ptlid2str(initiator));
- return;
- }
-
- plp = ptllnd_find_peer(ni, srcid, 0);
- if (plp == NULL) {
- CERROR("Can't find peer %s\n", libcfs_id2str(srcid));
- return;
- }
-
- if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) {
- if (plp->plp_recvd_hello) {
- CERROR("Unexpected HELLO from %s\n",
- libcfs_id2str(srcid));
- ptllnd_peer_decref(plp);
- return;
- }
-
- plp->plp_max_msg_size = msg->ptlm_u.hello.kptlhm_max_msg_size;
- plp->plp_match = msg->ptlm_u.hello.kptlhm_matchbits;
- plp->plp_stamp = msg->ptlm_srcstamp;
- plp->plp_recvd_hello = 1;
-
- } else if (!plp->plp_recvd_hello) {
-
- CERROR("Bad message type %d (HELLO expected) from %s\n",
- msg->ptlm_type, libcfs_id2str(srcid));
- ptllnd_peer_decref(plp);
- return;
-
- } else if (msg->ptlm_srcstamp != plp->plp_stamp) {
-
- CERROR("Bad srcstamp "LPX64"("LPX64" expected) from %s\n",
- msg->ptlm_srcstamp, plp->plp_stamp,
- libcfs_id2str(srcid));
- ptllnd_peer_decref(plp);
- return;
- }
-
- /* Check peer only sends when I've sent her credits */
- if (plp->plp_sent_credits == 0) {
- CERROR("%s[%d/%d+%d(%d)]: unexpected message\n",
- libcfs_id2str(plp->plp_id),
- plp->plp_credits, plp->plp_outstanding_credits,
- plp->plp_sent_credits,
- plni->plni_peer_credits + plp->plp_lazy_credits);
- return;
- }
- plp->plp_sent_credits--;
-
- /* No check for credit overflow - the peer may post new buffers after
- * the startup handshake. */
- if (msg->ptlm_credits > 0) {
- plp->plp_credits += msg->ptlm_credits;
- ptllnd_check_sends(plp);
- }
-
- /* All OK so far; assume the message is good... */
-
- rx.rx_peer = plp;
- rx.rx_msg = msg;
- rx.rx_nob = nob;
- plni->plni_nrxs++;
-
- switch (msg->ptlm_type) {
- default: /* message types have been checked already */
- ptllnd_rx_done(&rx);
- break;
-
- case PTLLND_MSG_TYPE_PUT:
- case PTLLND_MSG_TYPE_GET:
- rc = lnet_parse(ni, &msg->ptlm_u.rdma.kptlrm_hdr,
- msg->ptlm_srcnid, &rx, 1);
- if (rc < 0)
- ptllnd_rx_done(&rx);
- break;
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- rc = lnet_parse(ni, &msg->ptlm_u.immediate.kptlim_hdr,
- msg->ptlm_srcnid, &rx, 0);
- if (rc < 0)
- ptllnd_rx_done(&rx);
- break;
- }
-
- ptllnd_peer_decref(plp);
-}
-
-void
-ptllnd_buf_event (lnet_ni_t *ni, ptl_event_t *event)
-{
- ptllnd_buffer_t *buf = ptllnd_eventarg2obj(event->md.user_ptr);
- ptllnd_ni_t *plni = ni->ni_data;
- char *msg = &buf->plb_buffer[event->offset];
- int repost;
- int unlinked = event->type == PTL_EVENT_UNLINK;
-
- LASSERT (buf->plb_ni == ni);
- LASSERT (event->type == PTL_EVENT_PUT_END ||
- event->type == PTL_EVENT_UNLINK);
-
- if (event->ni_fail_type != PTL_NI_OK) {
-
- CERROR("event type %s(%d), status %s(%d) from %s\n",
- ptllnd_evtype2str(event->type), event->type,
- ptllnd_errtype2str(event->ni_fail_type),
- event->ni_fail_type,
- ptllnd_ptlid2str(event->initiator));
-
- } else if (event->type == PTL_EVENT_PUT_END) {
-#if (PTL_MD_LOCAL_ALIGN8 == 0)
- /* Portals can't force message alignment - someone sending an
- * odd-length message could misalign subsequent messages */
- if ((event->mlength & 7) != 0) {
- CERROR("Message from %s has odd length %llu: "
- "probable version incompatibility\n",
- ptllnd_ptlid2str(event->initiator),
- event->mlength);
- LBUG();
- }
-#endif
- LASSERT ((event->offset & 7) == 0);
-
- ptllnd_parse_request(ni, event->initiator,
- (kptl_msg_t *)msg, event->mlength);
- }
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- /* UNLINK event only on explicit unlink */
- repost = (event->unlinked && event->type != PTL_EVENT_UNLINK);
- if (event->unlinked)
- unlinked = 1;
-#else
- /* UNLINK event only on implicit unlink */
- repost = (event->type == PTL_EVENT_UNLINK);
-#endif
-
- if (unlinked) {
- LASSERT(buf->plb_posted);
- buf->plb_posted = 0;
- plni->plni_nposted_buffers--;
- }
-
- if (repost)
- (void) ptllnd_post_buffer(buf);
-}
-
-void
-ptllnd_tx_event (lnet_ni_t *ni, ptl_event_t *event)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_tx_t *tx = ptllnd_eventarg2obj(event->md.user_ptr);
- int error = (event->ni_fail_type != PTL_NI_OK);
- int isreq;
- int isbulk;
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- int unlinked = event->unlinked;
-#else
- int unlinked = (event->type == PTL_EVENT_UNLINK);
-#endif
-
- if (error)
- CERROR("Error %s(%d) event %s(%d) unlinked %d, %s(%d) for %s\n",
- ptllnd_errtype2str(event->ni_fail_type),
- event->ni_fail_type,
- ptllnd_evtype2str(event->type), event->type,
- unlinked, ptllnd_msgtype2str(tx->tx_type), tx->tx_type,
- libcfs_id2str(tx->tx_peer->plp_id));
-
- LASSERT (!PtlHandleIsEqual(event->md_handle, PTL_INVALID_HANDLE));
-
- isreq = PtlHandleIsEqual(event->md_handle, tx->tx_reqmdh);
- if (isreq) {
- LASSERT (event->md.start == (void *)&tx->tx_msg);
- if (unlinked) {
- tx->tx_reqmdh = PTL_INVALID_HANDLE;
- gettimeofday(&tx->tx_req_done, NULL);
- }
- }
-
- isbulk = PtlHandleIsEqual(event->md_handle, tx->tx_bulkmdh);
- if ( isbulk && unlinked ) {
- tx->tx_bulkmdh = PTL_INVALID_HANDLE;
- gettimeofday(&tx->tx_bulk_done, NULL);
- }
-
- LASSERT (!isreq != !isbulk); /* always one and only 1 match */
-
- PTLLND_HISTORY("%s[%d/%d+%d(%d)]: TX done %p %s%s",
- libcfs_id2str(tx->tx_peer->plp_id),
- tx->tx_peer->plp_credits,
- tx->tx_peer->plp_outstanding_credits,
- tx->tx_peer->plp_sent_credits,
- plni->plni_peer_credits + tx->tx_peer->plp_lazy_credits,
- tx, isreq ? "REQ" : "BULK", unlinked ? "(unlinked)" : "");
-
- LASSERT (!isreq != !isbulk); /* always one and only 1 match */
- switch (tx->tx_type) {
- default:
- LBUG();
-
- case PTLLND_MSG_TYPE_NOOP:
- case PTLLND_MSG_TYPE_HELLO:
- case PTLLND_MSG_TYPE_IMMEDIATE:
- LASSERT (event->type == PTL_EVENT_UNLINK ||
- event->type == PTL_EVENT_SEND_END);
- LASSERT (isreq);
- break;
-
- case PTLLND_MSG_TYPE_GET:
- LASSERT (event->type == PTL_EVENT_UNLINK ||
- (isreq && event->type == PTL_EVENT_SEND_END) ||
- (isbulk && event->type == PTL_EVENT_PUT_END));
-
- if (isbulk && !error && event->type == PTL_EVENT_PUT_END) {
- /* Check GET matched */
- if (event->hdr_data == PTLLND_RDMA_OK) {
- lnet_set_reply_msg_len(ni,
- tx->tx_lnetreplymsg,
- event->mlength);
- } else {
- CERROR ("Unmatched GET with %s\n",
- libcfs_id2str(tx->tx_peer->plp_id));
- tx->tx_status = -EIO;
- }
- }
- break;
-
- case PTLLND_MSG_TYPE_PUT:
- LASSERT (event->type == PTL_EVENT_UNLINK ||
- (isreq && event->type == PTL_EVENT_SEND_END) ||
- (isbulk && event->type == PTL_EVENT_GET_END));
- break;
-
- case PTLLND_RDMA_READ:
- LASSERT (event->type == PTL_EVENT_UNLINK ||
- event->type == PTL_EVENT_SEND_END ||
- event->type == PTL_EVENT_REPLY_END);
- LASSERT (isbulk);
- break;
-
- case PTLLND_RDMA_WRITE:
- LASSERT (event->type == PTL_EVENT_UNLINK ||
- event->type == PTL_EVENT_SEND_END);
- LASSERT (isbulk);
- }
-
- /* Schedule ptllnd_tx_done() on error or last completion event */
- if (error ||
- (PtlHandleIsEqual(tx->tx_bulkmdh, PTL_INVALID_HANDLE) &&
- PtlHandleIsEqual(tx->tx_reqmdh, PTL_INVALID_HANDLE))) {
- if (error)
- tx->tx_status = -EIO;
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &plni->plni_zombie_txs);
- }
-}
-
-ptllnd_tx_t *
-ptllnd_find_timed_out_tx(ptllnd_peer_t *peer)
-{
- time_t now = cfs_time_current_sec();
- struct list_head *tmp;
-
- list_for_each(tmp, &peer->plp_txq) {
- ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
- if (tx->tx_deadline < now)
- return tx;
- }
-
- list_for_each(tmp, &peer->plp_activeq) {
- ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
- if (tx->tx_deadline < now)
- return tx;
- }
-
- return NULL;
-}
-
-void
-ptllnd_check_peer(ptllnd_peer_t *peer)
-{
- ptllnd_tx_t *tx = ptllnd_find_timed_out_tx(peer);
-
- if (tx == NULL)
- return;
-
- CERROR("%s: timed out\n", libcfs_id2str(peer->plp_id));
- ptllnd_close_peer(peer, -ETIMEDOUT);
-}
-
-void
-ptllnd_watchdog (lnet_ni_t *ni, time_t now)
-{
- ptllnd_ni_t *plni = ni->ni_data;
- const int n = 4;
- int p = plni->plni_watchdog_interval;
- int chunk = plni->plni_peer_hash_size;
- int interval = now - (plni->plni_watchdog_nextt - p);
- int i;
- struct list_head *hashlist;
- struct list_head *tmp;
- struct list_head *nxt;
-
- /* Time to check for RDMA timeouts on a few more peers:
- * I try to do checks every 'p' seconds on a proportion of the peer
- * table and I need to check every connection 'n' times within a
- * timeout interval, to ensure I detect a timeout on any connection
- * within (n+1)/n times the timeout interval. */
-
- LASSERT (now >= plni->plni_watchdog_nextt);
-
- if (plni->plni_timeout > n * interval) { /* Scan less than the whole table? */
- chunk = (chunk * n * interval) / plni->plni_timeout;
- if (chunk == 0)
- chunk = 1;
- }
-
- for (i = 0; i < chunk; i++) {
- hashlist = &plni->plni_peer_hash[plni->plni_watchdog_peeridx];
-
- list_for_each_safe(tmp, nxt, hashlist) {
- ptllnd_check_peer(list_entry(tmp, ptllnd_peer_t, plp_list));
- }
-
- plni->plni_watchdog_peeridx = (plni->plni_watchdog_peeridx + 1) %
- plni->plni_peer_hash_size;
- }
-
- plni->plni_watchdog_nextt = now + p;
-}
-
-void
-ptllnd_wait (lnet_ni_t *ni, int milliseconds)
-{
- static struct timeval prevt;
- static int prevt_count;
- static int call_count;
-
- struct timeval start;
- struct timeval then;
- struct timeval now;
- struct timeval deadline;
-
- ptllnd_ni_t *plni = ni->ni_data;
- ptllnd_tx_t *tx;
- ptl_event_t event;
- int which;
- int rc;
- int found = 0;
- int timeout = 0;
-
- /* Handle any currently queued events, returning immediately if any.
- * Otherwise block for the timeout and handle all events queued
- * then. */
-
- gettimeofday(&start, NULL);
- call_count++;
-
- if (milliseconds <= 0) {
- deadline = start;
- } else {
- deadline.tv_sec = start.tv_sec + milliseconds/1000;
- deadline.tv_usec = start.tv_usec + (milliseconds % 1000)*1000;
-
- if (deadline.tv_usec >= 1000000) {
- start.tv_usec -= 1000000;
- start.tv_sec++;
- }
- }
-
- for (;;) {
- gettimeofday(&then, NULL);
-
- rc = PtlEQPoll(&plni->plni_eqh, 1, timeout, &event, &which);
-
- gettimeofday(&now, NULL);
-
- if ((now.tv_sec*1000 + now.tv_usec/1000) -
- (then.tv_sec*1000 + then.tv_usec/1000) > timeout + 1000) {
- /* 1000 mS grace...........................^ */
- CERROR("SLOW PtlEQPoll(%d): %dmS elapsed\n", timeout,
- (int)(now.tv_sec*1000 + now.tv_usec/1000) -
- (int)(then.tv_sec*1000 + then.tv_usec/1000));
- }
-
- if (rc == PTL_EQ_EMPTY) {
- if (found) /* handled some events */
- break;
-
- if (now.tv_sec >= plni->plni_watchdog_nextt) { /* check timeouts? */
- ptllnd_watchdog(ni, now.tv_sec);
- LASSERT (now.tv_sec < plni->plni_watchdog_nextt);
- }
-
- if (now.tv_sec > deadline.tv_sec || /* timeout expired */
- (now.tv_sec == deadline.tv_sec &&
- now.tv_usec >= deadline.tv_usec))
- break;
-
- if (milliseconds < 0 ||
- plni->plni_watchdog_nextt <= deadline.tv_sec) {
- timeout = (plni->plni_watchdog_nextt - now.tv_sec)*1000;
- } else {
- timeout = (deadline.tv_sec - now.tv_sec)*1000 +
- (deadline.tv_usec - now.tv_usec)/1000;
- }
-
- continue;
- }
-
- LASSERT (rc == PTL_OK || rc == PTL_EQ_DROPPED);
-
- if (rc == PTL_EQ_DROPPED)
- CERROR("Event queue: size %d is too small\n",
- plni->plni_eq_size);
-
- timeout = 0;
- found = 1;
-
- switch (ptllnd_eventarg2type(event.md.user_ptr)) {
- default:
- LBUG();
-
- case PTLLND_EVENTARG_TYPE_TX:
- ptllnd_tx_event(ni, &event);
- break;
-
- case PTLLND_EVENTARG_TYPE_BUF:
- ptllnd_buf_event(ni, &event);
- break;
- }
- }
-
- while (!list_empty(&plni->plni_zombie_txs)) {
- tx = list_entry(plni->plni_zombie_txs.next,
- ptllnd_tx_t, tx_list);
- list_del_init(&tx->tx_list);
- ptllnd_tx_done(tx);
- }
-
- if (prevt.tv_sec == 0 ||
- prevt.tv_sec != now.tv_sec) {
- PTLLND_HISTORY("%d wait entered at %d.%06d - prev %d %d.%06d",
- call_count, (int)start.tv_sec, (int)start.tv_usec,
- prevt_count, (int)prevt.tv_sec, (int)prevt.tv_usec);
- prevt = now;
- }
-}
+++ /dev/null
-.deps
-Makefile
-Makefile.in
+++ /dev/null
-if LIBLUSTRE
-if BUILD_USOCKLND
-noinst_LIBRARIES = libsocklnd.a
-endif
-endif
-
-noinst_HEADERS = usocklnd.h
-libsocklnd_a_SOURCES = usocklnd.h usocklnd.c usocklnd_cb.c poll.c \
- handlers.c conn.c
-libsocklnd_a_CPPFLAGS = $(LLCPPFLAGS)
-libsocklnd_a_CFLAGS = $(LLCFLAGS)
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Maxim Patlasov <maxim@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- */
-
-#include "usocklnd.h"
-
-/* Return 1 if the conn is timed out, 0 else */
-int
-usocklnd_conn_timed_out(usock_conn_t *conn, cfs_time_t current_time)
-{
- if (conn->uc_tx_flag && /* sending is in progress */
- cfs_time_aftereq(current_time, conn->uc_tx_deadline))
- return 1;
-
- if (conn->uc_rx_flag && /* receiving is in progress */
- cfs_time_aftereq(current_time, conn->uc_rx_deadline))
- return 1;
-
- return 0;
-}
-
-void
-usocklnd_conn_kill(usock_conn_t *conn)
-{
- pthread_mutex_lock(&conn->uc_lock);
- if (conn->uc_state != UC_DEAD)
- usocklnd_conn_kill_locked(conn);
- pthread_mutex_unlock(&conn->uc_lock);
-}
-
-/* Mark the conn as DEAD and schedule its deletion */
-void
-usocklnd_conn_kill_locked(usock_conn_t *conn)
-{
- conn->uc_rx_flag = conn->uc_tx_flag = 0;
- conn->uc_state = UC_DEAD;
- usocklnd_add_killrequest(conn);
-}
-
-usock_conn_t *
-usocklnd_conn_allocate()
-{
- usock_conn_t *conn;
- usock_pollrequest_t *pr;
-
- LIBCFS_ALLOC (pr, sizeof(*pr));
- if (pr == NULL)
- return NULL;
-
- LIBCFS_ALLOC (conn, sizeof(*conn));
- if (conn == NULL) {
- LIBCFS_FREE (pr, sizeof(*pr));
- return NULL;
- }
- memset(conn, 0, sizeof(*conn));
- conn->uc_preq = pr;
-
- LIBCFS_ALLOC (conn->uc_rx_hello,
- offsetof(ksock_hello_msg_t,
- kshm_ips[LNET_MAX_INTERFACES]));
- if (conn->uc_rx_hello == NULL) {
- LIBCFS_FREE (pr, sizeof(*pr));
- LIBCFS_FREE (conn, sizeof(*conn));
- return NULL;
- }
-
- return conn;
-}
-
-void
-usocklnd_conn_free(usock_conn_t *conn)
-{
- usock_pollrequest_t *pr = conn->uc_preq;
-
- if (pr != NULL)
- LIBCFS_FREE (pr, sizeof(*pr));
-
- if (conn->uc_rx_hello != NULL)
- LIBCFS_FREE (conn->uc_rx_hello,
- offsetof(ksock_hello_msg_t,
- kshm_ips[LNET_MAX_INTERFACES]));
-
- LIBCFS_FREE (conn, sizeof(*conn));
-}
-
-void
-usocklnd_tear_peer_conn(usock_conn_t *conn)
-{
- usock_peer_t *peer = conn->uc_peer;
- int idx = usocklnd_type2idx(conn->uc_type);
- lnet_ni_t *ni;
- lnet_process_id_t id;
- int decref_flag = 0;
- int killall_flag = 0;
-
- if (peer == NULL) /* nothing to tear */
- return;
-
- pthread_mutex_lock(&peer->up_lock);
- pthread_mutex_lock(&conn->uc_lock);
-
- ni = peer->up_ni;
- id = peer->up_peerid;
-
- if (peer->up_conns[idx] == conn) {
- if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) {
- /* change state not to finalize twice */
- conn->uc_rx_state = UC_RX_KSM_HEADER;
- lnet_finalize(peer->up_ni, conn->uc_rx_lnetmsg, -EIO);
- }
-
- usocklnd_destroy_txlist(peer->up_ni,
- &conn->uc_tx_list);
-
- peer->up_conns[idx] = NULL;
- conn->uc_peer = NULL;
- decref_flag = 1;
-
- if(conn->uc_errored && !peer->up_errored)
- peer->up_errored = killall_flag = 1;
- }
-
- pthread_mutex_unlock(&conn->uc_lock);
-
- if (killall_flag)
- usocklnd_del_conns_locked(peer);
-
- pthread_mutex_unlock(&peer->up_lock);
-
- if (!decref_flag)
- return;
-
- usocklnd_conn_decref(conn);
- usocklnd_peer_decref(peer);
-
- usocklnd_check_peer_stale(ni, id);
-}
-
-/* Remove peer from hash list if all up_conns[i] is NULL &&
- * hash table is the only consumer of the peer */
-void
-usocklnd_check_peer_stale(lnet_ni_t *ni, lnet_process_id_t id)
-{
- usock_peer_t *peer;
-
- pthread_rwlock_wrlock(&usock_data.ud_peers_lock);
- peer = usocklnd_find_peer_locked(ni, id);
-
- if (peer == NULL) {
- pthread_rwlock_unlock(&usock_data.ud_peers_lock);
- return;
- }
-
- if (cfs_atomic_read(&peer->up_refcount) == 2) {
- int i;
- for (i = 0; i < N_CONN_TYPES; i++)
- LASSERT (peer->up_conns[i] == NULL);
-
- list_del(&peer->up_list);
-
- if (peer->up_errored &&
- (peer->up_peerid.pid & LNET_PID_USERFLAG) == 0)
- lnet_notify (peer->up_ni, peer->up_peerid.nid, 0,
- cfs_time_seconds(peer->up_last_alive));
-
- usocklnd_peer_decref(peer);
- }
-
- usocklnd_peer_decref(peer);
- pthread_rwlock_unlock(&usock_data.ud_peers_lock);
-}
-
-/* Returns 0 on success, <0 else */
-int
-usocklnd_create_passive_conn(lnet_ni_t *ni, int fd, usock_conn_t **connp)
-{
- int rc;
- __u32 peer_ip;
- __u16 peer_port;
- usock_conn_t *conn;
-
- rc = libcfs_getpeername(fd, &peer_ip, &peer_port);
- if (rc)
- return rc;
-
- rc = usocklnd_set_sock_options(fd);
- if (rc)
- return rc;
-
- conn = usocklnd_conn_allocate();
- if (conn == NULL)
- return -ENOMEM;
-
- usocklnd_rx_hellomagic_state_transition(conn);
-
- conn->uc_fd = fd;
- conn->uc_peer_ip = peer_ip;
- conn->uc_peer_port = peer_port;
- conn->uc_state = UC_RECEIVING_HELLO;
- conn->uc_pt_idx = usocklnd_ip2pt_idx(peer_ip);
- conn->uc_ni = ni;
- CFS_INIT_LIST_HEAD (&conn->uc_tx_list);
- CFS_INIT_LIST_HEAD (&conn->uc_zcack_list);
- pthread_mutex_init(&conn->uc_lock, NULL);
- cfs_atomic_set(&conn->uc_refcount, 1); /* 1 ref for me */
-
- *connp = conn;
- return 0;
-}
-
-/* Returns 0 on success, <0 else */
-int
-usocklnd_create_active_conn(usock_peer_t *peer, int type,
- usock_conn_t **connp)
-{
- int rc;
- int fd;
- usock_conn_t *conn;
- __u32 dst_ip = LNET_NIDADDR(peer->up_peerid.nid);
- __u16 dst_port = lnet_acceptor_port();
-
- conn = usocklnd_conn_allocate();
- if (conn == NULL)
- return -ENOMEM;
-
- conn->uc_tx_hello = usocklnd_create_cr_hello_tx(peer->up_ni, type,
- peer->up_peerid.nid);
- if (conn->uc_tx_hello == NULL) {
- usocklnd_conn_free(conn);
- return -ENOMEM;
- }
-
- if (the_lnet.ln_pid & LNET_PID_USERFLAG)
- rc = usocklnd_connect_cli_mode(&fd, dst_ip, dst_port);
- else
- rc = usocklnd_connect_srv_mode(&fd, dst_ip, dst_port);
-
- if (rc) {
- usocklnd_destroy_tx(NULL, conn->uc_tx_hello);
- usocklnd_conn_free(conn);
- return rc;
- }
-
- conn->uc_tx_deadline = cfs_time_shift(usock_tuns.ut_timeout);
- conn->uc_tx_flag = 1;
-
- conn->uc_fd = fd;
- conn->uc_peer_ip = dst_ip;
- conn->uc_peer_port = dst_port;
- conn->uc_type = type;
- conn->uc_activeflag = 1;
- conn->uc_state = UC_CONNECTING;
- conn->uc_pt_idx = usocklnd_ip2pt_idx(dst_ip);
- conn->uc_ni = NULL;
- conn->uc_peerid = peer->up_peerid;
- conn->uc_peer = peer;
- usocklnd_peer_addref(peer);
- CFS_INIT_LIST_HEAD (&conn->uc_tx_list);
- CFS_INIT_LIST_HEAD (&conn->uc_zcack_list);
- pthread_mutex_init(&conn->uc_lock, NULL);
- cfs_atomic_set(&conn->uc_refcount, 1); /* 1 ref for me */
-
- *connp = conn;
- return 0;
-}
-
-/* Returns 0 on success, <0 else */
-int
-usocklnd_connect_srv_mode(int *fdp, __u32 dst_ip, __u16 dst_port)
-{
- __u16 port;
- int fd;
- int rc;
-
- for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
- port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
- port--) {
- /* Iterate through reserved ports. */
-
- rc = libcfs_sock_create(&fd);
- if (rc)
- return rc;
-
- rc = libcfs_sock_bind_to_port(fd, port);
- if (rc) {
- close(fd);
- continue;
- }
-
- rc = usocklnd_set_sock_options(fd);
- if (rc) {
- close(fd);
- return rc;
- }
-
- rc = libcfs_sock_connect(fd, dst_ip, dst_port);
- if (rc == 0) {
- *fdp = fd;
- return 0;
- }
-
- if (rc != -EADDRINUSE && rc != -EADDRNOTAVAIL) {
- close(fd);
- return rc;
- }
-
- close(fd);
- }
-
- CERROR("Can't bind to any reserved port\n");
- return rc;
-}
-
-/* Returns 0 on success, <0 else */
-int
-usocklnd_connect_cli_mode(int *fdp, __u32 dst_ip, __u16 dst_port)
-{
- int fd;
- int rc;
-
- rc = libcfs_sock_create(&fd);
- if (rc)
- return rc;
-
- rc = usocklnd_set_sock_options(fd);
- if (rc) {
- close(fd);
- return rc;
- }
-
- rc = libcfs_sock_connect(fd, dst_ip, dst_port);
- if (rc) {
- close(fd);
- return rc;
- }
-
- *fdp = fd;
- return 0;
-}
-
-int
-usocklnd_set_sock_options(int fd)
-{
- int rc;
-
- rc = libcfs_sock_set_nagle(fd, usock_tuns.ut_socknagle);
- if (rc)
- return rc;
-
- if (usock_tuns.ut_sockbufsiz) {
- rc = libcfs_sock_set_bufsiz(fd, usock_tuns.ut_sockbufsiz);
- if (rc)
- return rc;
- }
-
- return libcfs_fcntl_nonblock(fd);
-}
-
-void
-usocklnd_init_msg(ksock_msg_t *msg, int type)
-{
- msg->ksm_type = type;
- msg->ksm_csum = 0;
- msg->ksm_zc_req_cookie = 0;
- msg->ksm_zc_ack_cookie = 0;
-}
-
-usock_tx_t *
-usocklnd_create_noop_tx(__u64 cookie)
-{
- usock_tx_t *tx;
-
- LIBCFS_ALLOC (tx, sizeof(usock_tx_t));
- if (tx == NULL)
- return NULL;
-
- tx->tx_size = sizeof(usock_tx_t);
- tx->tx_lnetmsg = NULL;
-
- usocklnd_init_msg(&tx->tx_msg, KSOCK_MSG_NOOP);
- tx->tx_msg.ksm_zc_ack_cookie = cookie;
-
- tx->tx_iova[0].iov_base = (void *)&tx->tx_msg;
- tx->tx_iova[0].iov_len = tx->tx_resid = tx->tx_nob =
- offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
- tx->tx_iov = tx->tx_iova;
- tx->tx_niov = 1;
-
- return tx;
-}
-
-usock_tx_t *
-usocklnd_create_tx(lnet_msg_t *lntmsg)
-{
- usock_tx_t *tx;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- int size = offsetof(usock_tx_t,
- tx_iova[1 + payload_niov]);
-
- LIBCFS_ALLOC (tx, size);
- if (tx == NULL)
- return NULL;
-
- tx->tx_size = size;
- tx->tx_lnetmsg = lntmsg;
-
- tx->tx_resid = tx->tx_nob =
- offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload) +
- payload_nob;
-
- usocklnd_init_msg(&tx->tx_msg, KSOCK_MSG_LNET);
- tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = lntmsg->msg_hdr;
- tx->tx_iova[0].iov_base = (void *)&tx->tx_msg;
- tx->tx_iova[0].iov_len = offsetof(ksock_msg_t,
- ksm_u.lnetmsg.ksnm_payload);
- tx->tx_iov = tx->tx_iova;
-
- tx->tx_niov = 1 +
- lnet_extract_iov(payload_niov, &tx->tx_iov[1],
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-
- return tx;
-}
-
-void
-usocklnd_init_hello_msg(ksock_hello_msg_t *hello,
- lnet_ni_t *ni, int type, lnet_nid_t peer_nid)
-{
- usock_net_t *net = (usock_net_t *)ni->ni_data;
-
- hello->kshm_magic = LNET_PROTO_MAGIC;
- hello->kshm_version = KSOCK_PROTO_V2;
- hello->kshm_nips = 0;
- hello->kshm_ctype = type;
-
- hello->kshm_dst_incarnation = 0; /* not used */
- hello->kshm_src_incarnation = net->un_incarnation;
-
- hello->kshm_src_pid = the_lnet.ln_pid;
- hello->kshm_src_nid = ni->ni_nid;
- hello->kshm_dst_nid = peer_nid;
- hello->kshm_dst_pid = 0; /* not used */
-}
-
-usock_tx_t *
-usocklnd_create_hello_tx(lnet_ni_t *ni,
- int type, lnet_nid_t peer_nid)
-{
- usock_tx_t *tx;
- int size;
- ksock_hello_msg_t *hello;
-
- size = sizeof(usock_tx_t) + offsetof(ksock_hello_msg_t, kshm_ips);
- LIBCFS_ALLOC (tx, size);
- if (tx == NULL)
- return NULL;
-
- tx->tx_size = size;
- tx->tx_lnetmsg = NULL;
-
- hello = (ksock_hello_msg_t *)&tx->tx_iova[1];
- usocklnd_init_hello_msg(hello, ni, type, peer_nid);
-
- tx->tx_iova[0].iov_base = (void *)hello;
- tx->tx_iova[0].iov_len = tx->tx_resid = tx->tx_nob =
- offsetof(ksock_hello_msg_t, kshm_ips);
- tx->tx_iov = tx->tx_iova;
- tx->tx_niov = 1;
-
- return tx;
-}
-
-usock_tx_t *
-usocklnd_create_cr_hello_tx(lnet_ni_t *ni,
- int type, lnet_nid_t peer_nid)
-{
- usock_tx_t *tx;
- int size;
- lnet_acceptor_connreq_t *cr;
- ksock_hello_msg_t *hello;
-
- size = sizeof(usock_tx_t) +
- sizeof(lnet_acceptor_connreq_t) +
- offsetof(ksock_hello_msg_t, kshm_ips);
- LIBCFS_ALLOC (tx, size);
- if (tx == NULL)
- return NULL;
-
- tx->tx_size = size;
- tx->tx_lnetmsg = NULL;
-
- cr = (lnet_acceptor_connreq_t *)&tx->tx_iova[1];
- memset(cr, 0, sizeof(*cr));
- cr->acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr->acr_version = LNET_PROTO_ACCEPTOR_VERSION;
- cr->acr_nid = peer_nid;
-
- hello = (ksock_hello_msg_t *)((char *)cr + sizeof(*cr));
- usocklnd_init_hello_msg(hello, ni, type, peer_nid);
-
- tx->tx_iova[0].iov_base = (void *)cr;
- tx->tx_iova[0].iov_len = tx->tx_resid = tx->tx_nob =
- sizeof(lnet_acceptor_connreq_t) +
- offsetof(ksock_hello_msg_t, kshm_ips);
- tx->tx_iov = tx->tx_iova;
- tx->tx_niov = 1;
-
- return tx;
-}
-
-void
-usocklnd_destroy_tx(lnet_ni_t *ni, usock_tx_t *tx)
-{
- lnet_msg_t *lnetmsg = tx->tx_lnetmsg;
- int rc = (tx->tx_resid == 0) ? 0 : -EIO;
-
- LASSERT (ni != NULL || lnetmsg == NULL);
-
- LIBCFS_FREE (tx, tx->tx_size);
-
- if (lnetmsg != NULL) /* NOOP and hello go without lnetmsg */
- lnet_finalize(ni, lnetmsg, rc);
-}
-
-void
-usocklnd_destroy_txlist(lnet_ni_t *ni, struct list_head *txlist)
-{
- usock_tx_t *tx;
-
- while (!list_empty(txlist)) {
- tx = list_entry(txlist->next, usock_tx_t, tx_list);
- list_del(&tx->tx_list);
-
- usocklnd_destroy_tx(ni, tx);
- }
-}
-
-void
-usocklnd_destroy_zcack_list(struct list_head *zcack_list)
-{
- usock_zc_ack_t *zcack;
-
- while (!list_empty(zcack_list)) {
- zcack = list_entry(zcack_list->next, usock_zc_ack_t, zc_list);
- list_del(&zcack->zc_list);
-
- LIBCFS_FREE (zcack, sizeof(*zcack));
- }
-}
-
-void
-usocklnd_destroy_peer(usock_peer_t *peer)
-{
- usock_net_t *net = peer->up_ni->ni_data;
- int i;
-
- for (i = 0; i < N_CONN_TYPES; i++)
- LASSERT (peer->up_conns[i] == NULL);
-
- LIBCFS_FREE (peer, sizeof (*peer));
-
- pthread_mutex_lock(&net->un_lock);
- if(--net->un_peercount == 0)
- pthread_cond_signal(&net->un_cond);
- pthread_mutex_unlock(&net->un_lock);
-}
-
-void
-usocklnd_destroy_conn(usock_conn_t *conn)
-{
- LASSERT (conn->uc_peer == NULL || conn->uc_ni == NULL);
-
- if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) {
- LASSERT (conn->uc_peer != NULL);
- lnet_finalize(conn->uc_peer->up_ni, conn->uc_rx_lnetmsg, -EIO);
- }
-
- if (!list_empty(&conn->uc_tx_list)) {
- LASSERT (conn->uc_peer != NULL);
- usocklnd_destroy_txlist(conn->uc_peer->up_ni, &conn->uc_tx_list);
- }
-
- usocklnd_destroy_zcack_list(&conn->uc_zcack_list);
-
- if (conn->uc_peer != NULL)
- usocklnd_peer_decref(conn->uc_peer);
-
- if (conn->uc_ni != NULL)
- lnet_ni_decref(conn->uc_ni);
-
- if (conn->uc_tx_hello)
- usocklnd_destroy_tx(NULL, conn->uc_tx_hello);
-
- usocklnd_conn_free(conn);
-}
-
-int
-usocklnd_get_conn_type(lnet_msg_t *lntmsg)
-{
- int nob;
-
- if (the_lnet.ln_pid & LNET_PID_USERFLAG)
- return SOCKLND_CONN_ANY;
-
- nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload) +
- lntmsg->msg_len;
-
- if (nob >= usock_tuns.ut_min_bulk)
- return SOCKLND_CONN_BULK_OUT;
- else
- return SOCKLND_CONN_CONTROL;
-}
-
-int usocklnd_type2idx(int type)
-{
- switch (type) {
- case SOCKLND_CONN_ANY:
- case SOCKLND_CONN_CONTROL:
- return 0;
- case SOCKLND_CONN_BULK_IN:
- return 1;
- case SOCKLND_CONN_BULK_OUT:
- return 2;
- default:
- LBUG();
- }
-}
-
-usock_peer_t *
-usocklnd_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id)
-{
- struct list_head *peer_list = usocklnd_nid2peerlist(id.nid);
- struct list_head *tmp;
- usock_peer_t *peer;
-
- list_for_each (tmp, peer_list) {
-
- peer = list_entry (tmp, usock_peer_t, up_list);
-
- if (peer->up_ni != ni)
- continue;
-
- if (peer->up_peerid.nid != id.nid ||
- peer->up_peerid.pid != id.pid)
- continue;
-
- usocklnd_peer_addref(peer);
- return peer;
- }
- return (NULL);
-}
-
-int
-usocklnd_create_peer(lnet_ni_t *ni, lnet_process_id_t id,
- usock_peer_t **peerp)
-{
- usock_net_t *net = ni->ni_data;
- usock_peer_t *peer;
- int i;
-
- LIBCFS_ALLOC (peer, sizeof (*peer));
- if (peer == NULL)
- return -ENOMEM;
-
- for (i = 0; i < N_CONN_TYPES; i++)
- peer->up_conns[i] = NULL;
-
- peer->up_peerid = id;
- peer->up_ni = ni;
- peer->up_incrn_is_set = 0;
- peer->up_errored = 0;
- peer->up_last_alive = 0;
- cfs_atomic_set (&peer->up_refcount, 1); /* 1 ref for caller */
- pthread_mutex_init(&peer->up_lock, NULL);
-
- pthread_mutex_lock(&net->un_lock);
- net->un_peercount++;
- pthread_mutex_unlock(&net->un_lock);
-
- *peerp = peer;
- return 0;
-}
-
-/* Safely create new peer if needed. Save result in *peerp.
- * Returns 0 on success, <0 else */
-int
-usocklnd_find_or_create_peer(lnet_ni_t *ni, lnet_process_id_t id,
- usock_peer_t **peerp)
-{
- int rc;
- usock_peer_t *peer;
- usock_peer_t *peer2;
- usock_net_t *net = ni->ni_data;
-
- pthread_rwlock_rdlock(&usock_data.ud_peers_lock);
- peer = usocklnd_find_peer_locked(ni, id);
- pthread_rwlock_unlock(&usock_data.ud_peers_lock);
-
- if (peer != NULL)
- goto find_or_create_peer_done;
-
- rc = usocklnd_create_peer(ni, id, &peer);
- if (rc)
- return rc;
-
- pthread_rwlock_wrlock(&usock_data.ud_peers_lock);
- peer2 = usocklnd_find_peer_locked(ni, id);
- if (peer2 == NULL) {
- if (net->un_shutdown) {
- pthread_rwlock_unlock(&usock_data.ud_peers_lock);
- usocklnd_peer_decref(peer); /* should destroy peer */
- CERROR("Can't create peer: network shutdown\n");
- return -ESHUTDOWN;
- }
-
- /* peer table will take 1 of my refs on peer */
- usocklnd_peer_addref(peer);
- list_add_tail (&peer->up_list,
- usocklnd_nid2peerlist(id.nid));
- } else {
- usocklnd_peer_decref(peer); /* should destroy peer */
- peer = peer2;
- }
- pthread_rwlock_unlock(&usock_data.ud_peers_lock);
-
- find_or_create_peer_done:
- *peerp = peer;
- return 0;
-}
-
-/* NB: both peer and conn locks are held */
-static int
-usocklnd_enqueue_zcack(usock_conn_t *conn, usock_zc_ack_t *zc_ack)
-{
- if (conn->uc_state == UC_READY &&
- list_empty(&conn->uc_tx_list) &&
- list_empty(&conn->uc_zcack_list) &&
- !conn->uc_sending) {
- int rc = usocklnd_add_pollrequest(conn, POLL_TX_SET_REQUEST,
- POLLOUT);
- if (rc != 0)
- return rc;
- }
-
- list_add_tail(&zc_ack->zc_list, &conn->uc_zcack_list);
- return 0;
-}
-
-/* NB: both peer and conn locks are held
- * NB: if sending isn't in progress. the caller *MUST* send tx
- * immediately after we'll return */
-static void
-usocklnd_enqueue_tx(usock_conn_t *conn, usock_tx_t *tx,
- int *send_immediately)
-{
- if (conn->uc_state == UC_READY &&
- list_empty(&conn->uc_tx_list) &&
- list_empty(&conn->uc_zcack_list) &&
- !conn->uc_sending) {
- conn->uc_sending = 1;
- *send_immediately = 1;
- return;
- }
-
- *send_immediately = 0;
- list_add_tail(&tx->tx_list, &conn->uc_tx_list);
-}
-
-/* Safely create new conn if needed. Save result in *connp.
- * Returns 0 on success, <0 else */
-int
-usocklnd_find_or_create_conn(usock_peer_t *peer, int type,
- usock_conn_t **connp,
- usock_tx_t *tx, usock_zc_ack_t *zc_ack,
- int *send_immediately)
-{
- usock_conn_t *conn;
- int idx;
- int rc;
- lnet_pid_t userflag = peer->up_peerid.pid & LNET_PID_USERFLAG;
-
- if (userflag)
- type = SOCKLND_CONN_ANY;
-
- idx = usocklnd_type2idx(type);
-
- pthread_mutex_lock(&peer->up_lock);
- if (peer->up_conns[idx] != NULL) {
- conn = peer->up_conns[idx];
- LASSERT(conn->uc_type == type);
- } else {
- if (userflag) {
- CERROR("Refusing to create a connection to "
- "userspace process %s\n",
- libcfs_id2str(peer->up_peerid));
- rc = -EHOSTUNREACH;
- goto find_or_create_conn_failed;
- }
-
- rc = usocklnd_create_active_conn(peer, type, &conn);
- if (rc) {
- peer->up_errored = 1;
- usocklnd_del_conns_locked(peer);
- goto find_or_create_conn_failed;
- }
-
- /* peer takes 1 of conn refcount */
- usocklnd_link_conn_to_peer(conn, peer, idx);
-
- rc = usocklnd_add_pollrequest(conn, POLL_ADD_REQUEST, POLLOUT);
- if (rc) {
- peer->up_conns[idx] = NULL;
- usocklnd_conn_decref(conn); /* should destroy conn */
- goto find_or_create_conn_failed;
- }
- usocklnd_wakeup_pollthread(conn->uc_pt_idx);
- }
-
- pthread_mutex_lock(&conn->uc_lock);
- LASSERT(conn->uc_peer == peer);
-
- LASSERT(tx == NULL || zc_ack == NULL);
- if (tx != NULL) {
- usocklnd_enqueue_tx(conn, tx, send_immediately);
- } else {
- rc = usocklnd_enqueue_zcack(conn, zc_ack);
- if (rc != 0) {
- usocklnd_conn_kill_locked(conn);
- pthread_mutex_unlock(&conn->uc_lock);
- goto find_or_create_conn_failed;
- }
- }
- pthread_mutex_unlock(&conn->uc_lock);
-
- usocklnd_conn_addref(conn);
- pthread_mutex_unlock(&peer->up_lock);
-
- *connp = conn;
- return 0;
-
- find_or_create_conn_failed:
- pthread_mutex_unlock(&peer->up_lock);
- return rc;
-}
-
-void
-usocklnd_link_conn_to_peer(usock_conn_t *conn, usock_peer_t *peer, int idx)
-{
- peer->up_conns[idx] = conn;
- peer->up_errored = 0; /* this new fresh conn will try
- * revitalize even stale errored peer */
-}
-
-int
-usocklnd_invert_type(int type)
-{
- switch (type)
- {
- case SOCKLND_CONN_ANY:
- case SOCKLND_CONN_CONTROL:
- return (type);
- case SOCKLND_CONN_BULK_IN:
- return SOCKLND_CONN_BULK_OUT;
- case SOCKLND_CONN_BULK_OUT:
- return SOCKLND_CONN_BULK_IN;
- default:
- return SOCKLND_CONN_NONE;
- }
-}
-
-void
-usocklnd_conn_new_state(usock_conn_t *conn, int new_state)
-{
- pthread_mutex_lock(&conn->uc_lock);
- if (conn->uc_state != UC_DEAD)
- conn->uc_state = new_state;
- pthread_mutex_unlock(&conn->uc_lock);
-}
-
-/* NB: peer is locked by caller */
-void
-usocklnd_cleanup_stale_conns(usock_peer_t *peer, __u64 incrn,
- usock_conn_t *skip_conn)
-{
- int i;
-
- if (!peer->up_incrn_is_set) {
- peer->up_incarnation = incrn;
- peer->up_incrn_is_set = 1;
- return;
- }
-
- if (peer->up_incarnation == incrn)
- return;
-
- peer->up_incarnation = incrn;
-
- for (i = 0; i < N_CONN_TYPES; i++) {
- usock_conn_t *conn = peer->up_conns[i];
-
- if (conn == NULL || conn == skip_conn)
- continue;
-
- pthread_mutex_lock(&conn->uc_lock);
- LASSERT (conn->uc_peer == peer);
- conn->uc_peer = NULL;
- peer->up_conns[i] = NULL;
- if (conn->uc_state != UC_DEAD)
- usocklnd_conn_kill_locked(conn);
- pthread_mutex_unlock(&conn->uc_lock);
-
- usocklnd_conn_decref(conn);
- usocklnd_peer_decref(peer);
- }
-}
-
-/* RX state transition to UC_RX_HELLO_MAGIC: update RX part to receive
- * MAGIC part of hello and set uc_rx_state
- */
-void
-usocklnd_rx_hellomagic_state_transition(usock_conn_t *conn)
-{
- LASSERT(conn->uc_rx_hello != NULL);
-
- conn->uc_rx_niov = 1;
- conn->uc_rx_iov = conn->uc_rx_iova;
- conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_magic;
- conn->uc_rx_iov[0].iov_len =
- conn->uc_rx_nob_wanted =
- conn->uc_rx_nob_left =
- sizeof(conn->uc_rx_hello->kshm_magic);
-
- conn->uc_rx_state = UC_RX_HELLO_MAGIC;
-
- conn->uc_rx_flag = 1; /* waiting for incoming hello */
- conn->uc_rx_deadline = cfs_time_shift(usock_tuns.ut_timeout);
-}
-
-/* RX state transition to UC_RX_HELLO_VERSION: update RX part to receive
- * VERSION part of hello and set uc_rx_state
- */
-void
-usocklnd_rx_helloversion_state_transition(usock_conn_t *conn)
-{
- LASSERT(conn->uc_rx_hello != NULL);
-
- conn->uc_rx_niov = 1;
- conn->uc_rx_iov = conn->uc_rx_iova;
- conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_version;
- conn->uc_rx_iov[0].iov_len =
- conn->uc_rx_nob_wanted =
- conn->uc_rx_nob_left =
- sizeof(conn->uc_rx_hello->kshm_version);
-
- conn->uc_rx_state = UC_RX_HELLO_VERSION;
-}
-
-/* RX state transition to UC_RX_HELLO_BODY: update RX part to receive
- * the rest of hello and set uc_rx_state
- */
-void
-usocklnd_rx_hellobody_state_transition(usock_conn_t *conn)
-{
- LASSERT(conn->uc_rx_hello != NULL);
-
- conn->uc_rx_niov = 1;
- conn->uc_rx_iov = conn->uc_rx_iova;
- conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_src_nid;
- conn->uc_rx_iov[0].iov_len =
- conn->uc_rx_nob_wanted =
- conn->uc_rx_nob_left =
- offsetof(ksock_hello_msg_t, kshm_ips) -
- offsetof(ksock_hello_msg_t, kshm_src_nid);
-
- conn->uc_rx_state = UC_RX_HELLO_BODY;
-}
-
-/* RX state transition to UC_RX_HELLO_IPS: update RX part to receive
- * array of IPs and set uc_rx_state
- */
-void
-usocklnd_rx_helloIPs_state_transition(usock_conn_t *conn)
-{
- LASSERT(conn->uc_rx_hello != NULL);
-
- conn->uc_rx_niov = 1;
- conn->uc_rx_iov = conn->uc_rx_iova;
- conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_ips;
- conn->uc_rx_iov[0].iov_len =
- conn->uc_rx_nob_wanted =
- conn->uc_rx_nob_left =
- conn->uc_rx_hello->kshm_nips *
- sizeof(conn->uc_rx_hello->kshm_ips[0]);
-
- conn->uc_rx_state = UC_RX_HELLO_IPS;
-}
-
-/* RX state transition to UC_RX_LNET_HEADER: update RX part to receive
- * LNET header and set uc_rx_state
- */
-void
-usocklnd_rx_lnethdr_state_transition(usock_conn_t *conn)
-{
- conn->uc_rx_niov = 1;
- conn->uc_rx_iov = conn->uc_rx_iova;
- conn->uc_rx_iov[0].iov_base = &conn->uc_rx_msg.ksm_u.lnetmsg;
- conn->uc_rx_iov[0].iov_len =
- conn->uc_rx_nob_wanted =
- conn->uc_rx_nob_left =
- sizeof(ksock_lnet_msg_t);
-
- conn->uc_rx_state = UC_RX_LNET_HEADER;
- conn->uc_rx_flag = 1;
-}
-
-/* RX state transition to UC_RX_KSM_HEADER: update RX part to receive
- * KSM header and set uc_rx_state
- */
-void
-usocklnd_rx_ksmhdr_state_transition(usock_conn_t *conn)
-{
- conn->uc_rx_niov = 1;
- conn->uc_rx_iov = conn->uc_rx_iova;
- conn->uc_rx_iov[0].iov_base = &conn->uc_rx_msg;
- conn->uc_rx_iov[0].iov_len =
- conn->uc_rx_nob_wanted =
- conn->uc_rx_nob_left =
- offsetof(ksock_msg_t, ksm_u);
-
- conn->uc_rx_state = UC_RX_KSM_HEADER;
- conn->uc_rx_flag = 0;
-}
-
-/* RX state transition to UC_RX_SKIPPING: update RX part for
- * skipping and set uc_rx_state
- */
-void
-usocklnd_rx_skipping_state_transition(usock_conn_t *conn)
-{
- static char skip_buffer[4096];
-
- int nob;
- unsigned int niov = 0;
- int skipped = 0;
- int nob_to_skip = conn->uc_rx_nob_left;
-
- LASSERT(nob_to_skip != 0);
-
- conn->uc_rx_iov = conn->uc_rx_iova;
-
- /* Set up to skip as much as possible now. If there's more left
- * (ran out of iov entries) we'll get called again */
-
- do {
- nob = MIN (nob_to_skip, sizeof(skip_buffer));
-
- conn->uc_rx_iov[niov].iov_base = skip_buffer;
- conn->uc_rx_iov[niov].iov_len = nob;
- niov++;
- skipped += nob;
- nob_to_skip -=nob;
-
- } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */
- niov < sizeof(conn->uc_rx_iova) / sizeof (struct iovec));
-
- conn->uc_rx_niov = niov;
- conn->uc_rx_nob_wanted = skipped;
-
- conn->uc_rx_state = UC_RX_SKIPPING;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Maxim Patlasov <maxim@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- */
-
-#include "usocklnd.h"
-#include <unistd.h>
-#include <sys/syscall.h>
-
-int
-usocklnd_notifier_handler(int fd)
-{
- int notification;
- return syscall(SYS_read, fd, ¬ification, sizeof(notification));
-}
-
-void
-usocklnd_exception_handler(usock_conn_t *conn)
-{
- pthread_mutex_lock(&conn->uc_lock);
-
- if (conn->uc_state == UC_CONNECTING ||
- conn->uc_state == UC_SENDING_HELLO)
- usocklnd_conn_kill_locked(conn);
-
- pthread_mutex_unlock(&conn->uc_lock);
-}
-
-int
-usocklnd_read_handler(usock_conn_t *conn)
-{
- int rc;
- int continue_reading;
- int state;
-
- read_again:
- rc = 0;
- pthread_mutex_lock(&conn->uc_lock);
- state = conn->uc_state;
-
- /* process special case: LNET calls lnd_recv() asyncronously */
- if (state == UC_READY && conn->uc_rx_state == UC_RX_PARSE) {
- /* still don't have usocklnd_recv() called */
- rc = usocklnd_add_pollrequest(conn, POLL_RX_SET_REQUEST, 0);
- if (rc == 0)
- conn->uc_rx_state = UC_RX_PARSE_WAIT;
- else
- usocklnd_conn_kill_locked(conn);
-
- pthread_mutex_unlock(&conn->uc_lock);
- return rc;
- }
-
- pthread_mutex_unlock(&conn->uc_lock);
- /* From here and below the conn cannot be changed
- * asyncronously, except:
- * 1) usocklnd_send() can work with uc_tx_list and uc_zcack_list,
- * 2) usocklnd_shutdown() can change uc_state to UC_DEAD */
-
- switch (state) {
-
- case UC_RECEIVING_HELLO:
- case UC_READY:
- if (conn->uc_rx_nob_wanted != 0) {
- /* read from conn fd as much wanted data as possible */
- rc = usocklnd_read_data(conn);
- if (rc == 0) /* partial read */
- break;
- if (rc < 0) {/* error happened or EOF */
- usocklnd_conn_kill(conn);
- break;
- }
- }
-
- /* process incoming data */
- if (state == UC_READY )
- rc = usocklnd_read_msg(conn, &continue_reading);
- else /* state == UC_RECEIVING_HELLO */
- rc = usocklnd_read_hello(conn, &continue_reading);
-
- if (rc < 0) {
- usocklnd_conn_kill(conn);
- break;
- }
-
- if (continue_reading)
- goto read_again;
-
- break;
-
- case UC_DEAD:
- break;
-
- default:
- LBUG();
- }
-
- return rc;
-}
-
-/* Switch on rx_state.
- * Return 0 on success, 1 if whole packet is read, else return <0
- * Always set cont_flag: 1 if we're ready to continue reading, else 0
- * NB: If whole packet is read, cont_flag will be set to zero to take
- * care of fairess
- */
-int
-usocklnd_read_msg(usock_conn_t *conn, int *cont_flag)
-{
- int rc = 0;
- __u64 cookie;
-
- *cont_flag = 0;
-
- /* smth. new emerged in RX part - let's process it */
- switch (conn->uc_rx_state) {
- case UC_RX_KSM_HEADER:
- if (conn->uc_flip) {
- __swab32s(&conn->uc_rx_msg.ksm_type);
- __swab32s(&conn->uc_rx_msg.ksm_csum);
- __swab64s(&conn->uc_rx_msg.ksm_zc_req_cookie);
- __swab64s(&conn->uc_rx_msg.ksm_zc_ack_cookie);
- }
-
- /* we never send packets for wich zc-acking is required */
- if (conn->uc_rx_msg.ksm_type != KSOCK_MSG_LNET ||
- conn->uc_rx_msg.ksm_zc_ack_cookie != 0) {
- conn->uc_errored = 1;
- return -EPROTO;
- }
-
- /* zc_req will be processed later, when
- lnet payload will be received */
-
- usocklnd_rx_lnethdr_state_transition(conn);
- *cont_flag = 1;
- break;
-
- case UC_RX_LNET_HEADER:
- if (the_lnet.ln_pid & LNET_PID_USERFLAG) {
- /* replace dest_nid,pid (ksocknal sets its own) */
- conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr.dest_nid =
- cpu_to_le64(conn->uc_peer->up_ni->ni_nid);
- conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr.dest_pid =
- cpu_to_le32(the_lnet.ln_pid);
-
- } else if (conn->uc_peer->up_peerid.pid & LNET_PID_USERFLAG) {
- /* Userspace peer */
- lnet_process_id_t *id = &conn->uc_peer->up_peerid;
- lnet_hdr_t *lhdr = &conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr;
-
- /* Substitute process ID assigned at connection time */
- lhdr->src_pid = cpu_to_le32(id->pid);
- lhdr->src_nid = cpu_to_le64(id->nid);
- }
-
- conn->uc_rx_state = UC_RX_PARSE;
- usocklnd_conn_addref(conn); /* ++ref while parsing */
-
- rc = lnet_parse(conn->uc_peer->up_ni,
- &conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr,
- conn->uc_peerid.nid, conn, 0);
-
- if (rc < 0) {
- /* I just received garbage: give up on this conn */
- conn->uc_errored = 1;
- usocklnd_conn_decref(conn);
- return -EPROTO;
- }
-
- /* Race with usocklnd_recv() is possible */
- pthread_mutex_lock(&conn->uc_lock);
- LASSERT (conn->uc_rx_state == UC_RX_PARSE ||
- conn->uc_rx_state == UC_RX_LNET_PAYLOAD);
-
- /* check whether usocklnd_recv() got called */
- if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD)
- *cont_flag = 1;
- pthread_mutex_unlock(&conn->uc_lock);
- break;
-
- case UC_RX_PARSE:
- LBUG(); /* it's error to be here, because this special
- * case is handled by caller */
- break;
-
- case UC_RX_PARSE_WAIT:
- LBUG(); /* it's error to be here, because the conn
- * shouldn't wait for POLLIN event in this
- * state */
- break;
-
- case UC_RX_LNET_PAYLOAD:
- /* payload all received */
-
- lnet_finalize(conn->uc_peer->up_ni, conn->uc_rx_lnetmsg, 0);
-
- cookie = conn->uc_rx_msg.ksm_zc_req_cookie;
- if (cookie != 0)
- rc = usocklnd_handle_zc_req(conn->uc_peer, cookie);
-
- if (rc != 0) {
- /* change state not to finalize twice */
- conn->uc_rx_state = UC_RX_KSM_HEADER;
- return -EPROTO;
- }
-
- /* Fall through */
-
- case UC_RX_SKIPPING:
- if (conn->uc_rx_nob_left != 0) {
- usocklnd_rx_skipping_state_transition(conn);
- *cont_flag = 1;
- } else {
- usocklnd_rx_ksmhdr_state_transition(conn);
- rc = 1; /* whole packet is read */
- }
-
- break;
-
- default:
- LBUG(); /* unknown state */
- }
-
- return rc;
-}
-
-/* Handle incoming ZC request from sender.
- * NB: it's called only from read_handler, so we're sure that
- * the conn cannot become zombie in the middle of processing */
-int
-usocklnd_handle_zc_req(usock_peer_t *peer, __u64 cookie)
-{
- usock_conn_t *conn;
- usock_zc_ack_t *zc_ack;
- int type;
- int rc;
- int dummy;
-
- LIBCFS_ALLOC (zc_ack, sizeof(*zc_ack));
- if (zc_ack == NULL)
- return -ENOMEM;
- zc_ack->zc_cookie = cookie;
-
- /* Let's assume that CONTROL is the best type for zcack,
- * but userspace clients don't use typed connections */
- if (the_lnet.ln_pid & LNET_PID_USERFLAG)
- type = SOCKLND_CONN_ANY;
- else
- type = SOCKLND_CONN_CONTROL;
-
- rc = usocklnd_find_or_create_conn(peer, type, &conn, NULL, zc_ack,
- &dummy);
- if (rc != 0) {
- LIBCFS_FREE (zc_ack, sizeof(*zc_ack));
- return rc;
- }
- usocklnd_conn_decref(conn);
-
- return 0;
-}
-
-/* Switch on rx_state.
- * Return 0 on success, else return <0
- * Always set cont_flag: 1 if we're ready to continue reading, else 0
- */
-int
-usocklnd_read_hello(usock_conn_t *conn, int *cont_flag)
-{
- int rc = 0;
- ksock_hello_msg_t *hello = conn->uc_rx_hello;
-
- *cont_flag = 0;
-
- /* smth. new emerged in hello - let's process it */
- switch (conn->uc_rx_state) {
- case UC_RX_HELLO_MAGIC:
- if (hello->kshm_magic == LNET_PROTO_MAGIC)
- conn->uc_flip = 0;
- else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC))
- conn->uc_flip = 1;
- else
- return -EPROTO;
-
- usocklnd_rx_helloversion_state_transition(conn);
- *cont_flag = 1;
- break;
-
- case UC_RX_HELLO_VERSION:
- if ((!conn->uc_flip &&
- (hello->kshm_version != KSOCK_PROTO_V2)) ||
- (conn->uc_flip &&
- (hello->kshm_version != __swab32(KSOCK_PROTO_V2))))
- return -EPROTO;
-
- usocklnd_rx_hellobody_state_transition(conn);
- *cont_flag = 1;
- break;
-
- case UC_RX_HELLO_BODY:
- if (conn->uc_flip) {
- ksock_hello_msg_t *hello = conn->uc_rx_hello;
- __swab32s(&hello->kshm_src_pid);
- __swab64s(&hello->kshm_src_nid);
- __swab32s(&hello->kshm_dst_pid);
- __swab64s(&hello->kshm_dst_nid);
- __swab64s(&hello->kshm_src_incarnation);
- __swab64s(&hello->kshm_dst_incarnation);
- __swab32s(&hello->kshm_ctype);
- __swab32s(&hello->kshm_nips);
- }
-
- if (conn->uc_rx_hello->kshm_nips > LNET_MAX_INTERFACES) {
- CERROR("Bad nips %d from ip %u.%u.%u.%u port %d\n",
- conn->uc_rx_hello->kshm_nips,
- HIPQUAD(conn->uc_peer_ip), conn->uc_peer_port);
- return -EPROTO;
- }
-
- if (conn->uc_rx_hello->kshm_nips) {
- usocklnd_rx_helloIPs_state_transition(conn);
- *cont_flag = 1;
- break;
- }
- /* fall through */
-
- case UC_RX_HELLO_IPS:
- if (conn->uc_activeflag == 1) /* active conn */
- rc = usocklnd_activeconn_hellorecv(conn);
- else /* passive conn */
- rc = usocklnd_passiveconn_hellorecv(conn);
-
- break;
-
- default:
- LBUG(); /* unknown state */
- }
-
- return rc;
-}
-
-/* All actions that we need after receiving hello on active conn:
- * 1) Schedule removing if we're zombie
- * 2) Restart active conn if we lost the race
- * 3) Else: update RX part to receive KSM header
- */
-int
-usocklnd_activeconn_hellorecv(usock_conn_t *conn)
-{
- int rc = 0;
- ksock_hello_msg_t *hello = conn->uc_rx_hello;
- usock_peer_t *peer = conn->uc_peer;
-
- /* Active conn with peer==NULL is zombie.
- * Don't try to link it to peer because the conn
- * has already had a chance to proceed at the beginning */
- if (peer == NULL) {
- LASSERT(list_empty(&conn->uc_tx_list) &&
- list_empty(&conn->uc_zcack_list));
-
- usocklnd_conn_kill(conn);
- return 0;
- }
-
- peer->up_last_alive = cfs_time_current();
-
- /* peer says that we lost the race */
- if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
- /* Start new active conn, relink txs and zc_acks from
- * the conn to new conn, schedule removing the conn.
- * Actually, we're expecting that a passive conn will
- * make us zombie soon and take care of our txs and
- * zc_acks */
-
- struct list_head tx_list, zcack_list;
- usock_conn_t *conn2;
- int idx = usocklnd_type2idx(conn->uc_type);
-
- CFS_INIT_LIST_HEAD (&tx_list);
- CFS_INIT_LIST_HEAD (&zcack_list);
-
- /* Block usocklnd_send() to check peer->up_conns[idx]
- * and to enqueue more txs */
- pthread_mutex_lock(&peer->up_lock);
- pthread_mutex_lock(&conn->uc_lock);
-
- /* usocklnd_shutdown() could kill us */
- if (conn->uc_state == UC_DEAD) {
- pthread_mutex_unlock(&conn->uc_lock);
- pthread_mutex_unlock(&peer->up_lock);
- return 0;
- }
-
- LASSERT (peer == conn->uc_peer);
- LASSERT (peer->up_conns[idx] == conn);
-
- rc = usocklnd_create_active_conn(peer, conn->uc_type, &conn2);
- if (rc) {
- conn->uc_errored = 1;
- pthread_mutex_unlock(&conn->uc_lock);
- pthread_mutex_unlock(&peer->up_lock);
- return rc;
- }
-
- usocklnd_link_conn_to_peer(conn2, peer, idx);
- conn2->uc_peer = peer;
-
- /* unlink txs and zcack from the conn */
- list_add(&tx_list, &conn->uc_tx_list);
- list_del_init(&conn->uc_tx_list);
- list_add(&zcack_list, &conn->uc_zcack_list);
- list_del_init(&conn->uc_zcack_list);
-
- /* link they to the conn2 */
- list_add(&conn2->uc_tx_list, &tx_list);
- list_del_init(&tx_list);
- list_add(&conn2->uc_zcack_list, &zcack_list);
- list_del_init(&zcack_list);
-
- /* make conn zombie */
- conn->uc_peer = NULL;
- usocklnd_peer_decref(peer);
-
- /* schedule conn2 for processing */
- rc = usocklnd_add_pollrequest(conn2, POLL_ADD_REQUEST, POLLOUT);
- if (rc) {
- peer->up_conns[idx] = NULL;
- usocklnd_conn_decref(conn2); /* should destroy conn */
- } else {
- usocklnd_conn_kill_locked(conn);
- }
-
- pthread_mutex_unlock(&conn->uc_lock);
- pthread_mutex_unlock(&peer->up_lock);
- usocklnd_conn_decref(conn);
-
- } else { /* hello->kshm_ctype != SOCKLND_CONN_NONE */
- if (conn->uc_type != usocklnd_invert_type(hello->kshm_ctype))
- return -EPROTO;
-
- pthread_mutex_lock(&peer->up_lock);
- usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation,
- conn);
- pthread_mutex_unlock(&peer->up_lock);
-
- /* safely transit to UC_READY state */
- /* rc == 0 */
- pthread_mutex_lock(&conn->uc_lock);
- if (conn->uc_state != UC_DEAD) {
- usocklnd_rx_ksmhdr_state_transition(conn);
-
- /* POLLIN is already set because we just
- * received hello, but maybe we've smth. to
- * send? */
- LASSERT (conn->uc_sending == 0);
- if ( !list_empty(&conn->uc_tx_list) ||
- !list_empty(&conn->uc_zcack_list) ) {
-
- conn->uc_tx_deadline =
- cfs_time_shift(usock_tuns.ut_timeout);
- conn->uc_tx_flag = 1;
- rc = usocklnd_add_pollrequest(conn,
- POLL_SET_REQUEST,
- POLLIN | POLLOUT);
- }
-
- if (rc == 0)
- conn->uc_state = UC_READY;
- }
- pthread_mutex_unlock(&conn->uc_lock);
- }
-
- return rc;
-}
-
-/* All actions that we need after receiving hello on passive conn:
- * 1) Stash peer's nid, pid, incarnation and conn type
- * 2) Cope with easy case: conn[idx] is empty - just save conn there
- * 3) Resolve race:
- * a) if our nid is higher - reply with CONN_NONE and make us zombie
- * b) if peer's nid is higher - postpone race resolution till
- * READY state
- * 4) Anyhow, send reply hello
-*/
-int
-usocklnd_passiveconn_hellorecv(usock_conn_t *conn)
-{
- ksock_hello_msg_t *hello = conn->uc_rx_hello;
- int type;
- int idx;
- int rc;
- usock_peer_t *peer;
- lnet_ni_t *ni = conn->uc_ni;
- __u32 peer_ip = conn->uc_peer_ip;
- __u16 peer_port = conn->uc_peer_port;
-
- /* don't know parent peer yet and not zombie */
- LASSERT (conn->uc_peer == NULL &&
- ni != NULL);
-
- /* don't know peer's nid and incarnation yet */
- if (peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- /* do not trust liblustre clients */
- conn->uc_peerid.pid = peer_port | LNET_PID_USERFLAG;
- conn->uc_peerid.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
- peer_ip);
- if (hello->kshm_ctype != SOCKLND_CONN_ANY) {
- lnet_ni_decref(ni);
- conn->uc_ni = NULL;
- CERROR("Refusing to accept connection of type=%d from "
- "userspace process %u.%u.%u.%u:%d\n", hello->kshm_ctype,
- HIPQUAD(peer_ip), peer_port);
- return -EINVAL;
- }
- } else {
- conn->uc_peerid.pid = hello->kshm_src_pid;
- conn->uc_peerid.nid = hello->kshm_src_nid;
- }
- conn->uc_type = type = usocklnd_invert_type(hello->kshm_ctype);
-
- rc = usocklnd_find_or_create_peer(ni, conn->uc_peerid, &peer);
- if (rc) {
- lnet_ni_decref(ni);
- conn->uc_ni = NULL;
- return rc;
- }
-
- peer->up_last_alive = cfs_time_current();
-
- idx = usocklnd_type2idx(conn->uc_type);
-
- /* safely check whether we're first */
- pthread_mutex_lock(&peer->up_lock);
-
- usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation, NULL);
-
- if (peer->up_conns[idx] == NULL) {
- peer->up_last_alive = cfs_time_current();
- conn->uc_peer = peer;
- conn->uc_ni = NULL;
- usocklnd_link_conn_to_peer(conn, peer, idx);
- usocklnd_conn_addref(conn);
- } else {
- usocklnd_peer_decref(peer);
-
- /* Resolve race in favour of higher NID */
- if (conn->uc_peerid.nid < conn->uc_ni->ni_nid) {
- /* make us zombie */
- conn->uc_ni = NULL;
- type = SOCKLND_CONN_NONE;
- }
-
- /* if conn->uc_peerid.nid > conn->uc_ni->ni_nid,
- * postpone race resolution till READY state
- * (hopefully that conn[idx] will die because of
- * incoming hello of CONN_NONE type) */
- }
- pthread_mutex_unlock(&peer->up_lock);
-
- /* allocate and initialize fake tx with hello */
- conn->uc_tx_hello = usocklnd_create_hello_tx(ni, type,
- conn->uc_peerid.nid);
- if (conn->uc_ni == NULL)
- lnet_ni_decref(ni);
-
- if (conn->uc_tx_hello == NULL)
- return -ENOMEM;
-
- /* rc == 0 */
- pthread_mutex_lock(&conn->uc_lock);
- if (conn->uc_state == UC_DEAD)
- goto passive_hellorecv_done;
-
- conn->uc_state = UC_SENDING_HELLO;
- conn->uc_tx_deadline = cfs_time_shift(usock_tuns.ut_timeout);
- conn->uc_tx_flag = 1;
- rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, POLLOUT);
-
- passive_hellorecv_done:
- pthread_mutex_unlock(&conn->uc_lock);
- return rc;
-}
-
-int
-usocklnd_write_handler(usock_conn_t *conn)
-{
- usock_tx_t *tx;
- int ret;
- int rc = 0;
- int state;
- usock_peer_t *peer;
- lnet_ni_t *ni;
-
- pthread_mutex_lock(&conn->uc_lock); /* like membar */
- state = conn->uc_state;
- pthread_mutex_unlock(&conn->uc_lock);
-
- switch (state) {
- case UC_CONNECTING:
- /* hello_tx has already been initialized
- * in usocklnd_create_active_conn() */
- usocklnd_conn_new_state(conn, UC_SENDING_HELLO);
- /* fall through */
-
- case UC_SENDING_HELLO:
- rc = usocklnd_send_tx(conn, conn->uc_tx_hello);
- if (rc <= 0) /* error or partial send or connection closed */
- break;
-
- /* tx with hello was sent successfully */
- usocklnd_destroy_tx(NULL, conn->uc_tx_hello);
- conn->uc_tx_hello = NULL;
-
- if (conn->uc_activeflag == 1) /* active conn */
- rc = usocklnd_activeconn_hellosent(conn);
- else /* passive conn */
- rc = usocklnd_passiveconn_hellosent(conn);
-
- break;
-
- case UC_READY:
- pthread_mutex_lock(&conn->uc_lock);
-
- peer = conn->uc_peer;
- LASSERT (peer != NULL);
- ni = peer->up_ni;
-
- if (list_empty(&conn->uc_tx_list) &&
- list_empty(&conn->uc_zcack_list)) {
- LASSERT(usock_tuns.ut_fair_limit > 1);
- pthread_mutex_unlock(&conn->uc_lock);
- return 0;
- }
-
- tx = usocklnd_try_piggyback(&conn->uc_tx_list,
- &conn->uc_zcack_list);
- if (tx != NULL)
- conn->uc_sending = 1;
- else
- rc = -ENOMEM;
-
- pthread_mutex_unlock(&conn->uc_lock);
-
- if (rc)
- break;
-
- rc = usocklnd_send_tx(conn, tx);
- if (rc == 0) { /* partial send or connection closed */
- pthread_mutex_lock(&conn->uc_lock);
- list_add(&tx->tx_list, &conn->uc_tx_list);
- conn->uc_sending = 0;
- pthread_mutex_unlock(&conn->uc_lock);
- break;
- }
- if (rc < 0) { /* real error */
- usocklnd_destroy_tx(ni, tx);
- break;
- }
-
- /* rc == 1: tx was sent completely */
- usocklnd_destroy_tx(ni, tx);
-
- pthread_mutex_lock(&conn->uc_lock);
- conn->uc_sending = 0;
- if (conn->uc_state != UC_DEAD &&
- list_empty(&conn->uc_tx_list) &&
- list_empty(&conn->uc_zcack_list)) {
- conn->uc_tx_flag = 0;
- ret = usocklnd_add_pollrequest(conn,
- POLL_TX_SET_REQUEST, 0);
- if (ret)
- rc = ret;
- }
- pthread_mutex_unlock(&conn->uc_lock);
-
- break;
-
- case UC_DEAD:
- break;
-
- default:
- LBUG();
- }
-
- if (rc < 0)
- usocklnd_conn_kill(conn);
-
- return rc;
-}
-
-/* Return the first tx from tx_list with piggybacked zc_ack
- * from zcack_list when possible. If tx_list is empty, return
- * brand new noop tx for zc_ack from zcack_list. Return NULL
- * if an error happened */
-usock_tx_t *
-usocklnd_try_piggyback(struct list_head *tx_list_p,
- struct list_head *zcack_list_p)
-{
- usock_tx_t *tx;
- usock_zc_ack_t *zc_ack;
-
- /* assign tx and zc_ack */
- if (list_empty(tx_list_p))
- tx = NULL;
- else {
- tx = list_entry(tx_list_p->next, usock_tx_t, tx_list);
- list_del(&tx->tx_list);
-
- /* already piggybacked or partially send */
- if (tx->tx_msg.ksm_zc_ack_cookie ||
- tx->tx_resid != tx->tx_nob)
- return tx;
- }
-
- if (list_empty(zcack_list_p)) {
- /* nothing to piggyback */
- return tx;
- } else {
- zc_ack = list_entry(zcack_list_p->next,
- usock_zc_ack_t, zc_list);
- list_del(&zc_ack->zc_list);
- }
-
- if (tx != NULL)
- /* piggyback the zc-ack cookie */
- tx->tx_msg.ksm_zc_ack_cookie = zc_ack->zc_cookie;
- else
- /* cannot piggyback, need noop */
- tx = usocklnd_create_noop_tx(zc_ack->zc_cookie);
-
- LIBCFS_FREE (zc_ack, sizeof(*zc_ack));
- return tx;
-}
-
-/* All actions that we need after sending hello on active conn:
- * 1) update RX iov to receive hello
- * 2) state transition to UC_RECEIVING_HELLO
- * 3) notify poll_thread that we're waiting for incoming hello */
-int
-usocklnd_activeconn_hellosent(usock_conn_t *conn)
-{
- int rc = 0;
-
- pthread_mutex_lock(&conn->uc_lock);
-
- if (conn->uc_state != UC_DEAD) {
- usocklnd_rx_hellomagic_state_transition(conn);
- conn->uc_state = UC_RECEIVING_HELLO;
- conn->uc_tx_flag = 0;
- rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, POLLIN);
- }
-
- pthread_mutex_unlock(&conn->uc_lock);
-
- return rc;
-}
-
-/* All actions that we need after sending hello on passive conn:
- * 1) Cope with 1st easy case: conn is already linked to a peer
- * 2) Cope with 2nd easy case: remove zombie conn
- * 3) Resolve race:
- * a) find the peer
- * b) link the conn to the peer if conn[idx] is empty
- * c) if the conn[idx] isn't empty and is in READY state,
- * remove the conn as duplicated
- * d) if the conn[idx] isn't empty and isn't in READY state,
- * override conn[idx] with the conn
- */
-int
-usocklnd_passiveconn_hellosent(usock_conn_t *conn)
-{
- usock_conn_t *conn2;
- usock_peer_t *peer;
- struct list_head tx_list;
- struct list_head zcack_list;
- int idx;
- int rc = 0;
-
- /* almost nothing to do if conn is already linked to peer hash table */
- if (conn->uc_peer != NULL)
- goto passive_hellosent_done;
-
- /* conn->uc_peer == NULL, so the conn isn't accessible via
- * peer hash list, so nobody can touch the conn but us */
-
- if (conn->uc_ni == NULL) /* remove zombie conn */
- goto passive_hellosent_connkill;
-
- /* all code below is race resolution, because normally
- * passive conn is linked to peer just after receiving hello */
- CFS_INIT_LIST_HEAD (&tx_list);
- CFS_INIT_LIST_HEAD (&zcack_list);
-
- /* conn is passive and isn't linked to any peer,
- so its tx and zc_ack lists have to be empty */
- LASSERT (list_empty(&conn->uc_tx_list) &&
- list_empty(&conn->uc_zcack_list) &&
- conn->uc_sending == 0);
-
- rc = usocklnd_find_or_create_peer(conn->uc_ni, conn->uc_peerid, &peer);
- if (rc)
- return rc;
-
- idx = usocklnd_type2idx(conn->uc_type);
-
- /* try to link conn to peer */
- pthread_mutex_lock(&peer->up_lock);
- if (peer->up_conns[idx] == NULL) {
- usocklnd_link_conn_to_peer(conn, peer, idx);
- usocklnd_conn_addref(conn);
- conn->uc_peer = peer;
- usocklnd_peer_addref(peer);
- } else {
- conn2 = peer->up_conns[idx];
- pthread_mutex_lock(&conn2->uc_lock);
-
- if (conn2->uc_state == UC_READY) {
- /* conn2 is in READY state, so conn is "duplicated" */
- pthread_mutex_unlock(&conn2->uc_lock);
- pthread_mutex_unlock(&peer->up_lock);
- usocklnd_peer_decref(peer);
- goto passive_hellosent_connkill;
- }
-
- /* uc_state != UC_READY => switch conn and conn2 */
- /* Relink txs and zc_acks from conn2 to conn.
- * We're sure that nobody but us can access to conn,
- * nevertheless we use mutex (if we're wrong yet,
- * deadlock is easy to see that corrupted list */
- list_add(&tx_list, &conn2->uc_tx_list);
- list_del_init(&conn2->uc_tx_list);
- list_add(&zcack_list, &conn2->uc_zcack_list);
- list_del_init(&conn2->uc_zcack_list);
-
- pthread_mutex_lock(&conn->uc_lock);
- list_add_tail(&conn->uc_tx_list, &tx_list);
- list_del_init(&tx_list);
- list_add_tail(&conn->uc_zcack_list, &zcack_list);
- list_del_init(&zcack_list);
- conn->uc_peer = peer;
- pthread_mutex_unlock(&conn->uc_lock);
-
- conn2->uc_peer = NULL; /* make conn2 zombie */
- pthread_mutex_unlock(&conn2->uc_lock);
- usocklnd_conn_decref(conn2);
-
- usocklnd_link_conn_to_peer(conn, peer, idx);
- usocklnd_conn_addref(conn);
- conn->uc_peer = peer;
- }
-
- lnet_ni_decref(conn->uc_ni);
- conn->uc_ni = NULL;
- pthread_mutex_unlock(&peer->up_lock);
- usocklnd_peer_decref(peer);
-
- passive_hellosent_done:
- /* safely transit to UC_READY state */
- /* rc == 0 */
- pthread_mutex_lock(&conn->uc_lock);
- if (conn->uc_state != UC_DEAD) {
- usocklnd_rx_ksmhdr_state_transition(conn);
-
- /* we're ready to recive incoming packets and maybe
- already have smth. to transmit */
- LASSERT (conn->uc_sending == 0);
- if ( list_empty(&conn->uc_tx_list) &&
- list_empty(&conn->uc_zcack_list) ) {
- conn->uc_tx_flag = 0;
- rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST,
- POLLIN);
- } else {
- conn->uc_tx_deadline =
- cfs_time_shift(usock_tuns.ut_timeout);
- conn->uc_tx_flag = 1;
- rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST,
- POLLIN | POLLOUT);
- }
-
- if (rc == 0)
- conn->uc_state = UC_READY;
- }
- pthread_mutex_unlock(&conn->uc_lock);
- return rc;
-
- passive_hellosent_connkill:
- usocklnd_conn_kill(conn);
- return 0;
-}
-
-/* Send as much tx data as possible.
- * Returns 0 or 1 on succsess, <0 if fatal error.
- * 0 means partial send or non-fatal error, 1 - complete.
- * Rely on libcfs_sock_writev() for differentiating fatal and
- * non-fatal errors. An error should be considered as non-fatal if:
- * 1) it still makes sense to continue reading &&
- * 2) anyway, poll() will set up POLLHUP|POLLERR flags */
-int
-usocklnd_send_tx(usock_conn_t *conn, usock_tx_t *tx)
-{
- struct iovec *iov;
- int nob;
- int fd = conn->uc_fd;
- cfs_time_t t;
-
- LASSERT (tx->tx_resid != 0);
-
- do {
- usock_peer_t *peer = conn->uc_peer;
-
- LASSERT (tx->tx_niov > 0);
-
- nob = libcfs_sock_writev(fd, tx->tx_iov, tx->tx_niov);
- if (nob < 0)
- conn->uc_errored = 1;
- if (nob <= 0) /* write queue is flow-controlled or error */
- return nob;
-
- LASSERT (nob <= tx->tx_resid);
- tx->tx_resid -= nob;
- t = cfs_time_current();
- conn->uc_tx_deadline = cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout));
-
- if(peer != NULL)
- peer->up_last_alive = t;
-
- /* "consume" iov */
- iov = tx->tx_iov;
- do {
- LASSERT (tx->tx_niov > 0);
-
- if (nob < iov->iov_len) {
- iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob);
- iov->iov_len -= nob;
- break;
- }
-
- nob -= iov->iov_len;
- tx->tx_iov = ++iov;
- tx->tx_niov--;
- } while (nob != 0);
-
- } while (tx->tx_resid != 0);
-
- return 1; /* send complete */
-}
-
-/* Read from wire as much data as possible.
- * Returns 0 or 1 on succsess, <0 if error or EOF.
- * 0 means partial read, 1 - complete */
-int
-usocklnd_read_data(usock_conn_t *conn)
-{
- struct iovec *iov;
- int nob;
- cfs_time_t t;
-
- LASSERT (conn->uc_rx_nob_wanted != 0);
-
- do {
- usock_peer_t *peer = conn->uc_peer;
-
- LASSERT (conn->uc_rx_niov > 0);
-
- nob = libcfs_sock_readv(conn->uc_fd, conn->uc_rx_iov, conn->uc_rx_niov);
- if (nob <= 0) {/* read nothing or error */
- conn->uc_errored = 1;
- return nob;
- }
-
- LASSERT (nob <= conn->uc_rx_nob_wanted);
- conn->uc_rx_nob_wanted -= nob;
- conn->uc_rx_nob_left -= nob;
- t = cfs_time_current();
- conn->uc_rx_deadline = cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout));
-
- if(peer != NULL)
- peer->up_last_alive = t;
-
- /* "consume" iov */
- iov = conn->uc_rx_iov;
- do {
- LASSERT (conn->uc_rx_niov > 0);
-
- if (nob < iov->iov_len) {
- iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob);
- iov->iov_len -= nob;
- break;
- }
-
- nob -= iov->iov_len;
- conn->uc_rx_iov = ++iov;
- conn->uc_rx_niov--;
- } while (nob != 0);
-
- } while (conn->uc_rx_nob_wanted != 0);
-
- return 1; /* read complete */
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Maxim Patlasov <maxim@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- */
-
-#include "usocklnd.h"
-#include <unistd.h>
-#include <sys/syscall.h>
-
-void
-usocklnd_process_stale_list(usock_pollthread_t *pt_data)
-{
- while (!list_empty(&pt_data->upt_stale_list)) {
- usock_conn_t *conn;
- conn = list_entry(pt_data->upt_stale_list.next,
- usock_conn_t, uc_stale_list);
-
- list_del(&conn->uc_stale_list);
-
- usocklnd_tear_peer_conn(conn);
- usocklnd_conn_decref(conn); /* -1 for idx2conn[idx] or pr */
- }
-}
-
-int
-usocklnd_poll_thread(void *arg)
-{
- int rc = 0;
- usock_pollthread_t *pt_data = (usock_pollthread_t *)arg;
- cfs_time_t current_time;
- cfs_time_t planned_time;
- int idx;
- int idx_start;
- int idx_finish;
- int chunk;
- int saved_nfds;
- int extra;
- int times;
-
- /* mask signals to avoid SIGPIPE, etc */
- sigset_t sigs;
- sigfillset (&sigs);
- pthread_sigmask (SIG_SETMASK, &sigs, 0);
-
- LASSERT(pt_data != NULL);
-
- planned_time = cfs_time_shift(usock_tuns.ut_poll_timeout);
- chunk = usocklnd_calculate_chunk_size(pt_data->upt_nfds);
- saved_nfds = pt_data->upt_nfds;
- idx_start = 1;
-
- /* Main loop */
- while (usock_data.ud_shutdown == 0) {
- rc = 0;
-
- /* Process all enqueued poll requests */
- pthread_mutex_lock(&pt_data->upt_pollrequests_lock);
- while (!list_empty(&pt_data->upt_pollrequests)) {
- usock_pollrequest_t *pr;
- pr = list_entry(pt_data->upt_pollrequests.next,
- usock_pollrequest_t, upr_list);
-
- list_del(&pr->upr_list);
- rc = usocklnd_process_pollrequest(pr, pt_data);
- if (rc)
- break;
- }
- pthread_mutex_unlock(&pt_data->upt_pollrequests_lock);
-
- if (rc)
- break;
-
- /* Delete conns orphaned due to POLL_DEL_REQUESTs */
- usocklnd_process_stale_list(pt_data);
-
- /* Actual polling for events */
- rc = poll(pt_data->upt_pollfd,
- pt_data->upt_nfds,
- usock_tuns.ut_poll_timeout * 1000);
-
- if (rc < 0) {
- CERROR("Cannot poll(2): errno=%d\n", errno);
- break;
- }
-
- if (rc > 0)
- usocklnd_execute_handlers(pt_data);
-
- current_time = cfs_time_current();
-
- if (pt_data->upt_nfds < 2 ||
- cfs_time_before(current_time, planned_time))
- continue;
-
- /* catch up growing pollfd[] */
- if (pt_data->upt_nfds > saved_nfds) {
- extra = pt_data->upt_nfds - saved_nfds;
- saved_nfds = pt_data->upt_nfds;
- } else {
- extra = 0;
- }
-
- times = cfs_duration_sec(cfs_time_sub(current_time, planned_time)) + 1;
- idx_finish = MIN(idx_start + chunk*times + extra, pt_data->upt_nfds);
-
- for (idx = idx_start; idx < idx_finish; idx++) {
- usock_conn_t *conn = pt_data->upt_idx2conn[idx];
- pthread_mutex_lock(&conn->uc_lock);
- if (usocklnd_conn_timed_out(conn, current_time) &&
- conn->uc_state != UC_DEAD) {
- conn->uc_errored = 1;
- usocklnd_conn_kill_locked(conn);
- }
- pthread_mutex_unlock(&conn->uc_lock);
- }
-
- if (idx_finish == pt_data->upt_nfds) {
- chunk = usocklnd_calculate_chunk_size(pt_data->upt_nfds);
- saved_nfds = pt_data->upt_nfds;
- idx_start = 1;
- }
- else {
- idx_start = idx_finish;
- }
-
- planned_time = cfs_time_add(current_time,
- cfs_time_seconds(usock_tuns.ut_poll_timeout));
- }
-
- /* All conns should be deleted by POLL_DEL_REQUESTs while shutdown */
- LASSERT (rc != 0 || pt_data->upt_nfds == 1);
-
- if (rc) {
- pthread_mutex_lock(&pt_data->upt_pollrequests_lock);
-
- /* Block new poll requests to be enqueued */
- pt_data->upt_errno = rc;
-
- while (!list_empty(&pt_data->upt_pollrequests)) {
- usock_pollrequest_t *pr;
- pr = list_entry(pt_data->upt_pollrequests.next,
- usock_pollrequest_t, upr_list);
-
- list_del(&pr->upr_list);
-
- if (pr->upr_type == POLL_ADD_REQUEST) {
- close(pr->upr_conn->uc_fd);
- list_add_tail(&pr->upr_conn->uc_stale_list,
- &pt_data->upt_stale_list);
- } else {
- usocklnd_conn_decref(pr->upr_conn);
- }
-
- LIBCFS_FREE (pr, sizeof(*pr));
- }
- pthread_mutex_unlock(&pt_data->upt_pollrequests_lock);
-
- usocklnd_process_stale_list(pt_data);
-
- for (idx = 1; idx < pt_data->upt_nfds; idx++) {
- usock_conn_t *conn = pt_data->upt_idx2conn[idx];
- LASSERT(conn != NULL);
- close(conn->uc_fd);
- usocklnd_tear_peer_conn(conn);
- usocklnd_conn_decref(conn);
- }
- }
-
- /* unblock usocklnd_shutdown() */
- cfs_complete(&pt_data->upt_completion);
-
- return 0;
-}
-
-/* Returns 0 on success, <0 else */
-int
-usocklnd_add_pollrequest(usock_conn_t *conn, int type, short value)
-{
- int pt_idx = conn->uc_pt_idx;
- usock_pollthread_t *pt = &usock_data.ud_pollthreads[pt_idx];
- usock_pollrequest_t *pr;
-
- LIBCFS_ALLOC(pr, sizeof(*pr));
- if (pr == NULL) {
- CERROR ("Cannot allocate poll request\n");
- return -ENOMEM;
- }
-
- pr->upr_conn = conn;
- pr->upr_type = type;
- pr->upr_value = value;
-
- usocklnd_conn_addref(conn); /* +1 for poll request */
-
- pthread_mutex_lock(&pt->upt_pollrequests_lock);
-
- if (pt->upt_errno) { /* very rare case: errored poll thread */
- int rc = pt->upt_errno;
- pthread_mutex_unlock(&pt->upt_pollrequests_lock);
- usocklnd_conn_decref(conn);
- LIBCFS_FREE(pr, sizeof(*pr));
- return rc;
- }
-
- list_add_tail(&pr->upr_list, &pt->upt_pollrequests);
- pthread_mutex_unlock(&pt->upt_pollrequests_lock);
- return 0;
-}
-
-void
-usocklnd_add_killrequest(usock_conn_t *conn)
-{
- int pt_idx = conn->uc_pt_idx;
- usock_pollthread_t *pt = &usock_data.ud_pollthreads[pt_idx];
- usock_pollrequest_t *pr = conn->uc_preq;
-
- /* Use preallocated poll request because there is no good
- * workaround for ENOMEM error while killing connection */
- if (pr) {
- pr->upr_conn = conn;
- pr->upr_type = POLL_DEL_REQUEST;
- pr->upr_value = 0;
-
- usocklnd_conn_addref(conn); /* +1 for poll request */
-
- pthread_mutex_lock(&pt->upt_pollrequests_lock);
-
- if (pt->upt_errno) { /* very rare case: errored poll thread */
- pthread_mutex_unlock(&pt->upt_pollrequests_lock);
- usocklnd_conn_decref(conn);
- return; /* conn will be killed in poll thread anyway */
- }
-
- list_add_tail(&pr->upr_list, &pt->upt_pollrequests);
- pthread_mutex_unlock(&pt->upt_pollrequests_lock);
-
- conn->uc_preq = NULL;
- }
-}
-
-/* Process poll request. Update poll data.
- * Returns 0 on success, <0 else */
-int
-usocklnd_process_pollrequest(usock_pollrequest_t *pr,
- usock_pollthread_t *pt_data)
-{
- int type = pr->upr_type;
- short value = pr->upr_value;
- usock_conn_t *conn = pr->upr_conn;
- int idx = 0;
- struct pollfd *pollfd = pt_data->upt_pollfd;
- int *fd2idx = pt_data->upt_fd2idx;
- usock_conn_t **idx2conn = pt_data->upt_idx2conn;
- int *skip = pt_data->upt_skip;
-
- LASSERT(conn != NULL);
- LASSERT(conn->uc_fd >=0);
- LASSERT(type == POLL_ADD_REQUEST ||
- conn->uc_fd < pt_data->upt_nfd2idx);
-
- if (type != POLL_ADD_REQUEST) {
- idx = fd2idx[conn->uc_fd];
- if (idx > 0 && idx < pt_data->upt_nfds) { /* hot path */
- LASSERT(pollfd[idx].fd == conn->uc_fd);
- } else { /* unlikely */
- CWARN("Very unlikely event happend: trying to"
- " handle poll request of type %d but idx=%d"
- " is out of range [1 ... %d]. Is shutdown"
- " in progress (%d)?\n",
- type, idx, pt_data->upt_nfds - 1,
- usock_data.ud_shutdown);
-
- LIBCFS_FREE (pr, sizeof(*pr));
- usocklnd_conn_decref(conn);
- return 0;
- }
- }
-
- LIBCFS_FREE (pr, sizeof(*pr));
-
- switch (type) {
- case POLL_ADD_REQUEST:
- if (pt_data->upt_nfds >= pt_data->upt_npollfd) {
- /* resize pollfd[], idx2conn[] and skip[] */
- struct pollfd *new_pollfd;
- int new_npollfd = pt_data->upt_npollfd * 2;
- usock_conn_t **new_idx2conn;
- int *new_skip;
-
- new_pollfd = LIBCFS_REALLOC(pollfd, new_npollfd *
- sizeof(struct pollfd));
- if (new_pollfd == NULL)
- goto process_pollrequest_enomem;
- pt_data->upt_pollfd = pollfd = new_pollfd;
-
- new_idx2conn = LIBCFS_REALLOC(idx2conn, new_npollfd *
- sizeof(usock_conn_t *));
- if (new_idx2conn == NULL)
- goto process_pollrequest_enomem;
- pt_data->upt_idx2conn = idx2conn = new_idx2conn;
-
- new_skip = LIBCFS_REALLOC(skip, new_npollfd *
- sizeof(int));
- if (new_skip == NULL)
- goto process_pollrequest_enomem;
- pt_data->upt_skip = new_skip;
-
- pt_data->upt_npollfd = new_npollfd;
- }
-
- if (conn->uc_fd >= pt_data->upt_nfd2idx) {
- /* resize fd2idx[] */
- int *new_fd2idx;
- int new_nfd2idx = pt_data->upt_nfd2idx * 2;
-
- while (new_nfd2idx <= conn->uc_fd)
- new_nfd2idx *= 2;
-
- new_fd2idx = LIBCFS_REALLOC(fd2idx, new_nfd2idx *
- sizeof(int));
- if (new_fd2idx == NULL)
- goto process_pollrequest_enomem;
-
- pt_data->upt_fd2idx = fd2idx = new_fd2idx;
- memset(fd2idx + pt_data->upt_nfd2idx, 0,
- (new_nfd2idx - pt_data->upt_nfd2idx)
- * sizeof(int));
- pt_data->upt_nfd2idx = new_nfd2idx;
- }
-
- LASSERT(fd2idx[conn->uc_fd] == 0);
-
- idx = pt_data->upt_nfds++;
- idx2conn[idx] = conn;
- fd2idx[conn->uc_fd] = idx;
-
- pollfd[idx].fd = conn->uc_fd;
- pollfd[idx].events = value;
- pollfd[idx].revents = 0;
- break;
- case POLL_DEL_REQUEST:
- fd2idx[conn->uc_fd] = 0; /* invalidate this entry */
-
- --pt_data->upt_nfds;
- if (idx != pt_data->upt_nfds) {
- /* shift last entry into released position */
- memcpy(&pollfd[idx], &pollfd[pt_data->upt_nfds],
- sizeof(struct pollfd));
- idx2conn[idx] = idx2conn[pt_data->upt_nfds];
- fd2idx[pollfd[idx].fd] = idx;
- }
-
- close(conn->uc_fd);
- list_add_tail(&conn->uc_stale_list, &pt_data->upt_stale_list);
- break;
- case POLL_RX_SET_REQUEST:
- pollfd[idx].events = (pollfd[idx].events & ~POLLIN) | value;
- break;
- case POLL_TX_SET_REQUEST:
- pollfd[idx].events = (pollfd[idx].events & ~POLLOUT) | value;
- break;
- case POLL_SET_REQUEST:
- pollfd[idx].events = value;
- break;
- default:
- LBUG(); /* unknown type */
- }
-
- /* In the case of POLL_ADD_REQUEST, idx2conn[idx] takes the
- * reference that poll request possesses */
- if (type != POLL_ADD_REQUEST)
- usocklnd_conn_decref(conn);
-
- return 0;
-
- process_pollrequest_enomem:
- usocklnd_conn_decref(conn);
- return -ENOMEM;
-}
-
-/* Loop on poll data executing handlers repeatedly until
- * fair_limit is reached or all entries are exhausted */
-void
-usocklnd_execute_handlers(usock_pollthread_t *pt_data)
-{
- struct pollfd *pollfd = pt_data->upt_pollfd;
- int nfds = pt_data->upt_nfds;
- usock_conn_t **idx2conn = pt_data->upt_idx2conn;
- int *skip = pt_data->upt_skip;
- int j;
-
- if (pollfd[0].revents & POLLIN)
- while (usocklnd_notifier_handler(pollfd[0].fd) > 0)
- ;
-
- skip[0] = 1; /* always skip notifier fd */
-
- for (j = 0; j < usock_tuns.ut_fair_limit; j++) {
- int prev = 0;
- int i = skip[0];
-
- if (i >= nfds) /* nothing ready */
- break;
-
- do {
- usock_conn_t *conn = idx2conn[i];
- int next;
-
- if (j == 0) /* first pass... */
- next = skip[i] = i+1; /* set skip chain */
- else /* later passes... */
- next = skip[i]; /* skip unready pollfds */
-
- /* kill connection if it's closed by peer and
- * there is no data pending for reading */
- if ((pollfd[i].revents & POLLERR) != 0 ||
- (pollfd[i].revents & POLLHUP) != 0) {
- if ((pollfd[i].events & POLLIN) != 0 &&
- (pollfd[i].revents & POLLIN) == 0)
- usocklnd_conn_kill(conn);
- else
- usocklnd_exception_handler(conn);
- }
-
- if ((pollfd[i].revents & POLLIN) != 0 &&
- usocklnd_read_handler(conn) <= 0)
- pollfd[i].revents &= ~POLLIN;
-
- if ((pollfd[i].revents & POLLOUT) != 0 &&
- usocklnd_write_handler(conn) <= 0)
- pollfd[i].revents &= ~POLLOUT;
-
- if ((pollfd[i].revents & (POLLIN | POLLOUT)) == 0)
- skip[prev] = next; /* skip this entry next pass */
- else
- prev = i;
-
- i = next;
- } while (i < nfds);
- }
-}
-
-int
-usocklnd_calculate_chunk_size(int num)
-{
- const int n = 4;
- const int p = usock_tuns.ut_poll_timeout;
- int chunk = num;
-
- /* chunk should be big enough to detect a timeout on any
- * connection within (n+1)/n times the timeout interval
- * if we checks every 'p' seconds 'chunk' conns */
-
- if (usock_tuns.ut_timeout > n * p)
- chunk = (chunk * n * p) / usock_tuns.ut_timeout;
-
- if (chunk == 0)
- chunk = 1;
-
- return chunk;
-}
-
-void
-usocklnd_wakeup_pollthread(int i)
-{
- usock_pollthread_t *pt = &usock_data.ud_pollthreads[i];
- int notification = 0;
- int rc;
-
- rc = syscall(SYS_write, pt->upt_notifier_fd, ¬ification,
- sizeof(notification));
-
- if (rc != sizeof(notification))
- CERROR("Very unlikely event happend: "
- "cannot write to notifier fd (rc=%d; errno=%d)\n",
- rc, errno);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2007 Cluster File Systems, Inc.
- * Author: Maxim Patlasov <maxim@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- */
-
-#include "usocklnd.h"
-#include <sys/time.h>
-
-lnd_t the_tcplnd = {
- .lnd_type = SOCKLND,
- .lnd_startup = usocklnd_startup,
- .lnd_shutdown = usocklnd_shutdown,
- .lnd_send = usocklnd_send,
- .lnd_recv = usocklnd_recv,
- .lnd_accept = usocklnd_accept,
-};
-
-usock_data_t usock_data;
-usock_tunables_t usock_tuns = {
- .ut_timeout = 50,
- .ut_poll_timeout = 1,
- .ut_fair_limit = 1,
- .ut_npollthreads = 0,
- .ut_min_bulk = 1<<10,
- .ut_txcredits = 256,
- .ut_peertxcredits = 8,
- .ut_socknagle = 0,
- .ut_sockbufsiz = 0,
-};
-
-#define MAX_REASONABLE_TIMEOUT 36000 /* 10 hours */
-#define MAX_REASONABLE_NPT 1000
-
-int
-usocklnd_validate_tunables()
-{
- if (usock_tuns.ut_timeout <= 0 ||
- usock_tuns.ut_timeout > MAX_REASONABLE_TIMEOUT) {
- CERROR("USOCK_TIMEOUT: %d is out of reasonable limits\n",
- usock_tuns.ut_timeout);
- return -1;
- }
-
- if (usock_tuns.ut_poll_timeout <= 0 ||
- usock_tuns.ut_poll_timeout > MAX_REASONABLE_TIMEOUT) {
- CERROR("USOCK_POLL_TIMEOUT: %d is out of reasonable limits\n",
- usock_tuns.ut_poll_timeout);
- return -1;
- }
-
- if (usock_tuns.ut_fair_limit <= 0) {
- CERROR("Invalid USOCK_FAIR_LIMIT: %d (should be >0)\n",
- usock_tuns.ut_fair_limit);
- return -1;
- }
-
- if (usock_tuns.ut_npollthreads < 0 ||
- usock_tuns.ut_npollthreads > MAX_REASONABLE_NPT) {
- CERROR("USOCK_NPOLLTHREADS: %d is out of reasonable limits\n",
- usock_tuns.ut_npollthreads);
- return -1;
- }
-
- if (usock_tuns.ut_txcredits <= 0) {
- CERROR("USOCK_TXCREDITS: %d should be positive\n",
- usock_tuns.ut_txcredits);
- return -1;
- }
-
- if (usock_tuns.ut_peertxcredits <= 0) {
- CERROR("USOCK_PEERTXCREDITS: %d should be positive\n",
- usock_tuns.ut_peertxcredits);
- return -1;
- }
-
- if (usock_tuns.ut_peertxcredits > usock_tuns.ut_txcredits) {
- CERROR("USOCK_PEERTXCREDITS: %d should not be greater"
- " than USOCK_TXCREDITS: %d\n",
- usock_tuns.ut_peertxcredits, usock_tuns.ut_txcredits);
- return -1;
- }
-
- if (usock_tuns.ut_socknagle != 0 &&
- usock_tuns.ut_socknagle != 1) {
- CERROR("USOCK_SOCKNAGLE: %d should be 0 or 1\n",
- usock_tuns.ut_socknagle);
- return -1;
- }
-
- if (usock_tuns.ut_sockbufsiz < 0) {
- CERROR("USOCK_SOCKBUFSIZ: %d should be 0 or positive\n",
- usock_tuns.ut_sockbufsiz);
- return -1;
- }
-
- return 0;
-}
-
-void
-usocklnd_release_poll_states(int n)
-{
- int i;
-
- for (i = 0; i < n; i++) {
- usock_pollthread_t *pt = &usock_data.ud_pollthreads[i];
-
- close(pt->upt_notifier_fd);
- close(pt->upt_pollfd[0].fd);
-
- pthread_mutex_destroy(&pt->upt_pollrequests_lock);
- cfs_fini_completion(&pt->upt_completion);
-
- LIBCFS_FREE (pt->upt_pollfd,
- sizeof(struct pollfd) * pt->upt_npollfd);
- LIBCFS_FREE (pt->upt_idx2conn,
- sizeof(usock_conn_t *) * pt->upt_npollfd);
- LIBCFS_FREE (pt->upt_fd2idx,
- sizeof(int) * pt->upt_nfd2idx);
- }
-}
-
-int
-usocklnd_update_tunables()
-{
- int rc;
-
- rc = cfs_parse_int_tunable(&usock_tuns.ut_timeout,
- "USOCK_TIMEOUT");
- if (rc)
- return rc;
-
- rc = cfs_parse_int_tunable(&usock_tuns.ut_poll_timeout,
- "USOCK_POLL_TIMEOUT");
- if (rc)
- return rc;
-
- rc = cfs_parse_int_tunable(&usock_tuns.ut_npollthreads,
- "USOCK_NPOLLTHREADS");
- if (rc)
- return rc;
-
- rc = cfs_parse_int_tunable(&usock_tuns.ut_fair_limit,
- "USOCK_FAIR_LIMIT");
- if (rc)
- return rc;
-
- rc = cfs_parse_int_tunable(&usock_tuns.ut_min_bulk,
- "USOCK_MIN_BULK");
- if (rc)
- return rc;
-
- rc = cfs_parse_int_tunable(&usock_tuns.ut_txcredits,
- "USOCK_TXCREDITS");
- if (rc)
- return rc;
-
- rc = cfs_parse_int_tunable(&usock_tuns.ut_peertxcredits,
- "USOCK_PEERTXCREDITS");
- if (rc)
- return rc;
-
- rc = cfs_parse_int_tunable(&usock_tuns.ut_socknagle,
- "USOCK_SOCKNAGLE");
- if (rc)
- return rc;
-
- rc = cfs_parse_int_tunable(&usock_tuns.ut_sockbufsiz,
- "USOCK_SOCKBUFSIZ");
- if (rc)
- return rc;
-
- if (usocklnd_validate_tunables())
- return -EINVAL;
-
- if (usock_tuns.ut_npollthreads == 0) {
- usock_tuns.ut_npollthreads = cfs_online_cpus();
-
- if (usock_tuns.ut_npollthreads <= 0) {
- CERROR("Cannot find out the number of online CPUs\n");
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-
-int
-usocklnd_base_startup()
-{
- usock_pollthread_t *pt;
- int i;
- int rc;
-
- rc = usocklnd_update_tunables();
- if (rc)
- return rc;
-
- usock_data.ud_npollthreads = usock_tuns.ut_npollthreads;
-
- LIBCFS_ALLOC (usock_data.ud_pollthreads,
- usock_data.ud_npollthreads *
- sizeof(usock_pollthread_t));
- if (usock_data.ud_pollthreads == NULL)
- return -ENOMEM;
-
- /* Initialize poll thread state structures */
- for (i = 0; i < usock_data.ud_npollthreads; i++) {
- int notifier[2];
-
- pt = &usock_data.ud_pollthreads[i];
-
- rc = -ENOMEM;
-
- LIBCFS_ALLOC (pt->upt_pollfd,
- sizeof(struct pollfd) * UPT_START_SIZ);
- if (pt->upt_pollfd == NULL)
- goto base_startup_failed_0;
-
- LIBCFS_ALLOC (pt->upt_idx2conn,
- sizeof(usock_conn_t *) * UPT_START_SIZ);
- if (pt->upt_idx2conn == NULL)
- goto base_startup_failed_1;
-
- LIBCFS_ALLOC (pt->upt_fd2idx,
- sizeof(int) * UPT_START_SIZ);
- if (pt->upt_fd2idx == NULL)
- goto base_startup_failed_2;
-
- memset(pt->upt_fd2idx, 0,
- sizeof(int) * UPT_START_SIZ);
-
- LIBCFS_ALLOC (pt->upt_skip,
- sizeof(int) * UPT_START_SIZ);
- if (pt->upt_skip == NULL)
- goto base_startup_failed_3;
-
- pt->upt_npollfd = pt->upt_nfd2idx = UPT_START_SIZ;
-
- rc = libcfs_socketpair(notifier);
- if (rc != 0)
- goto base_startup_failed_4;
-
- pt->upt_notifier_fd = notifier[0];
-
- pt->upt_pollfd[0].fd = notifier[1];
- pt->upt_pollfd[0].events = POLLIN;
- pt->upt_pollfd[0].revents = 0;
-
- pt->upt_nfds = 1;
- pt->upt_idx2conn[0] = NULL;
-
- pt->upt_errno = 0;
- CFS_INIT_LIST_HEAD (&pt->upt_pollrequests);
- CFS_INIT_LIST_HEAD (&pt->upt_stale_list);
- pthread_mutex_init(&pt->upt_pollrequests_lock, NULL);
- cfs_init_completion(&pt->upt_completion);
- }
-
- /* Initialize peer hash list */
- for (i = 0; i < UD_PEER_HASH_SIZE; i++)
- CFS_INIT_LIST_HEAD(&usock_data.ud_peers[i]);
-
- pthread_rwlock_init(&usock_data.ud_peers_lock, NULL);
-
- /* Spawn poll threads */
- for (i = 0; i < usock_data.ud_npollthreads; i++) {
- rc = cfs_create_thread(usocklnd_poll_thread,
- &usock_data.ud_pollthreads[i]);
- if (rc) {
- usocklnd_base_shutdown(i);
- return rc;
- }
- }
-
- usock_data.ud_state = UD_STATE_INITIALIZED;
-
- return 0;
-
- base_startup_failed_4:
- LIBCFS_FREE (pt->upt_skip, sizeof(int) * UPT_START_SIZ);
- base_startup_failed_3:
- LIBCFS_FREE (pt->upt_fd2idx, sizeof(int) * UPT_START_SIZ);
- base_startup_failed_2:
- LIBCFS_FREE (pt->upt_idx2conn, sizeof(usock_conn_t *) * UPT_START_SIZ);
- base_startup_failed_1:
- LIBCFS_FREE (pt->upt_pollfd, sizeof(struct pollfd) * UPT_START_SIZ);
- base_startup_failed_0:
- LASSERT(rc != 0);
- usocklnd_release_poll_states(i);
- LIBCFS_FREE (usock_data.ud_pollthreads,
- usock_data.ud_npollthreads *
- sizeof(usock_pollthread_t));
- return rc;
-}
-
-void
-usocklnd_base_shutdown(int n)
-{
- int i;
-
- usock_data.ud_shutdown = 1;
- for (i = 0; i < n; i++) {
- usock_pollthread_t *pt = &usock_data.ud_pollthreads[i];
- usocklnd_wakeup_pollthread(i);
- cfs_wait_for_completion(&pt->upt_completion);
- }
-
- pthread_rwlock_destroy(&usock_data.ud_peers_lock);
-
- usocklnd_release_poll_states(usock_data.ud_npollthreads);
-
- LIBCFS_FREE (usock_data.ud_pollthreads,
- usock_data.ud_npollthreads *
- sizeof(usock_pollthread_t));
-
- usock_data.ud_state = UD_STATE_INIT_NOTHING;
-}
-
-__u64
-usocklnd_new_incarnation()
-{
- struct timeval tv;
- int rc = gettimeofday(&tv, NULL);
- LASSERT (rc == 0);
- return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-}
-
-static int
-usocklnd_assign_ni_nid(lnet_ni_t *ni)
-{
- int rc;
- int up;
- __u32 ipaddr;
-
- /* Find correct IP-address and update ni_nid with it.
- * Two cases are supported:
- * 1) no explicit interfaces are defined. NID will be assigned to
- * first non-lo interface that is up;
- * 2) exactly one explicit interface is defined. For example,
- * LNET_NETWORKS='tcp(eth0)' */
-
- if (ni->ni_interfaces[0] == NULL) {
- char **names;
- int i, n;
-
- n = libcfs_ipif_enumerate(&names);
- if (n <= 0) {
- CERROR("Can't enumerate interfaces: %d\n", n);
- return -1;
- }
-
- for (i = 0; i < n; i++) {
-
- if (!strcmp(names[i], "lo")) /* skip the loopback IF */
- continue;
-
- rc = libcfs_ipif_query(names[i], &up, &ipaddr);
- if (rc != 0) {
- CWARN("Can't get interface %s info: %d\n",
- names[i], rc);
- continue;
- }
-
- if (!up) {
- CWARN("Ignoring interface %s (down)\n",
- names[i]);
- continue;
- }
-
- break; /* one address is quite enough */
- }
-
- libcfs_ipif_free_enumeration(names, n);
-
- if (i >= n) {
- CERROR("Can't find any usable interfaces\n");
- return -1;
- }
-
- CDEBUG(D_NET, "No explicit interfaces defined. "
- "%u.%u.%u.%u used\n", HIPQUAD(ipaddr));
- } else {
- if (ni->ni_interfaces[1] != NULL) {
- CERROR("only one explicit interface is allowed\n");
- return -1;
- }
-
- rc = libcfs_ipif_query(ni->ni_interfaces[0], &up, &ipaddr);
- if (rc != 0) {
- CERROR("Can't get interface %s info: %d\n",
- ni->ni_interfaces[0], rc);
- return -1;
- }
-
- if (!up) {
- CERROR("Explicit interface defined: %s but is down\n",
- ni->ni_interfaces[0]);
- return -1;
- }
-
- CDEBUG(D_NET, "Explicit interface defined: %s. "
- "%u.%u.%u.%u used\n",
- ni->ni_interfaces[0], HIPQUAD(ipaddr));
-
- }
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ipaddr);
-
- return 0;
-}
-
-int
-usocklnd_startup(lnet_ni_t *ni)
-{
- int rc;
- usock_net_t *net;
-
- if (usock_data.ud_state == UD_STATE_INIT_NOTHING) {
- rc = usocklnd_base_startup();
- if (rc != 0)
- return rc;
- }
-
- LIBCFS_ALLOC(net, sizeof(*net));
- if (net == NULL)
- goto startup_failed_0;
-
- memset(net, 0, sizeof(*net));
- net->un_incarnation = usocklnd_new_incarnation();
- pthread_mutex_init(&net->un_lock, NULL);
- pthread_cond_init(&net->un_cond, NULL);
-
- ni->ni_data = net;
-
- if (!(the_lnet.ln_pid & LNET_PID_USERFLAG)) {
- rc = usocklnd_assign_ni_nid(ni);
- if (rc != 0)
- goto startup_failed_1;
- }
-
- LASSERT (ni->ni_lnd == &the_tcplnd);
-
- ni->ni_maxtxcredits = usock_tuns.ut_txcredits;
- ni->ni_peertxcredits = usock_tuns.ut_peertxcredits;
-
- usock_data.ud_nets_count++;
- return 0;
-
- startup_failed_1:
- pthread_mutex_destroy(&net->un_lock);
- pthread_cond_destroy(&net->un_cond);
- LIBCFS_FREE(net, sizeof(*net));
- startup_failed_0:
- if (usock_data.ud_nets_count == 0)
- usocklnd_base_shutdown(usock_data.ud_npollthreads);
-
- return -ENETDOWN;
-}
-
-void
-usocklnd_shutdown(lnet_ni_t *ni)
-{
- usock_net_t *net = ni->ni_data;
-
- net->un_shutdown = 1;
-
- usocklnd_del_all_peers(ni);
-
- /* Wait for all peer state to clean up */
- pthread_mutex_lock(&net->un_lock);
- while (net->un_peercount != 0)
- pthread_cond_wait(&net->un_cond, &net->un_lock);
- pthread_mutex_unlock(&net->un_lock);
-
- /* Release usock_net_t structure */
- pthread_mutex_destroy(&net->un_lock);
- pthread_cond_destroy(&net->un_cond);
- LIBCFS_FREE(net, sizeof(*net));
-
- usock_data.ud_nets_count--;
- if (usock_data.ud_nets_count == 0)
- usocklnd_base_shutdown(usock_data.ud_npollthreads);
-}
-
-void
-usocklnd_del_all_peers(lnet_ni_t *ni)
-{
- struct list_head *ptmp;
- struct list_head *pnxt;
- usock_peer_t *peer;
- int i;
-
- pthread_rwlock_wrlock(&usock_data.ud_peers_lock);
-
- for (i = 0; i < UD_PEER_HASH_SIZE; i++) {
- list_for_each_safe (ptmp, pnxt, &usock_data.ud_peers[i]) {
- peer = list_entry (ptmp, usock_peer_t, up_list);
-
- if (peer->up_ni != ni)
- continue;
-
- usocklnd_del_peer_and_conns(peer);
- }
- }
-
- pthread_rwlock_unlock(&usock_data.ud_peers_lock);
-
- /* wakeup all threads */
- for (i = 0; i < usock_data.ud_npollthreads; i++)
- usocklnd_wakeup_pollthread(i);
-}
-
-void
-usocklnd_del_peer_and_conns(usock_peer_t *peer)
-{
- /* peer cannot disappear because it's still in hash list */
-
- pthread_mutex_lock(&peer->up_lock);
- /* content of conn[] array cannot change now */
- usocklnd_del_conns_locked(peer);
- pthread_mutex_unlock(&peer->up_lock);
-
- /* peer hash list is still protected by the caller */
- list_del(&peer->up_list);
-
- usocklnd_peer_decref(peer); /* peer isn't in hash list anymore */
-}
-
-void
-usocklnd_del_conns_locked(usock_peer_t *peer)
-{
- int i;
-
- for (i=0; i < N_CONN_TYPES; i++) {
- usock_conn_t *conn = peer->up_conns[i];
- if (conn != NULL)
- usocklnd_conn_kill(conn);
- }
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Maxim Patlasov <maxim@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- */
-#define _GNU_SOURCE
-#include <pthread.h>
-#include <poll.h>
-#include <lnet/lib-lnet.h>
-#include <lnet/socklnd.h>
-
-typedef struct {
- struct list_head tx_list; /* neccessary to form tx list */
- lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */
- ksock_msg_t tx_msg; /* buffer for wire header of ksock msg */
- int tx_resid; /* # of residual bytes */
- int tx_nob; /* # of packet bytes */
- int tx_size; /* size of this descriptor */
- struct iovec *tx_iov; /* points to tx_iova[i] */
- int tx_niov; /* # of packet iovec frags */
- struct iovec tx_iova[1]; /* iov for header */
-} usock_tx_t;
-
-struct usock_peer_s;
-
-typedef struct {
- int uc_fd; /* socket */
- int uc_type; /* conn type */
- int uc_activeflag; /* active side of connection? */
- int uc_flip; /* is peer other endian? */
- int uc_state; /* connection state */
- struct usock_peer_s *uc_peer; /* owning peer */
- lnet_process_id_t uc_peerid; /* id of remote peer */
- int uc_pt_idx; /* index in ud_pollthreads[] of
- * owning poll thread */
- lnet_ni_t *uc_ni; /* parent NI while accepting */
- struct usock_preq_s *uc_preq; /* preallocated request */
- __u32 uc_peer_ip; /* IP address of the peer */
- __u16 uc_peer_port; /* port of the peer */
- struct list_head uc_stale_list; /* orphaned connections */
-
- /* Receive state */
- int uc_rx_state; /* message or hello state */
- ksock_hello_msg_t *uc_rx_hello; /* hello buffer */
- struct iovec *uc_rx_iov; /* points to uc_rx_iova[i] */
- struct iovec uc_rx_iova[LNET_MAX_IOV]; /* message frags */
- int uc_rx_niov; /* # frags */
- int uc_rx_nob_left; /* # bytes to next hdr/body */
- int uc_rx_nob_wanted; /* # of bytes actually wanted */
- void *uc_rx_lnetmsg; /* LNET message being received */
- cfs_time_t uc_rx_deadline; /* when to time out */
- int uc_rx_flag; /* deadline valid? */
- ksock_msg_t uc_rx_msg; /* message buffer */
-
- /* Send state */
- struct list_head uc_tx_list; /* pending txs */
- struct list_head uc_zcack_list; /* pending zc_acks */
- cfs_time_t uc_tx_deadline; /* when to time out */
- int uc_tx_flag; /* deadline valid? */
- int uc_sending; /* send op is in progress */
- usock_tx_t *uc_tx_hello; /* fake tx with hello */
-
- cfs_atomic_t uc_refcount; /* # of users */
- pthread_mutex_t uc_lock; /* serialize */
- int uc_errored; /* a flag for lnet_notify() */
-} usock_conn_t;
-
-/* Allowable conn states are: */
-#define UC_CONNECTING 1
-#define UC_SENDING_HELLO 2
-#define UC_RECEIVING_HELLO 3
-#define UC_READY 4
-#define UC_DEAD 5
-
-/* Allowable RX states are: */
-#define UC_RX_HELLO_MAGIC 1
-#define UC_RX_HELLO_VERSION 2
-#define UC_RX_HELLO_BODY 3
-#define UC_RX_HELLO_IPS 4
-#define UC_RX_KSM_HEADER 5
-#define UC_RX_LNET_HEADER 6
-#define UC_RX_PARSE 7
-#define UC_RX_PARSE_WAIT 8
-#define UC_RX_LNET_PAYLOAD 9
-#define UC_RX_SKIPPING 10
-
-#define N_CONN_TYPES 3 /* CONTROL, BULK_IN and BULK_OUT */
-
-typedef struct usock_peer_s {
- struct list_head up_list; /* neccessary to form peer list */
- lnet_process_id_t up_peerid; /* id of remote peer */
- usock_conn_t *up_conns[N_CONN_TYPES]; /* conns that connect us
- * us with the peer */
- lnet_ni_t *up_ni; /* pointer to parent NI */
- __u64 up_incarnation; /* peer's incarnation */
- int up_incrn_is_set; /* 0 if peer's incarnation
- * hasn't been set so far */
- cfs_atomic_t up_refcount; /* # of users */
- pthread_mutex_t up_lock; /* serialize */
- int up_errored; /* a flag for lnet_notify() */
- cfs_time_t up_last_alive; /* when the peer was last alive */
-} usock_peer_t;
-
-typedef struct {
- int upt_notifier_fd; /* notifier fd for writing */
- struct pollfd *upt_pollfd; /* poll fds */
- int upt_nfds; /* active poll fds */
- int upt_npollfd; /* allocated poll fds */
- usock_conn_t **upt_idx2conn; /* conns corresponding to
- * upt_pollfd[idx] */
- int *upt_skip; /* skip chain */
- int *upt_fd2idx; /* index into upt_pollfd[]
- * by fd */
- int upt_nfd2idx; /* # of allocated elements
- * of upt_fd2idx[] */
- struct list_head upt_stale_list; /* list of orphaned conns */
- struct list_head upt_pollrequests; /* list of poll requests */
- pthread_mutex_t upt_pollrequests_lock; /* serialize */
- int upt_errno; /* non-zero if errored */
- struct cfs_completion upt_completion; /* wait/signal facility for
- * syncronizing shutdown */
-} usock_pollthread_t;
-
-/* Number of elements in upt_pollfd[], upt_idx2conn[] and upt_fd2idx[]
- * at initialization time. Will be resized on demand */
-#define UPT_START_SIZ 32
-
-/* # peer lists */
-#define UD_PEER_HASH_SIZE 101
-
-typedef struct {
- int ud_state; /* initialization state */
- int ud_npollthreads; /* # of poll threads */
- usock_pollthread_t *ud_pollthreads; /* their state */
- int ud_shutdown; /* shutdown flag */
- int ud_nets_count; /* # of instances */
- struct list_head ud_peers[UD_PEER_HASH_SIZE]; /* peer hash table */
- pthread_rwlock_t ud_peers_lock; /* serialize */
-} usock_data_t;
-
-extern usock_data_t usock_data;
-
-/* ud_state allowed values */
-#define UD_STATE_INIT_NOTHING 0
-#define UD_STATE_INITIALIZED 1
-
-typedef struct {
- int un_peercount; /* # of peers */
- int un_shutdown; /* shutdown flag */
- __u64 un_incarnation; /* my epoch */
- pthread_cond_t un_cond; /* condvar to wait for notifications */
- pthread_mutex_t un_lock; /* a lock to protect un_cond */
-} usock_net_t;
-
-typedef struct {
- int ut_poll_timeout; /* the third arg for poll(2) (seconds) */
- int ut_timeout; /* "stuck" socket timeout (seconds) */
- int ut_npollthreads; /* number of poll thread to spawn */
- int ut_fair_limit; /* how many packets can we receive or transmit
- * without calling poll(2) */
- int ut_min_bulk; /* smallest "large" message */
- int ut_txcredits; /* # concurrent sends */
- int ut_peertxcredits; /* # concurrent sends to 1 peer */
- int ut_socknagle; /* Is Nagle alg on ? */
- int ut_sockbufsiz; /* size of socket buffers */
-} usock_tunables_t;
-
-extern usock_tunables_t usock_tuns;
-
-typedef struct usock_preq_s {
- int upr_type; /* type of requested action */
- short upr_value; /* bitmask of POLLIN and POLLOUT bits */
- usock_conn_t * upr_conn; /* a conn for the sake of which
- * action will be performed */
- struct list_head upr_list; /* neccessary to form list */
-} usock_pollrequest_t;
-
-/* Allowable poll request types are: */
-#define POLL_ADD_REQUEST 1
-#define POLL_DEL_REQUEST 2
-#define POLL_RX_SET_REQUEST 3
-#define POLL_TX_SET_REQUEST 4
-#define POLL_SET_REQUEST 5
-
-typedef struct {
- struct list_head zc_list; /* neccessary to form zc_ack list */
- __u64 zc_cookie; /* zero-copy cookie */
-} usock_zc_ack_t;
-
-static inline void
-usocklnd_conn_addref(usock_conn_t *conn)
-{
- LASSERT (cfs_atomic_read(&conn->uc_refcount) > 0);
- cfs_atomic_inc(&conn->uc_refcount);
-}
-
-void usocklnd_destroy_conn(usock_conn_t *conn);
-
-static inline void
-usocklnd_conn_decref(usock_conn_t *conn)
-{
- LASSERT (cfs_atomic_read(&conn->uc_refcount) > 0);
- if (cfs_atomic_dec_and_test(&conn->uc_refcount))
- usocklnd_destroy_conn(conn);
-}
-
-static inline void
-usocklnd_peer_addref(usock_peer_t *peer)
-{
- LASSERT (cfs_atomic_read(&peer->up_refcount) > 0);
- cfs_atomic_inc(&peer->up_refcount);
-}
-
-void usocklnd_destroy_peer(usock_peer_t *peer);
-
-static inline void
-usocklnd_peer_decref(usock_peer_t *peer)
-{
- LASSERT (cfs_atomic_read(&peer->up_refcount) > 0);
- if (cfs_atomic_dec_and_test(&peer->up_refcount))
- usocklnd_destroy_peer(peer);
-}
-
-static inline int
-usocklnd_ip2pt_idx(__u32 ip) {
- return ip % usock_data.ud_npollthreads;
-}
-
-static inline struct list_head *
-usocklnd_nid2peerlist(lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % UD_PEER_HASH_SIZE;
-
- return &usock_data.ud_peers[hash];
-}
-
-int usocklnd_startup(lnet_ni_t *ni);
-void usocklnd_shutdown(lnet_ni_t *ni);
-int usocklnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int usocklnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-int usocklnd_accept(lnet_ni_t *ni, int sock_fd);
-
-int usocklnd_poll_thread(void *arg);
-int usocklnd_add_pollrequest(usock_conn_t *conn, int type, short value);
-void usocklnd_add_killrequest(usock_conn_t *conn);
-int usocklnd_process_pollrequest(usock_pollrequest_t *pr,
- usock_pollthread_t *pt_data);
-void usocklnd_execute_handlers(usock_pollthread_t *pt_data);
-int usocklnd_calculate_chunk_size(int num);
-void usocklnd_wakeup_pollthread(int i);
-
-int usocklnd_notifier_handler(int fd);
-void usocklnd_exception_handler(usock_conn_t *conn);
-int usocklnd_read_handler(usock_conn_t *conn);
-int usocklnd_read_msg(usock_conn_t *conn, int *cont_flag);
-int usocklnd_handle_zc_req(usock_peer_t *peer, __u64 cookie);
-int usocklnd_read_hello(usock_conn_t *conn, int *cont_flag);
-int usocklnd_activeconn_hellorecv(usock_conn_t *conn);
-int usocklnd_passiveconn_hellorecv(usock_conn_t *conn);
-int usocklnd_write_handler(usock_conn_t *conn);
-usock_tx_t * usocklnd_try_piggyback(struct list_head *tx_list_p,
- struct list_head *zcack_list_p);
-int usocklnd_activeconn_hellosent(usock_conn_t *conn);
-int usocklnd_passiveconn_hellosent(usock_conn_t *conn);
-int usocklnd_send_tx(usock_conn_t *conn, usock_tx_t *tx);
-int usocklnd_read_data(usock_conn_t *conn);
-
-void usocklnd_release_poll_states(int n);
-int usocklnd_base_startup();
-void usocklnd_base_shutdown(int n);
-__u64 usocklnd_new_incarnation();
-void usocklnd_del_all_peers(lnet_ni_t *ni);
-void usocklnd_del_peer_and_conns(usock_peer_t *peer);
-void usocklnd_del_conns_locked(usock_peer_t *peer);
-
-int usocklnd_conn_timed_out(usock_conn_t *conn, cfs_time_t current_time);
-void usocklnd_conn_kill(usock_conn_t *conn);
-void usocklnd_conn_kill_locked(usock_conn_t *conn);
-usock_conn_t *usocklnd_conn_allocate();
-void usocklnd_conn_free(usock_conn_t *conn);
-void usocklnd_tear_peer_conn(usock_conn_t *conn);
-void usocklnd_check_peer_stale(lnet_ni_t *ni, lnet_process_id_t id);
-int usocklnd_create_passive_conn(lnet_ni_t *ni, int fd, usock_conn_t **connp);
-int usocklnd_create_active_conn(usock_peer_t *peer, int type,
- usock_conn_t **connp);
-int usocklnd_connect_srv_mode(int *fdp, __u32 dst_ip, __u16 dst_port);
-int usocklnd_connect_cli_mode(int *fdp, __u32 dst_ip, __u16 dst_port);
-int usocklnd_set_sock_options(int fd);
-void usocklnd_init_msg(ksock_msg_t *msg, int type);
-usock_tx_t *usocklnd_create_noop_tx(__u64 cookie);
-usock_tx_t *usocklnd_create_tx(lnet_msg_t *lntmsg);
-void usocklnd_init_hello_msg(ksock_hello_msg_t *hello,
- lnet_ni_t *ni, int type, lnet_nid_t peer_nid);
-usock_tx_t *usocklnd_create_hello_tx(lnet_ni_t *ni,
- int type, lnet_nid_t peer_nid);
-usock_tx_t *usocklnd_create_cr_hello_tx(lnet_ni_t *ni,
- int type, lnet_nid_t peer_nid);
-void usocklnd_destroy_tx(lnet_ni_t *ni, usock_tx_t *tx);
-void usocklnd_destroy_txlist(lnet_ni_t *ni, struct list_head *txlist);
-void usocklnd_destroy_zcack_list(struct list_head *zcack_list);
-void usocklnd_destroy_peer (usock_peer_t *peer);
-int usocklnd_get_conn_type(lnet_msg_t *lntmsg);
-int usocklnd_type2idx(int type);
-usock_peer_t *usocklnd_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id);
-int usocklnd_create_peer(lnet_ni_t *ni, lnet_process_id_t id,
- usock_peer_t **peerp);
-int usocklnd_find_or_create_peer(lnet_ni_t *ni, lnet_process_id_t id,
- usock_peer_t **peerp);
-int usocklnd_find_or_create_conn(usock_peer_t *peer, int type,
- usock_conn_t **connp,
- usock_tx_t *tx, usock_zc_ack_t *zc_ack,
- int *send_immediately_flag);
-void usocklnd_link_conn_to_peer(usock_conn_t *conn, usock_peer_t *peer, int idx);
-int usocklnd_invert_type(int type);
-void usocklnd_conn_new_state(usock_conn_t *conn, int new_state);
-void usocklnd_cleanup_stale_conns(usock_peer_t *peer, __u64 incrn,
- usock_conn_t *skip_conn);
-
-void usocklnd_rx_hellomagic_state_transition(usock_conn_t *conn);
-void usocklnd_rx_helloversion_state_transition(usock_conn_t *conn);
-void usocklnd_rx_hellobody_state_transition(usock_conn_t *conn);
-void usocklnd_rx_helloIPs_state_transition(usock_conn_t *conn);
-void usocklnd_rx_lnethdr_state_transition(usock_conn_t *conn);
-void usocklnd_rx_ksmhdr_state_transition(usock_conn_t *conn);
-void usocklnd_rx_skipping_state_transition(usock_conn_t *conn);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Maxim Patlasov <maxim@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- */
-
-#include "usocklnd.h"
-
-static int
-usocklnd_send_tx_immediately(usock_conn_t *conn, usock_tx_t *tx)
-{
- int rc;
- int rc2;
- int partial_send = 0;
- usock_peer_t *peer = conn->uc_peer;
-
- LASSERT (peer != NULL);
-
- /* usocklnd_enqueue_tx() turned it on for us */
- LASSERT(conn->uc_sending);
-
- //counter_imm_start++;
- rc = usocklnd_send_tx(conn, tx);
- if (rc == 0) { /* partial send or connection closed */
- pthread_mutex_lock(&conn->uc_lock);
- list_add(&tx->tx_list, &conn->uc_tx_list);
- conn->uc_sending = 0;
- pthread_mutex_unlock(&conn->uc_lock);
- partial_send = 1;
- } else {
- usocklnd_destroy_tx(peer->up_ni, tx);
- /* NB: lnetmsg was finalized, so we *must* return 0 */
-
- if (rc < 0) { /* real error */
- usocklnd_conn_kill(conn);
- return 0;
- }
-
- /* rc == 1: tx was sent completely */
- rc = 0; /* let's say to caller 'Ok' */
- //counter_imm_complete++;
- }
-
- pthread_mutex_lock(&conn->uc_lock);
- conn->uc_sending = 0;
-
- /* schedule write handler */
- if (partial_send ||
- (conn->uc_state == UC_READY &&
- (!list_empty(&conn->uc_tx_list) ||
- !list_empty(&conn->uc_zcack_list)))) {
- conn->uc_tx_deadline =
- cfs_time_shift(usock_tuns.ut_timeout);
- conn->uc_tx_flag = 1;
- rc2 = usocklnd_add_pollrequest(conn, POLL_TX_SET_REQUEST, POLLOUT);
- if (rc2 != 0)
- usocklnd_conn_kill_locked(conn);
- else
- usocklnd_wakeup_pollthread(conn->uc_pt_idx);
- }
-
- pthread_mutex_unlock(&conn->uc_lock);
-
- return rc;
-}
-
-int
-usocklnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- usock_tx_t *tx;
- lnet_process_id_t target = lntmsg->msg_target;
- usock_peer_t *peer;
- int type;
- int rc;
- usock_conn_t *conn;
- int send_immediately;
-
- tx = usocklnd_create_tx(lntmsg);
- if (tx == NULL)
- return -ENOMEM;
-
- rc = usocklnd_find_or_create_peer(ni, target, &peer);
- if (rc) {
- LIBCFS_FREE (tx, tx->tx_size);
- return rc;
- }
- /* peer cannot disappear now because its refcount was incremented */
-
- type = usocklnd_get_conn_type(lntmsg);
- rc = usocklnd_find_or_create_conn(peer, type, &conn, tx, NULL,
- &send_immediately);
- if (rc != 0) {
- usocklnd_peer_decref(peer);
- usocklnd_check_peer_stale(ni, target);
- LIBCFS_FREE (tx, tx->tx_size);
- return rc;
- }
- /* conn cannot disappear now because its refcount was incremented */
-
- if (send_immediately)
- rc = usocklnd_send_tx_immediately(conn, tx);
-
- usocklnd_conn_decref(conn);
- usocklnd_peer_decref(peer);
- return rc;
-}
-
-int
-usocklnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- int rc = 0;
- usock_conn_t *conn = (usock_conn_t *)private;
-
- /* I don't think that we'll win much concurrency moving lock()
- * call below lnet_extract_iov() */
- pthread_mutex_lock(&conn->uc_lock);
-
- conn->uc_rx_lnetmsg = msg;
- conn->uc_rx_nob_wanted = mlen;
- conn->uc_rx_nob_left = rlen;
- conn->uc_rx_iov = conn->uc_rx_iova;
- conn->uc_rx_niov =
- lnet_extract_iov(LNET_MAX_IOV, conn->uc_rx_iov,
- niov, iov, offset, mlen);
-
- /* the gap between lnet_parse() and usocklnd_recv() happened? */
- if (conn->uc_rx_state == UC_RX_PARSE_WAIT) {
- conn->uc_rx_flag = 1; /* waiting for incoming lnet payload */
- conn->uc_rx_deadline =
- cfs_time_shift(usock_tuns.ut_timeout);
- rc = usocklnd_add_pollrequest(conn, POLL_RX_SET_REQUEST, POLLIN);
- if (rc != 0) {
- usocklnd_conn_kill_locked(conn);
- goto recv_out;
- }
- usocklnd_wakeup_pollthread(conn->uc_pt_idx);
- }
-
- conn->uc_rx_state = UC_RX_LNET_PAYLOAD;
- recv_out:
- pthread_mutex_unlock(&conn->uc_lock);
- usocklnd_conn_decref(conn);
- return rc;
-}
-
-int
-usocklnd_accept(lnet_ni_t *ni, int sock_fd)
-{
- int rc;
- usock_conn_t *conn;
-
- rc = usocklnd_create_passive_conn(ni, sock_fd, &conn);
- if (rc)
- return rc;
- LASSERT(conn != NULL);
-
- /* disable shutdown event temporarily */
- lnet_ni_addref(ni);
-
- rc = usocklnd_add_pollrequest(conn, POLL_ADD_REQUEST, POLLIN);
- if (rc == 0)
- usocklnd_wakeup_pollthread(conn->uc_pt_idx);
-
- /* NB: conn reference counter was incremented while adding
- * poll request if rc == 0 */
-
- usocklnd_conn_decref(conn); /* should destroy conn if rc != 0 */
- return rc;
-}
+++ /dev/null
-Makefile
-Makefile.in
-acceptor
-debugctl
-ptlctl
-.deps
-routerstat
-wirecheck
-gmlndnid
-lst
-lstclient
-.*.cmd
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-## $(srcdir)/../ for <portals/*.h>, ../../ for generated <config.h>
-#COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I../../include
-#LINK = $(CC) -o $@
-
-if LIBLUSTRE
-noinst_LIBRARIES = libuptlctl.a
-endif
-
-libuptlctl_a_SOURCES = portals.c nidstrings.c debug.c l_ioctl.c
-libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS)
-libuptlctl_a_CFLAGS = $(LLCFLAGS) -DLUSTRE_UTILS=1
-
-sbin_PROGRAMS = debugctl
-
-lib_LIBRARIES = libptlctl.a
-
-if LIBLUSTRE
-noinst_LIBRARIES += liblst.a
-liblst_a_SOURCES =
-endif
-
-libptlctl_a_SOURCES = portals.c nidstrings.c debug.c l_ioctl.c parser.c parser.h
-
-if UTILS
-sbin_PROGRAMS += ptlctl routerstat wirecheck lst
-if LIBLUSTRE
-sbin_PROGRAMS += lstclient
-endif
-
-if BUILD_GMLND
-sbin_PROGRAMS += gmlndnid
-endif
-endif
-
-wirecheck_SOURCES = wirecheck.c
-
-gmlndnid_SOURCES = gmlndnid.c
-gmlndnid_CFLAGS = $(GMCPPFLAGS)
-gmlndnid_LDFLAGS = -static
-gmlndnid_LDADD = $(GMLIBS) -lgm
-
-ptlctl_SOURCES = ptlctl.c
-ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
-ptlctl_DEPENDENCIES = libptlctl.a
-
-routerstat_SOURCES = routerstat.c
-
-debugctl_SOURCES = debugctl.c
-debugctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
-debugctl_DEPENDENCIES = libptlctl.a
-
-lst_SOURCES = lst.c
-lst_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
-lst_DEPENDENCIES = libptlctl.a
-
-LND_LIBS =
-if BUILD_USOCKLND
-LND_LIBS += $(top_builddir)/lnet/ulnds/socklnd/libsocklnd.a
-endif
-if BUILD_UPTLLND
-LND_LIBS += $(top_builddir)/lnet/ulnds/ptllnd/libptllnd.a
-endif
-
-if LIBLUSTRE
-LIB_SELFTEST = $(top_builddir)/lnet/libcfs/libcfs.a $(top_builddir)/lnet/lnet/liblnet.a $(top_builddir)/lnet/selftest/libselftest.a
-liblst.a : $(LIB_SELFTEST) $(LND_LIBS)
- sh $(srcdir)/genlib.sh "$(LIBS)" "$(LND_LIBS)" "$(PTHREAD_LIBS)"
-
-lstclient_SOURCES = lstclient.c
-lstclient_LDADD = -L. -lptlctl -llst $(LIBREADLINE) $(LIBEFENCE) $(PTHREAD_LIBS)
-lstclient_DEPENDENCIES = libptlctl.a liblst.a
-endif
-
-nidstrings.c: @top_srcdir@/lnet/libcfs/nidstrings.c
- ln -sf $< $@
-
-EXTRA_DIST = genlib.sh
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Lustre Networking, http://www.lustre.org.
- *
- * LNET is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * LNET is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with LNET; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Some day I'll split all of this functionality into a cfs_debug module
- * of its own. That day is not today.
- *
- */
-
-#define __USE_FILE_OFFSET64
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#ifdef HAVE_NETDB_H
-#include <netdb.h>
-#endif
-#include <stdlib.h>
-#include <string.h>
-#ifdef HAVE_SYS_IOCTL_H
-#include <sys/ioctl.h>
-#endif
-#ifndef _IOWR
-#include "ioctl.h"
-#endif
-#include <fcntl.h>
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/utsname.h>
-
-#include <lnet/api-support.h>
-#include <lnet/lnetctl.h>
-#include <libcfs/portals_utils.h>
-#include "parser.h"
-
-#include <time.h>
-
-static char rawbuf[8192];
-static char *buf = rawbuf;
-static int max = 8192;
-/*static int g_pfd = -1;*/
-static int subsystem_mask = ~0;
-static int debug_mask = ~0;
-
-#define MAX_MARK_SIZE 256
-
-static const char *libcfs_debug_subsystems[] =
- {"undefined", "mdc", "mds", "osc",
- "ost", "class", "log", "llite",
- "rpc", "mgmt", "lnet", "lnd",
- "pinger", "filter", "", "echo",
- "ldlm", "lov", "", "",
- "", "", "", "lmv",
- "", "sec", "gss", "",
- "mgc", "mgs", "fid", "fld", NULL};
-static const char *libcfs_debug_masks[] =
- {"trace", "inode", "super", "ext2",
- "malloc", "cache", "info", "ioctl",
- "neterror", "net", "warning", "buffs",
- "other", "dentry", "nettrace", "page",
- "dlmtrace", "error", "emerg", "ha",
- "rpctrace", "vfstrace", "reada", "mmap",
- "config", "console", "quota", "sec", NULL};
-
-struct debug_daemon_cmd {
- char *cmd;
- unsigned int cmdv;
-};
-
-static const struct debug_daemon_cmd libcfs_debug_daemon_cmd[] = {
- {"start", DEBUG_DAEMON_START},
- {"stop", DEBUG_DAEMON_STOP},
- {0, 0}
-};
-
-#ifdef __linux__
-
-#define DAEMON_CTL_NAME "/proc/sys/lnet/daemon_file"
-#define SUBSYS_DEBUG_CTL_NAME "/proc/sys/lnet/subsystem_debug"
-#define DEBUG_CTL_NAME "/proc/sys/lnet/debug"
-#define DUMP_KERNEL_CTL_NAME "/proc/sys/lnet/dump_kernel"
-
-static int
-dbg_open_ctlhandle(const char *str)
-{
- int fd;
- fd = open(str, O_WRONLY);
- if (fd < 0) {
- fprintf(stderr, "open %s failed: %s\n", str,
- strerror(errno));
- return -1;
- }
- return fd;
-}
-
-static void
-dbg_close_ctlhandle(int fd)
-{
- close(fd);
-}
-
-static int
-dbg_write_cmd(int fd, char *str, int len)
-{
- int rc = write(fd, str, len);
-
- return (rc == len ? 0 : 1);
-}
-
-#elif defined(__DARWIN__)
-
-#define DAEMON_CTL_NAME "lnet.trace_daemon"
-#define SUBSYS_DEBUG_CTL_NAME "lnet.subsystem_debug"
-#define DEBUG_CTL_NAME "lnet.debug"
-#define DUMP_KERNEL_CTL_NAME "lnet.trace_dumpkernel"
-
-static char sysctl_name[128];
-static int
-dbg_open_ctlhandle(const char *str)
-{
-
- if (strlen(str)+1 > 128) {
- fprintf(stderr, "sysctl name is too long: %s.\n", str);
- return -1;
- }
- strcpy(sysctl_name, str);
-
- return 0;
-}
-
-static void
-dbg_close_ctlhandle(int fd)
-{
- sysctl_name[0] = '\0';
- return;
-}
-
-static int
-dbg_write_cmd(int fd, char *str, int len)
-{
- int rc;
-
- rc = sysctlbyname(sysctl_name, NULL, NULL, str, len+1);
- if (rc != 0) {
- fprintf(stderr, "sysctl %s with cmd (%s) error: %d\n",
- sysctl_name, str, errno);
- }
- return (rc == 0 ? 0: 1);
-}
-
-#else
-#error - Unknown sysctl convention.
-#endif
-
-static int do_debug_mask(char *name, int enable)
-{
- int found = 0, i;
-
- for (i = 0; libcfs_debug_subsystems[i] != NULL; i++) {
- if (strcasecmp(name, libcfs_debug_subsystems[i]) == 0 ||
- strcasecmp(name, "all_subs") == 0) {
- printf("%s output from subsystem \"%s\"\n",
- enable ? "Enabling" : "Disabling",
- libcfs_debug_subsystems[i]);
- if (enable)
- subsystem_mask |= (1 << i);
- else
- subsystem_mask &= ~(1 << i);
- found = 1;
- }
- }
- for (i = 0; libcfs_debug_masks[i] != NULL; i++) {
- if (strcasecmp(name, libcfs_debug_masks[i]) == 0 ||
- strcasecmp(name, "all_types") == 0) {
- printf("%s output of type \"%s\"\n",
- enable ? "Enabling" : "Disabling",
- libcfs_debug_masks[i]);
- if (enable)
- debug_mask |= (1 << i);
- else
- debug_mask &= ~(1 << i);
- found = 1;
- }
- }
-
- return found;
-}
-
-int dbg_initialize(int argc, char **argv)
-{
- return 0;
-}
-
-int jt_dbg_filter(int argc, char **argv)
-{
- int i;
-
- if (argc < 2) {
- fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
- argv[0]);
- return 0;
- }
-
- for (i = 1; i < argc; i++)
- if (!do_debug_mask(argv[i], 0))
- fprintf(stderr, "Unknown subsystem or debug type: %s\n",
- argv[i]);
- return 0;
-}
-
-int jt_dbg_show(int argc, char **argv)
-{
- int i;
-
- if (argc < 2) {
- fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
- argv[0]);
- return 0;
- }
-
- for (i = 1; i < argc; i++)
- if (!do_debug_mask(argv[i], 1))
- fprintf(stderr, "Unknown subsystem or debug type: %s\n",
- argv[i]);
-
- return 0;
-}
-
-static int applymask(char* procpath, int value)
-{
- int rc;
- char buf[64];
- int len = snprintf(buf, 64, "%d", value);
-
- int fd = dbg_open_ctlhandle(procpath);
- if (fd == -1) {
- fprintf(stderr, "Unable to open %s: %s\n",
- procpath, strerror(errno));
- return fd;
- }
- rc = dbg_write_cmd(fd, buf, len+1);
- if (rc != 0) {
- fprintf(stderr, "Write to %s failed: %s\n",
- procpath, strerror(errno));
- return rc;
- }
- dbg_close_ctlhandle(fd);
- return 0;
-}
-
-static void applymask_all(unsigned int subs_mask, unsigned int debug_mask)
-{
- if (!dump_filename) {
- applymask(SUBSYS_DEBUG_CTL_NAME, subs_mask);
- applymask(DEBUG_CTL_NAME, debug_mask);
- } else {
- struct libcfs_debug_ioctl_data data;
-
- data.hdr.ioc_len = sizeof(data);
- data.hdr.ioc_version = 0;
- data.subs = subs_mask;
- data.debug = debug_mask;
-
- dump(OBD_DEV_ID, LIBCFS_IOC_DEBUG_MASK, &data);
- }
- printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/lnet\n",
- subs_mask, debug_mask);
-}
-
-int jt_dbg_list(int argc, char **argv)
-{
- int i;
-
- if (argc != 2) {
- fprintf(stderr, "usage: %s <subs || types>\n", argv[0]);
- return 0;
- }
-
- if (strcasecmp(argv[1], "subs") == 0) {
- printf("Subsystems: all_subs");
- for (i = 0; libcfs_debug_subsystems[i] != NULL; i++)
- if (libcfs_debug_subsystems[i][0])
- printf(", %s", libcfs_debug_subsystems[i]);
- printf("\n");
- } else if (strcasecmp(argv[1], "types") == 0) {
- printf("Types: all_types");
- for (i = 0; libcfs_debug_masks[i] != NULL; i++)
- printf(", %s", libcfs_debug_masks[i]);
- printf("\n");
- } else if (strcasecmp(argv[1], "applymasks") == 0) {
- applymask_all(subsystem_mask, debug_mask);
- }
- return 0;
-}
-
-/* all strings nul-terminated; only the struct and hdr need to be freed */
-struct dbg_line {
- struct ptldebug_header *hdr;
- char *file;
- char *fn;
- char *text;
-};
-
-static int cmp_rec(const void *p1, const void *p2)
-{
- struct dbg_line *d1 = *(struct dbg_line **)p1;
- struct dbg_line *d2 = *(struct dbg_line **)p2;
-
- if (d1->hdr->ph_sec < d2->hdr->ph_sec)
- return -1;
- if (d1->hdr->ph_sec == d2->hdr->ph_sec &&
- d1->hdr->ph_usec < d2->hdr->ph_usec)
- return -1;
- if (d1->hdr->ph_sec == d2->hdr->ph_sec &&
- d1->hdr->ph_usec == d2->hdr->ph_usec)
- return 0;
- return 1;
-}
-
-static void print_rec(struct dbg_line **linev, int used, FILE *out)
-{
- int i;
-
- for (i = 0; i < used; i++) {
- struct dbg_line *line = linev[i];
- struct ptldebug_header *hdr = line->hdr;
-
- fprintf(out, "%08x:%08x:%u:%u.%06llu:%u:%u:%u:(%s:%u:%s()) %s",
- hdr->ph_subsys, hdr->ph_mask, hdr->ph_cpu_id,
- hdr->ph_sec, (unsigned long long)hdr->ph_usec,
- hdr->ph_stack, hdr->ph_pid, hdr->ph_extern_pid,
- line->file, hdr->ph_line_num, line->fn, line->text);
- free(line->hdr);
- free(line);
- }
- free(linev);
-}
-
-static int add_rec(struct dbg_line *line, struct dbg_line ***linevp, int *lenp,
- int used)
-{
- struct dbg_line **linev = *linevp;
-
- if (used == *lenp) {
- int nlen = *lenp + 512;
- int nsize = nlen * sizeof(struct dbg_line *);
-
- linev = *linevp ? realloc(*linevp, nsize) : malloc(nsize);
- if (!linev)
- return 0;
- *linevp = linev;
- *lenp = nlen;
- }
- linev[used] = line;
- return 1;
-}
-
-static int parse_buffer(FILE *in, FILE *out)
-{
- struct dbg_line *line;
- struct ptldebug_header *hdr;
- char buf[4097], *p;
- int rc;
- unsigned long dropped = 0, kept = 0;
- struct dbg_line **linev = NULL;
- int linev_len = 0;
-
- while (1) {
- rc = fread(buf, sizeof(hdr->ph_len) + sizeof(hdr->ph_flags), 1, in);
- if (rc <= 0)
- break;
-
- hdr = (void *)buf;
- if (hdr->ph_len == 0)
- break;
- if (hdr->ph_len > 4094) {
- fprintf(stderr, "unexpected large record: %d bytes. "
- "aborting.\n",
- hdr->ph_len);
- break;
- }
-
- rc = fread(buf + sizeof(hdr->ph_len) + sizeof(hdr->ph_flags), 1,
- hdr->ph_len - sizeof(hdr->ph_len) - sizeof(hdr->ph_flags), in);
- if (rc <= 0)
- break;
-
- if (hdr->ph_mask &&
- (!(subsystem_mask & hdr->ph_subsys) ||
- (!(debug_mask & hdr->ph_mask)))) {
- dropped++;
- continue;
- }
-
- line = malloc(sizeof(*line));
- if (line == NULL) {
- fprintf(stderr, "malloc failed; printing accumulated "
- "records and exiting.\n");
- break;
- }
-
- line->hdr = malloc(hdr->ph_len + 1);
- if (line->hdr == NULL) {
- free(line);
- fprintf(stderr, "malloc failed; printing accumulated "
- "records and exiting.\n");
- break;
- }
-
- p = (void *)line->hdr;
- memcpy(line->hdr, buf, hdr->ph_len);
- p[hdr->ph_len] = '\0';
-
- p += sizeof(*hdr);
- line->file = p;
- p += strlen(line->file) + 1;
- line->fn = p;
- p += strlen(line->fn) + 1;
- line->text = p;
-
- if (!add_rec(line, &linev, &linev_len, kept)) {
- fprintf(stderr, "malloc failed; printing accumulated "
- "records and exiting.\n");
- break;
- }
- kept++;
- }
-
- if (linev) {
- qsort(linev, kept, sizeof(struct dbg_line *), cmp_rec);
- print_rec(linev, kept, out);
- }
-
- printf("Debug log: %lu lines, %lu kept, %lu dropped.\n",
- dropped + kept, kept, dropped);
- return 0;
-}
-
-int jt_dbg_debug_kernel(int argc, char **argv)
-{
- char filename[4096];
- struct stat st;
- int rc, raw = 0, fd;
- FILE *in, *out = stdout;
-
- if (argc > 3) {
- fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]);
- return 0;
- }
-
- if (argc > 2) {
- raw = atoi(argv[2]);
- } else if (argc > 1 && (argv[1][0] == '0' || argv[1][0] == '1')) {
- raw = atoi(argv[1]);
- argc--;
- }
-
- /* If we are dumping raw (which means no conversion step to ASCII)
- * then dump directly to any supplied filename, otherwise this is
- * just a temp file and we dump to the real file at convert time. */
- if (argc > 1 && raw)
- strcpy(filename, argv[1]);
- else
- sprintf(filename, "/tmp/lustre-log."CFS_TIME_T".%u",
- time(NULL),getpid());
-
- if (stat(filename, &st) == 0 && S_ISREG(st.st_mode))
- unlink(filename);
-
- fd = dbg_open_ctlhandle(DUMP_KERNEL_CTL_NAME);
- if (fd < 0) {
- fprintf(stderr, "open(dump_kernel) failed: %s\n",
- strerror(errno));
- return 1;
- }
-
- rc = dbg_write_cmd(fd, filename, strlen(filename));
- if (rc != 0) {
- fprintf(stderr, "write(%s) failed: %s\n", filename,
- strerror(errno));
- close(fd);
- return 1;
- }
- dbg_close_ctlhandle(fd);
-
- if (raw)
- return 0;
-
- in = fopen(filename, "r");
- if (in == NULL) {
- if (errno == ENOENT) /* no dump file created */
- return 0;
-
- fprintf(stderr, "fopen(%s) failed: %s\n", filename,
- strerror(errno));
- return 1;
- }
- if (argc > 1) {
- out = fopen(argv[1], "w");
- if (out == NULL) {
- fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
- strerror(errno));
- fclose(in);
- return 1;
- }
- }
-
- rc = parse_buffer(in, out);
- fclose(in);
- if (argc > 1)
- fclose(out);
- if (rc) {
- fprintf(stderr, "parse_buffer failed; leaving tmp file %s "
- "behind.\n", filename);
- } else {
- rc = unlink(filename);
- if (rc)
- fprintf(stderr, "dumped successfully, but couldn't "
- "unlink tmp file %s: %s\n", filename,
- strerror(errno));
- }
- return rc;
-}
-
-int jt_dbg_debug_file(int argc, char **argv)
-{
- int fdin;
- int fdout;
- FILE *in;
- FILE *out = stdout;
- int rc;
-
- if (argc > 3 || argc < 2) {
- fprintf(stderr, "usage: %s <input> [output]\n", argv[0]);
- return 0;
- }
-
- fdin = open(argv[1], O_RDONLY | O_LARGEFILE);
- if (fdin == -1) {
- fprintf(stderr, "open(%s) failed: %s\n", argv[1],
- strerror(errno));
- return 1;
- }
- in = fdopen(fdin, "r");
- if (in == NULL) {
- fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
- strerror(errno));
- close(fdin);
- return 1;
- }
- if (argc > 2) {
- fdout = open(argv[2],
- O_CREAT | O_TRUNC | O_WRONLY | O_LARGEFILE,
- 0600);
- if (fdout == -1) {
- fprintf(stderr, "open(%s) failed: %s\n", argv[2],
- strerror(errno));
- fclose(in);
- return 1;
- }
- out = fdopen(fdout, "w");
- if (out == NULL) {
- fprintf(stderr, "fopen(%s) failed: %s\n", argv[2],
- strerror(errno));
- fclose(in);
- close(fdout);
- return 1;
- }
- }
-
- rc = parse_buffer(in, out);
-
- fclose(in);
- if (out != stdout)
- fclose(out);
-
- return rc;
-}
-
-const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n";
-
-int jt_dbg_debug_daemon(int argc, char **argv)
-{
- int rc;
- int fd;
-
- if (argc <= 1) {
- fprintf(stderr, debug_daemon_usage, argv[0]);
- return 1;
- }
-
- fd = dbg_open_ctlhandle(DAEMON_CTL_NAME);
- if (fd < 0)
- return -1;
-
- rc = -1;
- if (strcasecmp(argv[1], "start") == 0) {
- if (argc < 3 || argc > 4 ||
- (argc == 4 && strlen(argv[3]) > 5)) {
- fprintf(stderr, debug_daemon_usage, argv[0]);
- goto out;
- }
- if (argc == 4) {
- char buf[12];
- const long min_size = 10;
- const long max_size = 20480;
- long size;
- char *end;
-
- size = strtoul(argv[3], &end, 0);
- if (size < min_size ||
- size > max_size ||
- *end != 0) {
- fprintf(stderr, "size %s invalid, must be in "
- "the range %ld-%ld MB\n", argv[3],
- min_size, max_size);
- goto out;
- }
- snprintf(buf, sizeof(buf), "size=%ld", size);
- rc = dbg_write_cmd(fd, buf, strlen(buf));
-
- if (rc != 0) {
- fprintf(stderr, "set %s failed: %s\n",
- buf, strerror(errno));
- goto out;
- }
- }
-
- rc = dbg_write_cmd(fd, argv[2], strlen(argv[2]));
- if (rc != 0) {
- fprintf(stderr, "start debug_daemon on %s failed: %s\n",
- argv[2], strerror(errno));
- goto out;
- }
- rc = 0;
- goto out;
- }
- if (strcasecmp(argv[1], "stop") == 0) {
- rc = dbg_write_cmd(fd, "stop", 4);
- if (rc != 0) {
- fprintf(stderr, "stopping debug_daemon failed: %s\n",
- strerror(errno));
- goto out;
- }
-
- rc = 0;
- goto out;
- }
-
- fprintf(stderr, debug_daemon_usage, argv[0]);
- rc = -1;
-out:
- dbg_close_ctlhandle(fd);
- return rc;
-}
-
-int jt_dbg_clear_debug_buf(int argc, char **argv)
-{
- int rc;
- struct libcfs_ioctl_data data;
-
- if (argc != 1) {
- fprintf(stderr, "usage: %s\n", argv[0]);
- return 0;
- }
-
- memset(&data, 0, sizeof(data));
- if (libcfs_ioctl_pack(&data, &buf, max) != 0) {
- fprintf(stderr, "libcfs_ioctl_pack failed.\n");
- return -1;
- }
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_CLEAR_DEBUG, buf);
- if (rc) {
- fprintf(stderr, "IOC_LIBCFS_CLEAR_DEBUG failed: %s\n",
- strerror(errno));
- return -1;
- }
- return 0;
-}
-
-int jt_dbg_mark_debug_buf(int argc, char **argv)
-{
- static char scratch[MAX_MARK_SIZE] = { '\0' };
- int rc, max_size = MAX_MARK_SIZE-1;
- struct libcfs_ioctl_data data = { 0 };
- char *text;
- time_t now = time(NULL);
-
- if (argc > 1) {
- int count;
- text = scratch;
- strncpy(text, argv[1], max_size);
- max_size-=strlen(argv[1]);
- for (count = 2; (count < argc) && (max_size > 0); count++){
- strncat(text, " ", max_size);
- max_size -= 1;
- strncat(text, argv[count], max_size);
- max_size -= strlen(argv[count]);
- }
- } else {
- text = ctime(&now);
- }
-
- data.ioc_inllen1 = strlen(text) + 1;
- data.ioc_inlbuf1 = text;
- if (libcfs_ioctl_pack(&data, &buf, max) != 0) {
- fprintf(stderr, "libcfs_ioctl_pack failed.\n");
- return -1;
- }
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_MARK_DEBUG, buf);
- if (rc) {
- fprintf(stderr, "IOC_LIBCFS_MARK_DEBUG failed: %s\n",
- strerror(errno));
- return -1;
- }
- return 0;
-}
-
-static struct mod_paths {
- char *name, *path;
-} mod_paths[] = {
- {"libcfs", "lnet/libcfs"},
- {"lnet", "lnet/lnet"},
- {"kciblnd", "lnet/klnds/ciblnd"},
- {"kgmlnd", "lnet/klnds/gmlnd"},
- {"kmxlnd", "lnet/klnds/mxlnd"},
- {"kiiblnd", "lnet/klnds/iiblnd"},
- {"ko2iblnd", "lnet/klnds/o2iblnd"},
- {"kopeniblnd", "lnet/klnds/openiblnd"},
- {"kptllnd", "lnet/klnds/ptllnd"},
- {"kqswlnd", "lnet/klnds/qswlnd"},
- {"kralnd", "lnet/klnds/ralnd"},
- {"ksocklnd", "lnet/klnds/socklnd"},
- {"ktdilnd", "lnet/klnds/tdilnd"},
- {"kviblnd", "lnet/klnds/viblnd"},
- {"lvfs", "lustre/lvfs"},
- {"obdclass", "lustre/obdclass"},
- {"llog_test", "lustre/obdclass"},
- {"ptlrpc_gss", "lustre/ptlrpc/gss"},
- {"ptlrpc", "lustre/ptlrpc"},
- {"gks", "lustre/sec/gks"},
- {"gkc", "lustre/sec/gks"},
- {"ost", "lustre/ost"},
- {"osc", "lustre/osc"},
- {"mds", "lustre/mds"},
- {"mdc", "lustre/mdc"},
- {"llite", "lustre/llite"},
- {"lustre", "lustre/llite"},
- {"llite_lloop", "lustre/llite"},
- {"ldiskfs", "ldiskfs/ldiskfs"},
- {"smfs", "lustre/smfs"},
- {"obdecho", "lustre/obdecho"},
- {"ldlm", "lustre/ldlm"},
- {"obdfilter", "lustre/obdfilter"},
- {"lov", "lustre/lov"},
- {"lmv", "lustre/lmv"},
- {"fsfilt_ext3", "lustre/lvfs"},
- {"fsfilt_reiserfs", "lustre/lvfs"},
- {"fsfilt_smfs", "lustre/lvfs"},
- {"fsfilt_ldiskfs", "lustre/lvfs"},
- {"mds_ext3", "lustre/mds"},
- {"cobd", "lustre/cobd"},
- {"cmobd", "lustre/cmobd"},
- {"lquota", "lustre/quota"},
- {"mgs", "lustre/mgs"},
- {"mgc", "lustre/mgc"},
- {"mdt", "lustre/mdt"},
- {"mdd", "lustre/mdd"},
- {"osd", "lustre/osd"},
- {"cmm", "lustre/cmm"},
- {"fid", "lustre/fid"},
- {"fld", "lustre/fld"},
- {NULL, NULL}
-};
-
-static int jt_dbg_modules_2_4(int argc, char **argv)
-{
-#ifdef HAVE_LINUX_VERSION_H
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- struct mod_paths *mp;
- char *path = "";
- char *kernel = "linux";
-
- if (argc >= 2)
- path = argv[1];
- if (argc == 3)
- kernel = argv[2];
- if (argc > 3) {
- printf("%s [path] [kernel]\n", argv[0]);
- return 0;
- }
-
- for (mp = mod_paths; mp->name != NULL; mp++) {
- struct module_info info;
- int rc;
- size_t crap;
- int query_module(const char *name, int which, void *buf,
- size_t bufsize, size_t *ret);
-
- rc = query_module(mp->name, QM_INFO, &info, sizeof(info),
- &crap);
- if (rc < 0) {
- if (errno != ENOENT)
- printf("query_module(%s) failed: %s\n",
- mp->name, strerror(errno));
- } else {
- printf("add-symbol-file %s%s%s/%s.o 0x%0lx\n", path,
- path[0] ? "/" : "", mp->path, mp->name,
- info.addr + sizeof(struct module));
- }
- }
-
- return 0;
-#endif // Headers are 2.6-only
-#endif // !HAVE_LINUX_VERSION_H
- return -EINVAL;
-}
-
-static int jt_dbg_modules_2_5(int argc, char **argv)
-{
- struct mod_paths *mp;
- char *path = "";
- char *kernel = "linux";
- const char *proc = "/proc/modules";
- char modname[128], others[4096];
- long modaddr;
- int rc;
- FILE *file;
-
- if (argc >= 2)
- path = argv[1];
- if (argc == 3)
- kernel = argv[2];
- if (argc > 3) {
- printf("%s [path] [kernel]\n", argv[0]);
- return 0;
- }
-
- file = fopen(proc, "r");
- if (!file) {
- printf("failed open %s: %s\n", proc, strerror(errno));
- return 0;
- }
-
- while ((rc = fscanf(file, "%s %s %s %s %s %lx\n",
- modname, others, others, others, others, &modaddr)) == 6) {
- for (mp = mod_paths; mp->name != NULL; mp++) {
- if (!strcmp(mp->name, modname))
- break;
- }
- if (mp->name) {
- printf("add-symbol-file %s%s%s/%s.o 0x%0lx\n", path,
- path[0] ? "/" : "", mp->path, mp->name, modaddr);
- }
- }
-
- fclose(file);
- return 0;
-}
-
-int jt_dbg_modules(int argc, char **argv)
-{
- int rc = 0;
- struct utsname sysinfo;
-
- rc = uname(&sysinfo);
- if (rc) {
- printf("uname() failed: %s\n", strerror(errno));
- return 0;
- }
-
- if (sysinfo.release[2] > '4') {
- return jt_dbg_modules_2_5(argc, argv);
- } else {
- return jt_dbg_modules_2_4(argc, argv);
- }
-
- return 0;
-}
-
-int jt_dbg_panic(int argc, char **argv)
-{
- int rc;
- struct libcfs_ioctl_data data;
-
- if (argc != 1) {
- fprintf(stderr, "usage: %s\n", argv[0]);
- return 0;
- }
-
- memset(&data, 0, sizeof(data));
- if (libcfs_ioctl_pack(&data, &buf, max) != 0) {
- fprintf(stderr, "libcfs_ioctl_pack failed.\n");
- return -1;
- }
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PANIC, buf);
- if (rc) {
- fprintf(stderr, "IOC_LIBCFS_PANIC failed: %s\n",
- strerror(errno));
- return -1;
- }
- return 0;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Some day I'll split all of this functionality into a cfs_debug module
- * of its own. That day is not today.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <lnet/api-support.h>
-#include <lnet/lnetctl.h>
-#include "parser.h"
-
-
-command_t list[] = {
- {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"},
- {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file|stop], control debug daemon to dump debug buffer to a file"},
- {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file <input> [output] [raw], read debug buffer from input and print it [to output]"},
- {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"},
- {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"},
- {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"},
- {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"},
- {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"},
- {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: <path>)"},
- {"panic", jt_dbg_panic, 0, "cause the kernel to panic"},
- {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
- {"help", Parser_help, 0, "help"},
- {"exit", Parser_quit, 0, "quit"},
- {"quit", Parser_quit, 0, "quit"},
- { 0, 0, 0, NULL }
-};
-
-int main(int argc, char **argv)
-{
- if (dbg_initialize(argc, argv) < 0)
- exit(2);
-
- register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH,
- LNET_DEV_MAJOR, LNET_DEV_MINOR);
-
- Parser_init("debugctl > ", list);
- if (argc > 1)
- return Parser_execarg(argc - 1, &argv[1], list);
-
- Parser_commands();
-
- unregister_ioc_dev(LNET_DEV_ID);
- return 0;
-}
+++ /dev/null
-#!/bin/bash
-#set -xv
-set -e
-
-AR=/usr/bin/ar
-LD=/usr/bin/ld
-RANLIB=/usr/bin/ranlib
-
-CWD=`pwd`
-
-LIBS=$1
-LND_LIBS=$2
-PTHREAD_LIBS=$3
-
-# do cleanup at first
-rm -f liblst.so
-
-ALL_OBJS=
-
-build_obj_list() {
- _objs=`$AR -t $1/$2`
- for _lib in $_objs; do
- ALL_OBJS=$ALL_OBJS"$1/$_lib ";
- done;
-}
-
-# lnet components libs
-build_obj_list ../../lnet/libcfs libcfs.a
-if $(echo "$LND_LIBS" | grep "socklnd" >/dev/null) ; then
- build_obj_list ../../lnet/ulnds/socklnd libsocklnd.a
-fi
-if $(echo "$LND_LIBS" | grep "ptllnd" >/dev/null) ; then
- build_obj_list ../../lnet/ulnds/ptllnd libptllnd.a
-fi
-build_obj_list ../../lnet/lnet liblnet.a
-build_obj_list ../../lnet/selftest libselftest.a
-
-# create static lib lustre
-rm -f $CWD/liblst.a
-$AR -cru $CWD/liblst.a $ALL_OBJS
-$RANLIB $CWD/liblst.a
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- *
- * This file is part of Lustre, http://www.lustre.org/
- *
- * This file is free software; you can redistribute it and/or
- * modify it under the terms of version 2.1 of the GNU Lesser General
- * Public License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
-#include <netdb.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <syslog.h>
-#include <errno.h>
-
-#include <lnet/api-support.h>
-#include <lnet/lib-types.h>
-
-#include <gm.h>
-
-/*
- * portals always uses unit 0
- * Can this be configurable?
- */
-#define GM_UNIT 0
-
-void
-usage(char *prg, int h)
-{
- fprintf(stderr,
- "usage %s -h\n"
- " %s [-l] [-n hostname] [-L] [hostnames]\n", prg);
-
- if (h)
- printf("Print Myrinet Global network ids for specified hosts\n"
- "-l print local host's ID\n"
- "-n hostname print given host's ID\n"
- "-L print Myringet local net ID too\n"
- "[hostnames] print ids of given hosts (local if none)\n");
-}
-
-gm_status_t
-print_gmid(char *name, int name_fieldlen, int show_local_id)
-{
- struct gm_port *gm_port;
- int gm_port_id;
- gm_status_t gm_status;
- unsigned int local_id;
- unsigned int global_id;
-
- gm_status = gm_init();
- if (gm_status != GM_SUCCESS) {
- fprintf(stderr, "gm_init: %s\n", gm_strerror(gm_status));
- return gm_status;
- }
-
- gm_port_id = 2;
- gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id, "gmnalnid",
- GM_API_VERSION);
- if (gm_status != GM_SUCCESS) {
- int num_ports = gm_num_ports(gm_port);
-
- /* Couldn't open port 2, try 4 ... num_ports */
- for (gm_port_id = 4; gm_port_id < num_ports; gm_port_id++) {
- gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id,
- "gmnalnid", GM_API_VERSION);
- if (gm_status == GM_SUCCESS)
- break;
- }
-
- if (gm_status != GM_SUCCESS) {
- fprintf(stderr, "gm_open: %s\n",gm_strerror(gm_status));
- goto out_0;
- }
- }
-
- if (name == NULL) {
- local_id = 1;
- name = "<local>";
- } else {
- gm_status = gm_host_name_to_node_id_ex(gm_port, 1000000, name,
- &local_id);
- if (gm_status != GM_SUCCESS) {
- fprintf(stderr, "gm_host_name_to_node_id_ex(%s): %s\n",
- name, gm_strerror(gm_status));
- goto out_1;
- }
- }
-
- gm_status = gm_node_id_to_global_id(gm_port, local_id, &global_id) ;
- if (gm_status != GM_SUCCESS) {
- fprintf(stderr, "gm_node_id_to_global_id(%s:%d): %s\n",
- name, local_id, gm_strerror(gm_status));
- goto out_1;
- }
-
- if (name_fieldlen > 0)
- printf ("%*s ", name_fieldlen, name);
-
- if (!show_local_id)
- printf("0x%x\n", global_id);
- else
- printf("local 0x%x global 0x%x\n", local_id, global_id);
-
- out_1:
- gm_close(gm_port);
- out_0:
- gm_finalize();
-
- return gm_status;
-}
-
-int
-main (int argc, char **argv)
-{
- int c;
- gm_status_t gmrc;
- int rc;
- int max_namelen = 0;
- int show_local_id = 0;
-
- while ((c = getopt(argc, argv, "n:lLh")) != -1)
- switch(c) {
- case 'h':
- usage(argv[0], 1);
- return 0;
-
- case 'L':
- show_local_id = 1;
- break;
-
- case 'n':
- gmrc = print_gmid(optarg, 0, show_local_id);
- return (gmrc == GM_SUCCESS) ? 0 : 1;
-
- case 'l':
- gmrc = print_gmid(NULL, 0, show_local_id);
- return (gmrc == GM_SUCCESS) ? 0 : 1;
-
- default:
- usage(argv[0], 0);
- return 2;
- }
-
- if (optind == argc) {
- gmrc = print_gmid(NULL, 0, show_local_id);
- return (gmrc == GM_SUCCESS) ? 0 : 1;
- }
-
- if (optind != argc - 1)
- for (c = optind; c < argc; c++)
- if (strlen(argv[c]) > max_namelen)
- max_namelen = strlen(argv[c]);
-
- rc = 0;
-
- for (c = optind; c < argc; c++) {
- gmrc = print_gmid(argv[c], max_namelen, show_local_id);
-
- if (gmrc != GM_SUCCESS)
- rc = 1;
- }
-
- return rc;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define __USE_FILE_OFFSET64
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include <lnet/api-support.h>
-#include <lnet/lnetctl.h>
-#include <libcfs/portals_utils.h>
-
-
-static ioc_handler_t do_ioctl; /* forward ref */
-static ioc_handler_t *current_ioc_handler = &do_ioctl;
-
-struct ioc_dev {
- const char * dev_name;
- int dev_fd;
- int dev_major;
- int dev_minor;
-};
-
-static struct ioc_dev ioc_dev_list[10];
-
-struct dump_hdr {
- int magic;
- int dev_id;
- unsigned int opc;
-};
-
-char *dump_filename;
-
-void
-set_ioc_handler (ioc_handler_t *handler)
-{
- if (handler == NULL)
- current_ioc_handler = do_ioctl;
- else
- current_ioc_handler = handler;
-}
-
-/* Catamount has no <linux/kdev_t.h>, so just define it here */
-#ifndef MKDEV
-# define MKDEV(a,b) (((a) << 8) | (b))
-#endif
-
-static int
-open_ioc_dev(int dev_id)
-{
- const char * dev_name;
-
- if (dev_id < 0 ||
- dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]))
- return -EINVAL;
-
- dev_name = ioc_dev_list[dev_id].dev_name;
- if (dev_name == NULL) {
- fprintf(stderr, "unknown device id: %d\n", dev_id);
- return -EINVAL;
- }
-
- if (ioc_dev_list[dev_id].dev_fd < 0) {
- int fd = open(dev_name, O_RDWR);
-
- /* Make the /dev/ node if we need to */
- if (fd < 0 && errno == ENOENT) {
- if (mknod(dev_name,
- S_IFCHR|S_IWUSR|S_IRUSR,
- MKDEV(ioc_dev_list[dev_id].dev_major,
- ioc_dev_list[dev_id].dev_minor)) == 0)
- fd = open(dev_name, O_RDWR);
- else
- fprintf(stderr, "mknod %s failed: %s\n",
- dev_name, strerror(errno));
- }
-
- if (fd < 0) {
- fprintf(stderr, "opening %s failed: %s\n"
- "hint: the kernel modules may not be loaded\n",
- dev_name, strerror(errno));
- return fd;
- }
- ioc_dev_list[dev_id].dev_fd = fd;
- }
-
- return ioc_dev_list[dev_id].dev_fd;
-}
-
-
-static int
-do_ioctl(int dev_id, unsigned int opc, void *buf)
-{
- int fd, rc;
-
- fd = open_ioc_dev(dev_id);
- if (fd < 0)
- return fd;
-
- rc = ioctl(fd, opc, buf);
- return rc;
-
-}
-
-static FILE *
-get_dump_file()
-{
- FILE *fp = NULL;
-
- if (!dump_filename) {
- fprintf(stderr, "no dump filename\n");
- } else
- fp = fopen(dump_filename, "a");
- return fp;
-}
-
-/*
- * The dump file should start with a description of which devices are
- * used, but for now it will assumed whatever app reads the file will
- * know what to do. */
-int
-dump(int dev_id, unsigned int opc, void *buf)
-{
- FILE *fp;
- struct dump_hdr dump_hdr;
- struct libcfs_ioctl_hdr * ioc_hdr = (struct libcfs_ioctl_hdr *) buf;
- int rc;
-
- printf("dumping opc %x to %s\n", opc, dump_filename);
-
-
- dump_hdr.magic = 0xdeadbeef;
- dump_hdr.dev_id = dev_id;
- dump_hdr.opc = opc;
-
- fp = get_dump_file();
- if (fp == NULL) {
- fprintf(stderr, "%s: %s\n", dump_filename,
- strerror(errno));
- return -EINVAL;
- }
-
- rc = fwrite(&dump_hdr, sizeof(dump_hdr), 1, fp);
- if (rc == 1)
- rc = fwrite(buf, ioc_hdr->ioc_len, 1, fp);
- fclose(fp);
- if (rc != 1) {
- fprintf(stderr, "%s: %s\n", dump_filename,
- strerror(errno));
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* register a device to send ioctls to. */
-int
-register_ioc_dev(int dev_id, const char * dev_name, int major, int minor)
-{
-
- if (dev_id < 0 ||
- dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]))
- return -EINVAL;
-
- unregister_ioc_dev(dev_id);
-
- ioc_dev_list[dev_id].dev_name = dev_name;
- ioc_dev_list[dev_id].dev_fd = -1;
- ioc_dev_list[dev_id].dev_major = major;
- ioc_dev_list[dev_id].dev_minor = minor;
-
- return dev_id;
-}
-
-void
-unregister_ioc_dev(int dev_id)
-{
-
- if (dev_id < 0 ||
- dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]))
- return;
- if (ioc_dev_list[dev_id].dev_name != NULL &&
- ioc_dev_list[dev_id].dev_fd >= 0)
- close(ioc_dev_list[dev_id].dev_fd);
-
- ioc_dev_list[dev_id].dev_name = NULL;
- ioc_dev_list[dev_id].dev_fd = -1;
-}
-
-/* If this file is set, then all ioctl buffers will be
- appended to the file. */
-int
-set_ioctl_dump(char * file)
-{
- if (dump_filename)
- free(dump_filename);
-
- dump_filename = strdup(file);
- if (dump_filename == NULL)
- abort();
-
- set_ioc_handler(&dump);
- return 0;
-}
-
-int
-l_ioctl(int dev_id, unsigned int opc, void *buf)
-{
- return current_ioc_handler(dev_id, opc, buf);
-}
-
-/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer
- * in the file. For example:
- *
- * parse_dump("lctl.dump", l_ioctl);
- *
- * Note: if using l_ioctl, then you also need to register_ioc_dev() for
- * each device used in the dump.
- */
-int
-parse_dump(char * dump_file, ioc_handler_t ioc_func)
-{
- int line =0;
- struct stat st;
- char *start, *buf, *end;
-#ifndef __CYGWIN__
- int fd;
-#else
- HANDLE fd, hmap;
- DWORD size;
-#endif
-
-#ifndef __CYGWIN__
- fd = syscall(SYS_open, dump_file, O_RDONLY);
- if (fd < 0) {
- fprintf(stderr, "couldn't open %s: %s\n", dump_file,
- strerror(errno));
- exit(1);
- }
-
- if (fstat(fd, &st)) {
- perror("stat fails");
- exit(1);
- }
-
- if (st.st_size < 1) {
- fprintf(stderr, "KML is empty\n");
- exit(1);
- }
-
- start = buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0);
- end = start + st.st_size;
- close(fd);
- if (start == MAP_FAILED) {
- fprintf(stderr, "can't create file mapping\n");
- exit(1);
- }
-#else
- fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL,
- OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
- size = GetFileSize(fd, NULL);
- if (size < 1) {
- fprintf(stderr, "KML is empty\n");
- exit(1);
- }
-
- hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL);
- start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0);
- end = buf + size;
- CloseHandle(fd);
- if (start == NULL) {
- fprintf(stderr, "can't create file mapping\n");
- exit(1);
- }
-#endif /* __CYGWIN__ */
-
- while (buf < end) {
- struct dump_hdr *dump_hdr = (struct dump_hdr *) buf;
- struct libcfs_ioctl_hdr * data;
- char tmp[8096];
- int rc;
-
- line++;
-
- data = (struct libcfs_ioctl_hdr *) (buf + sizeof(*dump_hdr));
- if (buf + data->ioc_len > end ) {
- fprintf(stderr, "dump file overflow, %p + %d > %p\n", buf,
- data->ioc_len, end);
- return -1;
- }
-#if 0
- printf ("dump_hdr: %lx data: %lx\n",
- (unsigned long)dump_hdr - (unsigned long)buf, (unsigned long)data - (unsigned long)buf);
-
- printf("%d: opcode %x len: %d ver: %x ", line, dump_hdr->opc,
- data->ioc_len, data->ioc_version);
-#endif
-
- memcpy(tmp, data, data->ioc_len);
-
- rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp);
- if (rc) {
- printf("failed: %d\n", rc);
- exit(1);
- }
-
- buf += data->ioc_len + sizeof(*dump_hdr);
- }
-
-#ifndef __CYGWIN__
- munmap(start, end - start);
-#else
- UnmapViewOfFile(start);
- CloseHandle(hmap);
-#endif
-
- return 0;
-}
-
-int
-jt_ioc_dump(int argc, char **argv)
-{
- if (argc > 2) {
- fprintf(stderr, "usage: %s [hostname]\n", argv[0]);
- return 0;
- }
- printf("setting dumpfile to: %s\n", argv[1]);
-
- set_ioctl_dump(argv[1]);
- return 0;
-}
+++ /dev/null
-#!/bin/bash
-
-echo "=== Router Buffers ======="
-test -e /proc/sys/lnet/buffers && cat /proc/sys/lnet/buffers
-echo
-echo "=== NIs ============================================"
-test -e /proc/sys/lnet/nis && cat /proc/sys/lnet/nis
-echo
-echo "=== Peers ============================================================="
-test -e /proc/sys/lnet/peers && cat /proc/sys/lnet/peers
-echo
+++ /dev/null
-#!/bin/sh
-
-lnds=$(echo k{sock,qsw,gm,{open,i,v,o2,c}ib,ra,ptl,mx}lnd)
-
-do_rmmod() {
- mod=$1
- if grep "^$mod" /proc/modules >/dev/null 2>&1; then
- rmmod $mod
- fi
-}
-
-do_rmmod lnet_selftest
-
-if lctl network down > /dev/null 2>&1; then
- for mod in $lnds; do do_rmmod $mod; done
-
- rmmod lnet
- rmmod libcfs
-fi
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <getopt.h>
-#include <errno.h>
-#include <pwd.h>
-#include <lnet/lnetctl.h>
-#include <lnet/lnetst.h>
-#include "parser.h"
-
-static command_t lst_cmdlist[];
-static lst_sid_t session_id;
-static int session_key;
-static lstcon_trans_stat_t trans_stat;
-
-typedef struct list_string {
- struct list_string *lstr_next;
- int lstr_sz;
- char lstr_str[0];
-} lstr_t;
-
-#define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
-
-static int alloc_count = 0;
-static int alloc_nob = 0;
-
-lstr_t *
-alloc_lstr(int sz)
-{
- lstr_t *lstr = malloc(offsetof(lstr_t, lstr_str[sz]));
-
- if (lstr == NULL) {
- fprintf(stderr, "Can't allocate lstr\n");
- abort();
- }
-
- alloc_nob += sz;
- alloc_count++;
-
- lstr->lstr_str[0] = 0;
- lstr->lstr_sz = sz;
- return lstr;
-}
-
-void
-free_lstr(lstr_t *lstr)
-{
- alloc_count--;
- alloc_nob -= lstr->lstr_sz;
- free(lstr);
-}
-
-void
-free_lstrs(lstr_t **list)
-{
- lstr_t *lstr;
-
- while ((lstr = *list) != NULL) {
- *list = lstr->lstr_next;
- free_lstr(lstr);
- }
-}
-
-void
-new_lstrs(lstr_t **list, char *prefix, char *postfix,
- int lo, int hi, int stride)
-{
- int n1 = strlen(prefix);
- int n2 = strlen(postfix);
- int sz = n1 + 20 + n2 + 1;
-
- do {
- lstr_t *n = alloc_lstr(sz);
-
- snprintf(n->lstr_str, sz - 1, "%s%u%s",
- prefix, lo, postfix);
-
- n->lstr_next = *list;
- *list = n;
-
- lo += stride;
- } while (lo <= hi);
-}
-
-int
-expand_lstr(lstr_t **list, lstr_t *l)
-{
- int nob = strlen(l->lstr_str);
- char *b1;
- char *b2;
- char *expr;
- char *sep;
- int x;
- int y;
- int z;
- int n;
-
- b1 = strchr(l->lstr_str, '[');
- if (b1 == NULL) {
- l->lstr_next = *list;
- *list = l;
- return 0;
- }
-
- b2 = strchr(b1, ']');
- if (b2 == NULL || b2 == b1 + 1)
- return -1;
-
- *b1++ = 0;
- *b2++ = 0;
- expr = b1;
- do {
-
- sep = strchr(expr, ',');
- if (sep != NULL)
- *sep++ = 0;
-
- nob = strlen(expr);
- n = nob;
- if (sscanf(expr, "%u%n", &x, &n) >= 1 && n == nob) {
- /* simple number */
- new_lstrs(list, l->lstr_str, b2, x, x, 1);
- continue;
- }
-
- n = nob;
- if (sscanf(expr, "%u-%u%n", &x, &y, &n) >= 2 && n == nob &&
- x < y) {
- /* simple range */
- new_lstrs(list, l->lstr_str, b2, x, y, 1);
- continue;
- }
-
- n = nob;
- if (sscanf(expr, "%u-%u/%u%n", &x, &y, &z, &n) >= 3 && n == nob &&
- x < y) {
- /* strided range */
- new_lstrs(list, l->lstr_str, b2, x, y, z);
- continue;
- }
-
- /* syntax error */
- return -1;
- } while ((expr = sep) != NULL);
-
- free_lstr(l);
-
- return 1;
-}
-
-int
-expand_strs(char *str, lstr_t **head)
-{
- lstr_t *list = NULL;
- lstr_t *nlist;
- lstr_t *l;
- int rc = 0;
- int expanded;
-
- l = alloc_lstr(strlen(str) + 1);
- memcpy(l->lstr_str, str, strlen(str) + 1);
- l->lstr_next = NULL;
- list = l;
-
- do {
- expanded = 0;
- nlist = NULL;
-
- while ((l = list) != NULL) {
- list = l->lstr_next;
-
- rc = expand_lstr(&nlist, l);
- if (rc < 0) {
- fprintf(stderr, "Syntax error in \"%s\"\n", str);
- free_lstr(l);
- break;
- }
-
- expanded |= rc > 0;
- }
-
- /* re-order onto 'list' */
- while ((l = nlist) != NULL) {
- nlist = l->lstr_next;
- l->lstr_next = list;
- list = l;
- }
-
- } while (expanded && rc > 0);
-
- if (rc >= 0) {
- *head = list;
- return 0;
- }
-
- while ((l = list) != NULL) {
- list = l->lstr_next;
-
- free_lstr(l);
- }
- return rc;
-}
-
-int
-lst_parse_nids(char *str, int *countp, lnet_process_id_t **idspp)
-{
- lstr_t *head = NULL;
- lstr_t *l;
- int c = 0;
- int i;
- int rc;
-
- rc = expand_strs(str, &head);
- if (rc != 0)
- goto out;
-
- l = head;
- while (l != NULL) {
- l = l->lstr_next;
- c++;
- }
-
- *idspp = malloc(c * sizeof(lnet_process_id_t));
- if (*idspp == NULL) {
- fprintf(stderr, "Out of memory\n");
- rc = -1;
- }
-
- *countp = c;
-out:
- i = 0;
- while ((l = head) != NULL) {
- head = l->lstr_next;
-
- if (rc == 0) {
- (*idspp)[i].nid = libcfs_str2nid(l->lstr_str);
- if ((*idspp)[i].nid == LNET_NID_ANY) {
- fprintf(stderr, "Invalid nid: %s\n",
- l->lstr_str);
- rc = -1;
- }
-
- (*idspp)[i].pid = LUSTRE_LNET_PID;
- i++;
- }
-
- free_lstr(l);
- }
-
- if (rc == 0)
- return 0;
-
- free(*idspp);
- *idspp = NULL;
-
- return rc;
-}
-
-char *
-lst_node_state2str(int state)
-{
- if (state == LST_NODE_ACTIVE)
- return "Active";
- if (state == LST_NODE_BUSY)
- return "Busy";
- if (state == LST_NODE_DOWN)
- return "Down";
-
- return "Unknown";
-}
-
-int
-lst_node_str2state(char *str)
-{
- if (strcasecmp(str, "active") == 0)
- return LST_NODE_ACTIVE;
- if (strcasecmp(str, "busy") == 0)
- return LST_NODE_BUSY;
- if (strcasecmp(str, "down") == 0)
- return LST_NODE_DOWN;
- if (strcasecmp(str, "unknown") == 0)
- return LST_NODE_UNKNOWN;
- if (strcasecmp(str, "invalid") == 0)
- return (LST_NODE_UNKNOWN | LST_NODE_DOWN | LST_NODE_BUSY);
-
- return -1;
-}
-
-char *
-lst_test_type2name(int type)
-{
- if (type == LST_TEST_PING)
- return "ping";
- if (type == LST_TEST_BULK)
- return "brw";
-
- return "unknown";
-}
-
-int
-lst_test_name2type(char *name)
-{
- if (strcasecmp(name, "ping") == 0)
- return LST_TEST_PING;
- if (strcasecmp(name, "brw") == 0)
- return LST_TEST_BULK;
-
- return -1;
-}
-
-void
-lst_print_usage(char *cmd)
-{
- Parser_printhelp(cmd);
-}
-
-void
-lst_print_error(char *sub, const char *def_format, ...)
-{
- va_list ap;
-
- /* local error returned from kernel */
- switch (errno) {
- case ESRCH:
- fprintf(stderr, "No session exists\n");
- return;
- case ESHUTDOWN:
- fprintf(stderr, "Session is shutting down\n");
- return;
- case EACCES:
- fprintf(stderr, "Unmatched session key or not root\n");
- return;
- case ENOENT:
- fprintf(stderr, "Can't find %s in current session\n", sub);
- return;
- case EINVAL:
- fprintf(stderr, "Invalid parameters list in command line\n");
- return;
- case EFAULT:
- fprintf(stderr, "Bad parameter address\n");
- return;
- case EEXIST:
- fprintf(stderr, "%s already exists\n", sub);
- return;
- default:
- va_start(ap, def_format);
- vfprintf(stderr, def_format, ap);
- va_end(ap);
-
- return;
- }
-}
-
-void
-lst_free_rpcent(struct list_head *head)
-{
- lstcon_rpc_ent_t *ent;
-
- while (!list_empty(head)) {
- ent = list_entry(head->next, lstcon_rpc_ent_t, rpe_link);
-
- list_del(&ent->rpe_link);
- free(ent);
- }
-}
-
-void
-lst_reset_rpcent(struct list_head *head)
-{
- lstcon_rpc_ent_t *ent;
-
- list_for_each_entry(ent, head, rpe_link) {
- ent->rpe_sid = LST_INVALID_SID;
- ent->rpe_peer.nid = LNET_NID_ANY;
- ent->rpe_peer.pid = LNET_PID_ANY;
- ent->rpe_rpc_errno = ent->rpe_fwk_errno = 0;
- }
-}
-
-int
-lst_alloc_rpcent(struct list_head *head, int count, int offset)
-{
- lstcon_rpc_ent_t *ent;
- int i;
-
- for (i = 0; i < count; i++) {
- ent = malloc(offsetof(lstcon_rpc_ent_t, rpe_payload[offset]));
- if (ent == NULL) {
- lst_free_rpcent(head);
- return -1;
- }
-
- memset(ent, 0, offsetof(lstcon_rpc_ent_t, rpe_payload[offset]));
-
- ent->rpe_sid = LST_INVALID_SID;
- ent->rpe_peer.nid = LNET_NID_ANY;
- ent->rpe_peer.pid = LNET_PID_ANY;
- list_add(&ent->rpe_link, head);
- }
-
- return 0;
-}
-
-void
-lst_print_transerr(struct list_head *head, char *optstr)
-{
- lstcon_rpc_ent_t *ent;
-
- list_for_each_entry(ent, head, rpe_link) {
- if (ent->rpe_rpc_errno == 0 && ent->rpe_fwk_errno == 0)
- continue;
-
- if (ent->rpe_rpc_errno != 0) {
- fprintf(stderr, "%s RPC failed on %s: %s\n",
- optstr, libcfs_id2str(ent->rpe_peer),
- strerror(ent->rpe_rpc_errno));
- continue;
- }
-
- fprintf(stderr, "%s failed on %s: %s\n",
- optstr, libcfs_id2str(ent->rpe_peer),
- strerror(ent->rpe_fwk_errno));
- }
-}
-
-int lst_info_batch_ioctl(char *batch, int test, int server,
- lstcon_test_batch_ent_t *entp, int *idxp,
- int *ndentp, lstcon_node_ent_t *dentsp);
-
-int lst_info_group_ioctl(char *name, lstcon_ndlist_ent_t *gent,
- int *idx, int *count, lstcon_node_ent_t *dents);
-
-int lst_query_batch_ioctl(char *batch, int test, int server,
- int timeout, struct list_head *head);
-
-int
-lst_ioctl(unsigned int opc, void *buf, int len)
-{
- struct libcfs_ioctl_data data;
- int rc;
-
- LIBCFS_IOC_INIT (data);
- data.ioc_u32[0] = opc;
- data.ioc_plen1 = len;
- data.ioc_pbuf1 = (char *)buf;
- data.ioc_plen2 = sizeof(trans_stat);
- data.ioc_pbuf2 = (char *)&trans_stat;
-
- memset(&trans_stat, 0, sizeof(trans_stat));
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNETST, &data);
-
- /* local error, no valid RPC result */
- if (rc != 0)
- return -1;
-
- /* RPC error */
- if (trans_stat.trs_rpc_errno != 0)
- return -2;
-
- /* Framework error */
- if (trans_stat.trs_fwk_errno != 0)
- return -3;
-
- return 0;
-}
-
-int
-lst_new_session_ioctl (char *name, int timeout, int force, lst_sid_t *sid)
-{
- lstio_session_new_args_t args = {
- .lstio_ses_key = session_key,
- .lstio_ses_timeout = timeout,
- .lstio_ses_force = force,
- .lstio_ses_idp = sid,
- .lstio_ses_namep = name,
- .lstio_ses_nmlen = strlen(name),
- };
-
- return lst_ioctl (LSTIO_SESSION_NEW, &args, sizeof(args));
-}
-
-int
-jt_lst_new_session(int argc, char **argv)
-{
- char buf[LST_NAME_SIZE];
- char *name;
- int optidx = 0;
- int timeout = 300;
- int force = 0;
- int c;
- int rc;
-
- static struct option session_opts[] =
- {
- {"timeout", required_argument, 0, 't' },
- {"force", no_argument, 0, 'f' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
-
- c = getopt_long(argc, argv, "ft:",
- session_opts, &optidx);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 'f':
- force = 1;
- break;
- case 't':
- timeout = atoi(optarg);
- break;
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (timeout <= 0) {
- fprintf(stderr, "Invalid timeout value\n");
- return -1;
- }
-
- if (optind == argc - 1) {
- name = argv[optind ++];
- if (strlen(name) >= LST_NAME_SIZE) {
- fprintf(stderr, "Name size is limited to %d\n",
- LST_NAME_SIZE - 1);
- return -1;
- }
-
- } else if (optind == argc) {
- char user[LST_NAME_SIZE];
- char host[LST_NAME_SIZE];
- struct passwd *pw = getpwuid(getuid());
-
- if (pw == NULL)
- snprintf(user, sizeof(user), "%d", (int)getuid());
- else
- snprintf(user, sizeof(user), "%s", pw->pw_name);
-
- rc = gethostname(host, sizeof(host));
- if (rc != 0)
- snprintf(host, sizeof(host), "unknown_host");
-
- snprintf(buf, LST_NAME_SIZE, "%s@%s", user, host);
- name = buf;
-
- } else {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- rc = lst_new_session_ioctl(name, timeout, force, &session_id);
-
- if (rc != 0) {
- lst_print_error("session", "Failed to create session: %s\n",
- strerror(errno));
- return rc;
- }
-
- fprintf(stdout, "SESSION: %s TIMEOUT: %d FORCE: %s\n",
- name, timeout, force ? "Yes": "No");
-
- return rc;
-}
-
-int
-lst_session_info_ioctl(char *name, int len, int *key,
- lst_sid_t *sid, lstcon_ndlist_ent_t *ndinfo)
-{
- lstio_session_info_args_t args = {
- .lstio_ses_keyp = key,
- .lstio_ses_idp = sid,
- .lstio_ses_ndinfo = ndinfo,
- .lstio_ses_nmlen = len,
- .lstio_ses_namep = name,
- };
-
- return lst_ioctl(LSTIO_SESSION_INFO, &args, sizeof(args));
-}
-
-int
-jt_lst_show_session(int argc, char **argv)
-{
- lstcon_ndlist_ent_t ndinfo;
- lst_sid_t sid;
- char name[LST_NAME_SIZE];
- int key;
- int rc;
-
- rc = lst_session_info_ioctl(name, LST_NAME_SIZE, &key, &sid, &ndinfo);
-
- if (rc != 0) {
- lst_print_error("session", "Failed to show session: %s\n",
- strerror(errno));
- return -1;
- }
-
- fprintf(stdout, "%s ID: %Lu@%s, KEY: %d NODES: %d\n",
- name, sid.ses_stamp, libcfs_nid2str(sid.ses_nid),
- key, ndinfo.nle_nnode);
-
- return 0;
-}
-
-int
-lst_end_session_ioctl(void)
-{
- lstio_session_end_args_t args = {
- .lstio_ses_key = session_key,
- };
-
- return lst_ioctl (LSTIO_SESSION_END, &args, sizeof(args));
-}
-
-int
-jt_lst_end_session(int argc, char **argv)
-{
- int rc;
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- rc = lst_end_session_ioctl();
-
- if (rc == 0) {
- fprintf(stdout, "session is ended\n");
- return 0;
- }
-
- if (rc == -1) {
- lst_print_error("session", "Failed to end session: %s\n",
- strerror(errno));
- return rc;
- }
-
- if (trans_stat.trs_rpc_errno != 0) {
- fprintf(stderr,
- "[RPC] Failed to send %d session RPCs: %s\n",
- lstcon_rpc_stat_failure(&trans_stat, 0),
- strerror(trans_stat.trs_rpc_errno));
- }
-
- if (trans_stat.trs_fwk_errno != 0) {
- fprintf(stderr,
- "[FWK] Failed to end session on %d nodes: %s\n",
- lstcon_sesop_stat_failure(&trans_stat, 0),
- strerror(trans_stat.trs_fwk_errno));
- }
-
- return rc;
-}
-
-int
-lst_ping_ioctl(char *str, int type, int timeout,
- int count, lnet_process_id_t *ids, struct list_head *head)
-{
- lstio_debug_args_t args = {
- .lstio_dbg_key = session_key,
- .lstio_dbg_type = type,
- .lstio_dbg_flags = 0,
- .lstio_dbg_timeout = timeout,
- .lstio_dbg_nmlen = (str == NULL) ? 0: strlen(str),
- .lstio_dbg_namep = str,
- .lstio_dbg_count = count,
- .lstio_dbg_idsp = ids,
- .lstio_dbg_resultp = head,
- };
-
- return lst_ioctl (LSTIO_DEBUG, &args, sizeof(args));
-}
-
-int
-lst_get_node_count(int type, char *str, int *countp, lnet_process_id_t **idspp)
-{
- char buf[LST_NAME_SIZE];
- lstcon_test_batch_ent_t ent;
- lstcon_ndlist_ent_t *entp = &ent.tbe_cli_nle;
- lst_sid_t sid;
- int key;
- int rc;
-
- switch (type) {
- case LST_OPC_SESSION:
- rc = lst_session_info_ioctl(buf, LST_NAME_SIZE,
- &key, &sid, entp);
- break;
-
- case LST_OPC_BATCHSRV:
- entp = &ent.tbe_srv_nle;
- case LST_OPC_BATCHCLI:
- rc = lst_info_batch_ioctl(str, 0, 0, &ent, NULL, NULL, NULL);
- break;
-
- case LST_OPC_GROUP:
- rc = lst_info_group_ioctl(str, entp, NULL, NULL, NULL);
- break;
-
- case LST_OPC_NODES:
- rc = lst_parse_nids(str, &entp->nle_nnode, idspp) < 0 ? -1 : 0;
- break;
-
- default:
- rc = -1;
- break;
- }
-
- if (rc == 0)
- *countp = entp->nle_nnode;
-
- return rc;
-}
-
-int
-jt_lst_ping(int argc, char **argv)
-{
- struct list_head head;
- lnet_process_id_t *ids = NULL;
- lstcon_rpc_ent_t *ent = NULL;
- char *str = NULL;
- int optidx = 0;
- int server = 0;
- int timeout = 5;
- int count = 0;
- int type = 0;
- int rc = 0;
- int c;
-
- static struct option ping_opts[] =
- {
- {"session", no_argument, 0, 's' },
- {"server", no_argument, 0, 'v' },
- {"batch", required_argument, 0, 'b' },
- {"group", required_argument, 0, 'g' },
- {"nodes", required_argument, 0, 'n' },
- {"timeout", required_argument, 0, 't' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
-
- c = getopt_long(argc, argv, "g:b:n:t:sv",
- ping_opts, &optidx);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 's':
- type = LST_OPC_SESSION;
- break;
-
- case 'g':
- type = LST_OPC_GROUP;
- str = optarg;
- break;
-
- case 'b':
- type = LST_OPC_BATCHCLI;
- str = optarg;
- break;
-
- case 'n':
- type = LST_OPC_NODES;
- str = optarg;
- break;
-
- case 't':
- timeout = atoi(optarg);
- break;
-
- case 'v':
- server = 1;
- break;
-
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (type == 0 || timeout <= 0 || optind != argc) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- if (type == LST_OPC_BATCHCLI && server)
- type = LST_OPC_BATCHSRV;
-
- rc = lst_get_node_count(type, str, &count, &ids);
- if (rc < 0) {
- fprintf(stderr, "Failed to get count of nodes from %s: %s\n",
- (str == NULL) ? "session" : str, strerror(errno));
- return -1;
- }
-
- CFS_INIT_LIST_HEAD(&head);
-
- rc = lst_alloc_rpcent(&head, count, LST_NAME_SIZE);
- if (rc != 0) {
- fprintf(stderr, "Out of memory\n");
- goto out;
- }
-
- if (count == 0) {
- fprintf(stdout, "Target %s is empty\n",
- (str == NULL) ? "session" : str);
- goto out;
- }
-
- rc = lst_ping_ioctl(str, type, timeout, count, ids, &head);
- if (rc == -1) { /* local failure */
- lst_print_error("debug", "Failed to ping %s: %s\n",
- (str == NULL) ? "session" : str,
- strerror(errno));
- rc = -1;
- goto out;
- }
-
- /* ignore RPC errors and framwork errors */
- list_for_each_entry(ent, &head, rpe_link) {
- fprintf(stdout, "\t%s: %s [session: %s id: %s]\n",
- libcfs_id2str(ent->rpe_peer),
- lst_node_state2str(ent->rpe_state),
- (ent->rpe_state == LST_NODE_ACTIVE ||
- ent->rpe_state == LST_NODE_BUSY)?
- (ent->rpe_rpc_errno == 0 ?
- &ent->rpe_payload[0] : "Unknown") :
- "<NULL>", libcfs_nid2str(ent->rpe_sid.ses_nid));
- }
-
-out:
- lst_free_rpcent(&head);
-
- if (ids != NULL)
- free(ids);
-
- return rc;
-
-}
-
-int
-lst_add_nodes_ioctl (char *name, int count, lnet_process_id_t *ids,
- struct list_head *resultp)
-{
- lstio_group_nodes_args_t args = {
- .lstio_grp_key = session_key,
- .lstio_grp_nmlen = strlen(name),
- .lstio_grp_namep = name,
- .lstio_grp_count = count,
- .lstio_grp_idsp = ids,
- .lstio_grp_resultp = resultp,
- };
-
- return lst_ioctl(LSTIO_NODES_ADD, &args, sizeof(args));
-}
-
-int
-lst_add_group_ioctl (char *name)
-{
- lstio_group_add_args_t args = {
- .lstio_grp_key = session_key,
- .lstio_grp_nmlen = strlen(name),
- .lstio_grp_namep = name,
- };
-
- return lst_ioctl(LSTIO_GROUP_ADD, &args, sizeof(args));
-}
-
-int
-jt_lst_add_group(int argc, char **argv)
-{
- struct list_head head;
- lnet_process_id_t *ids;
- char *name;
- int count;
- int rc;
- int i;
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- if (argc < 3) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- name = argv[1];
- if (strlen(name) >= LST_NAME_SIZE) {
- fprintf(stderr, "Name length is limited to %d\n",
- LST_NAME_SIZE - 1);
- return -1;
- }
-
- rc = lst_add_group_ioctl(name);
- if (rc != 0) {
- lst_print_error("group", "Failed to add group %s: %s\n",
- name, strerror(errno));
- return -1;
- }
-
- CFS_INIT_LIST_HEAD(&head);
-
- for (i = 2; i < argc; i++) {
- /* parse address list */
- rc = lst_parse_nids(argv[i], &count, &ids);
- if (rc < 0) {
- fprintf(stderr, "Ignore invalid id list %s\n",
- argv[i]);
- continue;
- }
-
- if (count == 0)
- continue;
-
- rc = lst_alloc_rpcent(&head, count, 0);
- if (rc != 0) {
- fprintf(stderr, "Out of memory\n");
- break;
- }
-
- rc = lst_add_nodes_ioctl(name, count, ids, &head);
-
- free(ids);
-
- if (rc == 0) {
- lst_free_rpcent(&head);
- fprintf(stderr, "%s are added to session\n", argv[i]);
- continue;
- }
-
- if (rc == -1) {
- lst_free_rpcent(&head);
- lst_print_error("group", "Failed to add nodes %s: %s\n",
- argv[i], strerror(errno));
- break;
- }
-
- lst_print_transerr(&head, "create session");
- lst_free_rpcent(&head);
- }
-
- return rc;
-}
-
-int
-lst_del_group_ioctl (char *name)
-{
- lstio_group_del_args_t args = {
- .lstio_grp_key = session_key,
- .lstio_grp_nmlen = strlen(name),
- .lstio_grp_namep = name,
- };
-
- return lst_ioctl(LSTIO_GROUP_DEL, &args, sizeof(args));
-}
-
-int
-jt_lst_del_group(int argc, char **argv)
-{
- int rc;
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- if (argc != 2) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- rc = lst_del_group_ioctl(argv[1]);
- if (rc == 0) {
- fprintf(stdout, "Group is deleted\n");
- return 0;
- }
-
- if (rc == -1) {
- lst_print_error("group", "Failed to delete group: %s\n",
- strerror(errno));
- return rc;
- }
-
- fprintf(stderr, "Group is deleted with some errors\n");
-
- if (trans_stat.trs_rpc_errno != 0) {
- fprintf(stderr, "[RPC] Failed to send %d end session RPCs: %s\n",
- lstcon_rpc_stat_failure(&trans_stat, 0),
- strerror(trans_stat.trs_rpc_errno));
- }
-
- if (trans_stat.trs_fwk_errno != 0) {
- fprintf(stderr,
- "[FWK] Failed to end session on %d nodes: %s\n",
- lstcon_sesop_stat_failure(&trans_stat, 0),
- strerror(trans_stat.trs_fwk_errno));
- }
-
- return -1;
-}
-
-int
-lst_update_group_ioctl(int opc, char *name, int clean, int count,
- lnet_process_id_t *ids, struct list_head *resultp)
-{
- lstio_group_update_args_t args = {
- .lstio_grp_key = session_key,
- .lstio_grp_opc = opc,
- .lstio_grp_args = clean,
- .lstio_grp_nmlen = strlen(name),
- .lstio_grp_namep = name,
- .lstio_grp_count = count,
- .lstio_grp_idsp = ids,
- .lstio_grp_resultp = resultp,
- };
-
- return lst_ioctl(LSTIO_GROUP_UPDATE, &args, sizeof(args));
-}
-
-int
-jt_lst_update_group(int argc, char **argv)
-{
- struct list_head head;
- lnet_process_id_t *ids = NULL;
- char *str = NULL;
- char *grp = NULL;
- int optidx = 0;
- int count = 0;
- int clean = 0;
- int opc = 0;
- int rc;
- int c;
-
- static struct option update_group_opts[] =
- {
- {"refresh", no_argument, 0, 'f' },
- {"clean", required_argument, 0, 'c' },
- {"remove", required_argument, 0, 'r' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
- c = getopt_long(argc, argv, "fc:r:",
- update_group_opts, &optidx);
-
- /* Detect the end of the options. */
- if (c == -1)
- break;
-
- switch (c) {
- case 'f':
- if (opc != 0) {
- lst_print_usage(argv[0]);
- return -1;
- }
- opc = LST_GROUP_REFRESH;
- break;
-
- case 'r':
- if (opc != 0) {
- lst_print_usage(argv[0]);
- return -1;
- }
- opc = LST_GROUP_RMND;
- str = optarg;
- break;
-
- case 'c':
- clean = lst_node_str2state(optarg);
- if (opc != 0 || clean <= 0) {
- lst_print_usage(argv[0]);
- return -1;
- }
- opc = LST_GROUP_CLEAN;
- break;
-
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- /* no OPC or group is specified */
- if (opc == 0 || optind != argc - 1) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- grp = argv[optind];
-
- CFS_INIT_LIST_HEAD(&head);
-
- if (opc == LST_GROUP_RMND || opc == LST_GROUP_REFRESH) {
- rc = lst_get_node_count(opc == LST_GROUP_RMND ? LST_OPC_NODES :
- LST_OPC_GROUP,
- opc == LST_GROUP_RMND ? str : grp,
- &count, &ids);
-
- if (rc != 0) {
- fprintf(stderr, "Can't get count of nodes from %s: %s\n",
- opc == LST_GROUP_RMND ? str : grp,
- strerror(errno));
- return -1;
- }
-
- rc = lst_alloc_rpcent(&head, count, 0);
- if (rc != 0) {
- fprintf(stderr, "Out of memory\n");
- free(ids);
- return -1;
- }
-
- }
-
- rc = lst_update_group_ioctl(opc, grp, clean, count, ids, &head);
-
- if (ids != NULL)
- free(ids);
-
- if (rc == 0) {
- lst_free_rpcent(&head);
- return 0;
- }
-
- if (rc == -1) {
- lst_free_rpcent(&head);
- lst_print_error("group", "Failed to update group: %s\n",
- strerror(errno));
- return rc;
- }
-
- lst_print_transerr(&head, "Updating group");
-
- lst_free_rpcent(&head);
-
- return rc;
-}
-
-int
-lst_list_group_ioctl(int len, char *name, int idx)
-{
- lstio_group_list_args_t args = {
- .lstio_grp_key = session_key,
- .lstio_grp_idx = idx,
- .lstio_grp_nmlen = len,
- .lstio_grp_namep = name,
- };
-
- return lst_ioctl(LSTIO_GROUP_LIST, &args, sizeof(args));
-}
-
-int
-lst_info_group_ioctl(char *name, lstcon_ndlist_ent_t *gent,
- int *idx, int *count, lstcon_node_ent_t *dents)
-{
- lstio_group_info_args_t args = {
- .lstio_grp_key = session_key,
- .lstio_grp_nmlen = strlen(name),
- .lstio_grp_namep = name,
- .lstio_grp_entp = gent,
- .lstio_grp_idxp = idx,
- .lstio_grp_ndentp = count,
- .lstio_grp_dentsp = dents,
- };
-
- return lst_ioctl(LSTIO_GROUP_INFO, &args, sizeof(args));
-}
-
-int
-lst_list_group_all(void)
-{
- char name[LST_NAME_SIZE];
- int rc;
- int i;
-
- /* no group is specified, list name of all groups */
- for (i = 0; ; i++) {
- rc = lst_list_group_ioctl(LST_NAME_SIZE, name, i);
- if (rc == 0) {
- fprintf(stdout, "%d) %s\n", i + 1, name);
- continue;
- }
-
- if (errno == ENOENT)
- break;
-
- lst_print_error("group", "Failed to list group: %s\n",
- strerror(errno));
- return -1;
- }
-
- fprintf(stdout, "Total %d groups\n", i);
-
- return 0;
-}
-
-#define LST_NODES_TITLE "\tACTIVE\tBUSY\tDOWN\tUNKNOWN\tTOTAL\n"
-
-int
-jt_lst_list_group(int argc, char **argv)
-{
- lstcon_ndlist_ent_t gent;
- lstcon_node_ent_t *dents;
- int optidx = 0;
- int verbose = 0;
- int active = 0;
- int busy = 0;
- int down = 0;
- int unknown = 0;
- int all = 0;
- int count;
- int index;
- int i;
- int j;
- int c;
- int rc = 0;
-
- static struct option list_group_opts[] =
- {
- {"active", no_argument, 0, 'a' },
- {"busy", no_argument, 0, 'b' },
- {"down", no_argument, 0, 'd' },
- {"unknown", no_argument, 0, 'u' },
- {"all", no_argument, 0, 'l' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
- c = getopt_long(argc, argv, "abdul",
- list_group_opts, &optidx);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 'a':
- verbose = active = 1;
- all = 0;
- break;
- case 'b':
- verbose = busy = 1;
- all = 0;
- break;
- case 'd':
- verbose = down = 1;
- all = 0;
- break;
- case 'u':
- verbose = unknown = 1;
- all = 0;
- break;
- case 'l':
- verbose = all = 1;
- break;
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (optind == argc) {
- /* no group is specified, list name of all groups */
- rc = lst_list_group_all();
-
- return rc;
- }
-
- if (!verbose)
- fprintf(stdout, LST_NODES_TITLE);
-
- /* list nodes in specified groups */
- for (i = optind; i < argc; i++) {
- rc = lst_info_group_ioctl(argv[i], &gent, NULL, NULL, NULL);
- if (rc != 0) {
- if (errno == ENOENT) {
- rc = 0;
- break;
- }
-
- lst_print_error("group", "Failed to list group\n",
- strerror(errno));
- break;
- }
-
- if (!verbose) {
- fprintf(stdout, "\t%d\t%d\t%d\t%d\t%d\t%s\n",
- gent.nle_nactive, gent.nle_nbusy,
- gent.nle_ndown, gent.nle_nunknown,
- gent.nle_nnode, argv[i]);
- continue;
- }
-
- fprintf(stdout, "Group [ %s ]\n", argv[i]);
-
- if (gent.nle_nnode == 0) {
- fprintf(stdout, "No nodes found [ %s ]\n", argv[i]);
- continue;
- }
-
- count = gent.nle_nnode;
-
- dents = malloc(count * sizeof(lstcon_node_ent_t));
- if (dents == NULL) {
- fprintf(stderr, "Failed to malloc: %s\n",
- strerror(errno));
- return -1;
- }
-
- index = 0;
- rc = lst_info_group_ioctl(argv[i], &gent, &index, &count, dents);
- if (rc != 0) {
- lst_print_error("group", "Failed to list group: %s\n",
- strerror(errno));
- free(dents);
- return -1;
- }
-
- for (j = 0, c = 0; j < count; j++) {
- if (all ||
- ((active && dents[j].nde_state == LST_NODE_ACTIVE) ||
- (busy && dents[j].nde_state == LST_NODE_BUSY) ||
- (down && dents[j].nde_state == LST_NODE_DOWN) ||
- (unknown && dents[j].nde_state == LST_NODE_UNKNOWN))) {
-
- fprintf(stdout, "\t%s: %s\n",
- libcfs_id2str(dents[j].nde_id),
- lst_node_state2str(dents[j].nde_state));
- c++;
- }
- }
-
- fprintf(stdout, "Total %d nodes [ %s ]\n", c, argv[i]);
-
- free(dents);
- }
-
- return rc;
-}
-
-int
-lst_stat_ioctl (char *name, int count, lnet_process_id_t *idsp,
- int timeout, struct list_head *resultp)
-{
- lstio_stat_args_t args = {
- .lstio_sta_key = session_key,
- .lstio_sta_timeout = timeout,
- .lstio_sta_nmlen = strlen(name),
- .lstio_sta_namep = name,
- .lstio_sta_count = count,
- .lstio_sta_idsp = idsp,
- .lstio_sta_resultp = resultp,
- };
-
- return lst_ioctl (LSTIO_STAT_QUERY, &args, sizeof(args));
-}
-
-typedef struct {
- struct list_head srp_link;
- int srp_count;
- char *srp_name;
- lnet_process_id_t *srp_ids;
- struct list_head srp_result[2];
-} lst_stat_req_param_t;
-
-static void
-lst_stat_req_param_free(lst_stat_req_param_t *srp)
-{
- int i;
-
- for (i = 0; i < 2; i++)
- lst_free_rpcent(&srp->srp_result[i]);
-
- if (srp->srp_ids != NULL)
- free(srp->srp_ids);
-
- free(srp);
-}
-
-static int
-lst_stat_req_param_alloc(char *name, lst_stat_req_param_t **srpp, int save_old)
-{
- lst_stat_req_param_t *srp = NULL;
- int count = save_old ? 2 : 1;
- int rc;
- int i;
-
- srp = malloc(sizeof(*srp));
- if (srp == NULL)
- return -ENOMEM;
-
- memset(srp, 0, sizeof(*srp));
- CFS_INIT_LIST_HEAD(&srp->srp_result[0]);
- CFS_INIT_LIST_HEAD(&srp->srp_result[1]);
-
- rc = lst_get_node_count(LST_OPC_GROUP, name,
- &srp->srp_count, NULL);
- if (rc != 0 && errno == ENOENT) {
- rc = lst_get_node_count(LST_OPC_NODES, name,
- &srp->srp_count, &srp->srp_ids);
- }
-
- if (rc != 0) {
- fprintf(stderr,
- "Failed to get count of nodes from %s: %s\n",
- name, strerror(errno));
- lst_stat_req_param_free(srp);
-
- return rc;
- }
-
- srp->srp_name = name;
-
- for (i = 0; i < count; i++) {
- rc = lst_alloc_rpcent(&srp->srp_result[i], srp->srp_count,
- sizeof(sfw_counters_t) +
- sizeof(srpc_counters_t) +
- sizeof(lnet_counters_t));
- if (rc != 0) {
- fprintf(stderr, "Out of memory\n");
- break;
- }
- }
-
- if (rc == 0) {
- *srpp = srp;
- return 0;
- }
-
- lst_stat_req_param_free(srp);
-
- return rc;
-}
-
-typedef struct {
- /* TODO */
-} lst_srpc_stat_result;
-
-#define LST_LNET_AVG 0
-#define LST_LNET_MIN 1
-#define LST_LNET_MAX 2
-
-typedef struct {
- float lnet_avg_sndrate;
- float lnet_min_sndrate;
- float lnet_max_sndrate;
- float lnet_total_sndrate;
-
- float lnet_avg_rcvrate;
- float lnet_min_rcvrate;
- float lnet_max_rcvrate;
- float lnet_total_rcvrate;
-
- float lnet_avg_sndperf;
- float lnet_min_sndperf;
- float lnet_max_sndperf;
- float lnet_total_sndperf;
-
- float lnet_avg_rcvperf;
- float lnet_min_rcvperf;
- float lnet_max_rcvperf;
- float lnet_total_rcvperf;
-
- int lnet_stat_count;
-} lst_lnet_stat_result_t;
-
-lst_lnet_stat_result_t lnet_stat_result;
-
-static float
-lst_lnet_stat_value(int bw, int send, int off)
-{
- float *p;
-
- p = bw ? &lnet_stat_result.lnet_avg_sndperf :
- &lnet_stat_result.lnet_avg_sndrate;
-
- if (!send)
- p += 4;
-
- p += off;
-
- return *p;
-}
-
-static void
-lst_timeval_diff(struct timeval *tv1,
- struct timeval *tv2, struct timeval *df)
-{
- if (tv1->tv_usec >= tv2->tv_usec) {
- df->tv_sec = tv1->tv_sec - tv2->tv_sec;
- df->tv_usec = tv1->tv_usec - tv2->tv_usec;
- return;
- }
-
- df->tv_sec = tv1->tv_sec - 1 - tv2->tv_sec;
- df->tv_usec = tv1->tv_sec + 1000000 - tv2->tv_usec;
-
- return;
-}
-
-void
-lst_cal_lnet_stat(float delta, lnet_counters_t *lnet_new,
- lnet_counters_t *lnet_old)
-{
- float perf;
- float rate;
-
- perf = (float)(lnet_new->send_length -
- lnet_old->send_length) / (1024 * 1024) / delta;
- lnet_stat_result.lnet_total_sndperf += perf;
-
- if (lnet_stat_result.lnet_min_sndperf > perf ||
- lnet_stat_result.lnet_min_sndperf == 0)
- lnet_stat_result.lnet_min_sndperf = perf;
-
- if (lnet_stat_result.lnet_max_sndperf < perf)
- lnet_stat_result.lnet_max_sndperf = perf;
-
- perf = (float)(lnet_new->recv_length -
- lnet_old->recv_length) / (1024 * 1024) / delta;
- lnet_stat_result.lnet_total_rcvperf += perf;
-
- if (lnet_stat_result.lnet_min_rcvperf > perf ||
- lnet_stat_result.lnet_min_rcvperf == 0)
- lnet_stat_result.lnet_min_rcvperf = perf;
-
- if (lnet_stat_result.lnet_max_rcvperf < perf)
- lnet_stat_result.lnet_max_rcvperf = perf;
-
- rate = (lnet_new->send_count - lnet_old->send_count) / delta;
- lnet_stat_result.lnet_total_sndrate += rate;
-
- if (lnet_stat_result.lnet_min_sndrate > rate ||
- lnet_stat_result.lnet_min_sndrate == 0)
- lnet_stat_result.lnet_min_sndrate = rate;
-
- if (lnet_stat_result.lnet_max_sndrate < rate)
- lnet_stat_result.lnet_max_sndrate = rate;
-
- rate = (lnet_new->recv_count - lnet_old->recv_count) / delta;
- lnet_stat_result.lnet_total_rcvrate += rate;
-
- if (lnet_stat_result.lnet_min_rcvrate > rate ||
- lnet_stat_result.lnet_min_rcvrate == 0)
- lnet_stat_result.lnet_min_rcvrate = rate;
-
- if (lnet_stat_result.lnet_max_rcvrate < rate)
- lnet_stat_result.lnet_max_rcvrate = rate;
-
- lnet_stat_result.lnet_stat_count ++;
-
- lnet_stat_result.lnet_avg_sndrate = lnet_stat_result.lnet_total_sndrate /
- lnet_stat_result.lnet_stat_count;
- lnet_stat_result.lnet_avg_rcvrate = lnet_stat_result.lnet_total_rcvrate /
- lnet_stat_result.lnet_stat_count;
-
- lnet_stat_result.lnet_avg_sndperf = lnet_stat_result.lnet_total_sndperf /
- lnet_stat_result.lnet_stat_count;
- lnet_stat_result.lnet_avg_rcvperf = lnet_stat_result.lnet_total_rcvperf /
- lnet_stat_result.lnet_stat_count;
-
-}
-
-void
-lst_print_lnet_stat(char *name, int bwrt, int rdwr, int type)
-{
- int start1 = 0;
- int end1 = 1;
- int start2 = 0;
- int end2 = 1;
- int i;
- int j;
-
- if (lnet_stat_result.lnet_stat_count == 0)
- return;
-
- if (bwrt == 1) /* bw only */
- start1 = 1;
-
- if (bwrt == 2) /* rates only */
- end1 = 0;
-
- if (rdwr == 1) /* recv only */
- start2 = 1;
-
- if (rdwr == 2) /* send only */
- end2 = 0;
-
- for (i = start1; i <= end1; i++) {
- fprintf(stdout, "[LNet %s of %s]\n",
- i == 0 ? "Rates" : "Bandwidth", name);
-
- for (j = start2; j <= end2; j++) {
- fprintf(stdout, "[%c] ", j == 0 ? 'R' : 'W');
-
- if ((type & 1) != 0) {
- fprintf(stdout, i == 0 ? "Avg: %-8.0f RPC/s " :
- "Avg: %-8.2f MB/s ",
- lst_lnet_stat_value(i, j, 0));
- }
-
- if ((type & 2) != 0) {
- fprintf(stdout, i == 0 ? "Min: %-8.0f RPC/s " :
- "Min: %-8.2f MB/s ",
- lst_lnet_stat_value(i, j, 1));
- }
-
- if ((type & 4) != 0) {
- fprintf(stdout, i == 0 ? "Max: %-8.0f RPC/s" :
- "Max: %-8.2f MB/s",
- lst_lnet_stat_value(i, j, 2));
- }
-
- fprintf(stdout, "\n");
- }
- }
-}
-
-void
-lst_print_stat(char *name, struct list_head *resultp,
- int idx, int lnet, int bwrt, int rdwr, int type)
-{
- struct list_head tmp[2];
- lstcon_rpc_ent_t *new;
- lstcon_rpc_ent_t *old;
- sfw_counters_t *sfwk_new;
- sfw_counters_t *sfwk_old;
- srpc_counters_t *srpc_new;
- srpc_counters_t *srpc_old;
- lnet_counters_t *lnet_new;
- lnet_counters_t *lnet_old;
- struct timeval tv;
- float delta;
- int errcount = 0;
-
- CFS_INIT_LIST_HEAD(&tmp[0]);
- CFS_INIT_LIST_HEAD(&tmp[1]);
-
- memset(&lnet_stat_result, 0, sizeof(lnet_stat_result));
-
- while (!list_empty(&resultp[idx])) {
- if (list_empty(&resultp[1 - idx])) {
- fprintf(stderr, "Group is changed, re-run stat\n");
- break;
- }
-
- new = list_entry(resultp[idx].next, lstcon_rpc_ent_t, rpe_link);
- old = list_entry(resultp[1 - idx].next, lstcon_rpc_ent_t, rpe_link);
-
- /* first time get stats result, can't calculate diff */
- if (new->rpe_peer.nid == LNET_NID_ANY)
- break;
-
- if (new->rpe_peer.nid != old->rpe_peer.nid ||
- new->rpe_peer.pid != old->rpe_peer.pid) {
- /* Something wrong. i.e, somebody change the group */
- break;
- }
-
- list_del(&new->rpe_link);
- list_add_tail(&new->rpe_link, &tmp[idx]);
-
- list_del(&old->rpe_link);
- list_add_tail(&old->rpe_link, &tmp[1 - idx]);
-
- if (new->rpe_rpc_errno != 0 || new->rpe_fwk_errno != 0 ||
- old->rpe_rpc_errno != 0 || old->rpe_fwk_errno != 0) {
- errcount ++;
- continue;
- }
-
- sfwk_new = (sfw_counters_t *)&new->rpe_payload[0];
- sfwk_old = (sfw_counters_t *)&old->rpe_payload[0];
-
- srpc_new = (srpc_counters_t *)((char *)sfwk_new + sizeof(*sfwk_new));
- srpc_old = (srpc_counters_t *)((char *)sfwk_old + sizeof(*sfwk_old));
-
- lnet_new = (lnet_counters_t *)((char *)srpc_new + sizeof(*srpc_new));
- lnet_old = (lnet_counters_t *)((char *)srpc_old + sizeof(*srpc_old));
-
- lst_timeval_diff(&new->rpe_stamp, &old->rpe_stamp, &tv);
-
- delta = tv.tv_sec + (float)tv.tv_usec/1000000;
-
- if (!lnet) /* TODO */
- continue;
-
- lst_cal_lnet_stat(delta, lnet_new, lnet_old);
- }
-
- list_splice(&tmp[idx], &resultp[idx]);
- list_splice(&tmp[1 - idx], &resultp[1 - idx]);
-
- if (errcount > 0)
- fprintf(stdout, "Failed to stat on %d nodes\n", errcount);
-
- if (!lnet) /* TODO */
- return;
-
- lst_print_lnet_stat(name, bwrt, rdwr, type);
-}
-
-int
-jt_lst_stat(int argc, char **argv)
-{
- struct list_head head;
- lst_stat_req_param_t *srp;
- time_t last = 0;
- int optidx = 0;
- int timeout = 5; /* default timeout, 5 sec */
- int delay = 5; /* default delay, 5 sec */
- int lnet = 1; /* lnet stat by default */
- int bwrt = 0;
- int rdwr = 0;
- int type = -1;
- int idx = 0;
- int rc;
- int c;
-
- static struct option stat_opts[] =
- {
- {"timeout", required_argument, 0, 't' },
- {"delay" , required_argument, 0, 'd' },
- {"lnet" , no_argument, 0, 'l' },
- {"rpc" , no_argument, 0, 'c' },
- {"bw" , no_argument, 0, 'b' },
- {"rate" , no_argument, 0, 'a' },
- {"read" , no_argument, 0, 'r' },
- {"write" , no_argument, 0, 'w' },
- {"avg" , no_argument, 0, 'g' },
- {"min" , no_argument, 0, 'n' },
- {"max" , no_argument, 0, 'x' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
- c = getopt_long(argc, argv, "t:d:lcbarwgnx", stat_opts, &optidx);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 't':
- timeout = atoi(optarg);
- break;
- case 'd':
- delay = atoi(optarg);
- break;
- case 'l':
- lnet = 1;
- break;
- case 'c':
- lnet = 0;
- break;
- case 'b':
- bwrt |= 1;
- break;
- case 'a':
- bwrt |= 2;
- break;
- case 'r':
- rdwr |= 1;
- break;
- case 'w':
- rdwr |= 2;
- break;
- case 'g':
- if (type == -1) {
- type = 1;
- break;
- }
- type |= 1;
- break;
- case 'n':
- if (type == -1) {
- type = 2;
- break;
- }
- type |= 2;
- break;
- case 'x':
- if (type == -1) {
- type = 4;
- break;
- }
- type |= 4;
- break;
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (optind == argc) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- if (timeout <= 0 || delay <= 0) {
- fprintf(stderr, "Invalid timeout or delay value\n");
- return -1;
- }
-
- CFS_INIT_LIST_HEAD(&head);
-
- while (optind < argc) {
- rc = lst_stat_req_param_alloc(argv[optind++], &srp, 1);
- if (rc != 0)
- goto out;
-
- list_add_tail(&srp->srp_link, &head);
- }
-
- while (1) {
- time_t now = time(NULL);
-
- if (now - last < delay) {
- sleep(delay - now + last);
- time(&now);
- }
-
- last = now;
-
- list_for_each_entry(srp, &head, srp_link) {
- rc = lst_stat_ioctl(srp->srp_name,
- srp->srp_count, srp->srp_ids,
- timeout, &srp->srp_result[idx]);
- if (rc == -1) {
- lst_print_error("stat", "Failed to stat %s: %s\n",
- srp->srp_name, strerror(errno));
- goto out;
- }
-
- lst_print_stat(srp->srp_name, srp->srp_result,
- idx, lnet, bwrt, rdwr, type);
-
- lst_reset_rpcent(&srp->srp_result[1 - idx]);
- }
-
- idx = 1 - idx;
- }
-
-out:
- while (!list_empty(&head)) {
- srp = list_entry(head.next, lst_stat_req_param_t, srp_link);
-
- list_del(&srp->srp_link);
- lst_stat_req_param_free(srp);
- }
-
- return rc;
-}
-
-int
-jt_lst_show_error(int argc, char **argv)
-{
- struct list_head head;
- lst_stat_req_param_t *srp;
- lstcon_rpc_ent_t *ent;
- sfw_counters_t *sfwk;
- srpc_counters_t *srpc;
- lnet_counters_t *lnet;
- int show_rpc = 1;
- int optidx = 0;
- int rc = 0;
- int ecount;
- int c;
-
- static struct option show_error_opts[] =
- {
- {"session", no_argument, 0, 's' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
- c = getopt_long(argc, argv, "s", show_error_opts, &optidx);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 's':
- show_rpc = 0;
- break;
-
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (optind == argc) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- CFS_INIT_LIST_HEAD(&head);
-
- while (optind < argc) {
- rc = lst_stat_req_param_alloc(argv[optind++], &srp, 0);
- if (rc != 0)
- goto out;
-
- list_add_tail(&srp->srp_link, &head);
- }
-
- list_for_each_entry(srp, &head, srp_link) {
- rc = lst_stat_ioctl(srp->srp_name, srp->srp_count,
- srp->srp_ids, 5, &srp->srp_result[0]);
-
- if (rc == -1) {
- lst_print_error(srp->srp_name, "Failed to show errors of %s: %s\n",
- srp->srp_name, strerror(errno));
- goto out;
- }
-
- fprintf(stdout, "%s:\n", srp->srp_name);
-
- ecount = 0;
-
- list_for_each_entry(ent, &srp->srp_result[0], rpe_link) {
- if (ent->rpe_rpc_errno != 0) {
- ecount ++;
- fprintf(stderr, "RPC failure, can't show error on %s\n",
- libcfs_id2str(ent->rpe_peer));
- continue;
- }
-
- if (ent->rpe_fwk_errno != 0) {
- ecount ++;
- fprintf(stderr, "Framework failure, can't show error on %s\n",
- libcfs_id2str(ent->rpe_peer));
- continue;
- }
-
- sfwk = (sfw_counters_t *)&ent->rpe_payload[0];
- srpc = (srpc_counters_t *)((char *)sfwk + sizeof(*sfwk));
- lnet = (lnet_counters_t *)((char *)srpc + sizeof(*srpc));
-
- if (srpc->errors == 0 &&
- sfwk->brw_errors == 0 && sfwk->ping_errors == 0)
- continue;
-
- if (!show_rpc &&
- sfwk->brw_errors == 0 && sfwk->ping_errors == 0)
- continue;
-
- ecount ++;
-
- fprintf(stderr, "%s: [Session %d brw errors, %d ping errors]%c",
- libcfs_id2str(ent->rpe_peer),
- sfwk->brw_errors, sfwk->ping_errors,
- show_rpc ? ' ' : '\n');
-
- if (!show_rpc)
- continue;
-
- fprintf(stderr, "[RPC: %d errors, %d dropped, %d expired]\n",
- srpc->errors, srpc->rpcs_dropped, srpc->rpcs_expired);
- }
-
- fprintf(stdout, "Total %d error nodes in %s\n", ecount, srp->srp_name);
- }
-out:
- while (!list_empty(&head)) {
- srp = list_entry(head.next, lst_stat_req_param_t, srp_link);
-
- list_del(&srp->srp_link);
- lst_stat_req_param_free(srp);
- }
-
- return rc;
-}
-
-int
-lst_add_batch_ioctl (char *name)
-{
- lstio_batch_add_args_t args = {
- .lstio_bat_key = session_key,
- .lstio_bat_nmlen = strlen(name),
- .lstio_bat_namep = name,
- };
-
- return lst_ioctl (LSTIO_BATCH_ADD, &args, sizeof(args));
-}
-
-int
-jt_lst_add_batch(int argc, char **argv)
-{
- char *name;
- int rc;
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- if (argc != 2) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- name = argv[1];
- if (strlen(name) >= LST_NAME_SIZE) {
- fprintf(stderr, "Name length is limited to %d\n",
- LST_NAME_SIZE - 1);
- return -1;
- }
-
- rc = lst_add_batch_ioctl(name);
- if (rc == 0)
- return 0;
-
- lst_print_error("batch", "Failed to create batch: %s\n",
- strerror(errno));
-
- return -1;
-}
-
-int
-lst_start_batch_ioctl (char *name, int timeout, struct list_head *resultp)
-{
- lstio_batch_run_args_t args = {
- .lstio_bat_key = session_key,
- .lstio_bat_timeout = timeout,
- .lstio_bat_nmlen = strlen(name),
- .lstio_bat_namep = name,
- .lstio_bat_resultp = resultp,
- };
-
- return lst_ioctl(LSTIO_BATCH_START, &args, sizeof(args));
-}
-
-int
-jt_lst_start_batch(int argc, char **argv)
-{
- struct list_head head;
- char *batch;
- int optidx = 0;
- int timeout = 0;
- int count = 0;
- int rc;
- int c;
-
- static struct option start_batch_opts[] =
- {
- {"timeout", required_argument, 0, 't' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
- c = getopt_long(argc, argv, "t:",
- start_batch_opts, &optidx);
-
- /* Detect the end of the options. */
- if (c == -1)
- break;
-
- switch (c) {
- case 't':
- timeout = atoi(optarg);
- break;
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (optind == argc) {
- batch = LST_DEFAULT_BATCH;
-
- } else if (optind == argc - 1) {
- batch = argv[optind];
-
- } else {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- rc = lst_get_node_count(LST_OPC_BATCHCLI, batch, &count, NULL);
- if (rc != 0) {
- fprintf(stderr, "Failed to get count of nodes from %s: %s\n",
- batch, strerror(errno));
- return -1;
- }
-
- CFS_INIT_LIST_HEAD(&head);
-
- rc = lst_alloc_rpcent(&head, count, 0);
- if (rc != 0) {
- fprintf(stderr, "Out of memory\n");
- return -1;
- }
-
- rc = lst_start_batch_ioctl(batch, timeout, &head);
-
- if (rc == 0) {
- fprintf(stdout, "%s is running now\n", batch);
- lst_free_rpcent(&head);
- return 0;
- }
-
- if (rc == -1) {
- lst_print_error("batch", "Failed to start batch: %s\n",
- strerror(errno));
- lst_free_rpcent(&head);
- return rc;
- }
-
- lst_print_transerr(&head, "Run batch");
-
- lst_free_rpcent(&head);
-
- return rc;
-}
-
-int
-lst_stop_batch_ioctl(char *name, int force, struct list_head *resultp)
-{
- lstio_batch_stop_args_t args = {
- .lstio_bat_key = session_key,
- .lstio_bat_force = force,
- .lstio_bat_nmlen = strlen(name),
- .lstio_bat_namep = name,
- .lstio_bat_resultp = resultp,
- };
-
- return lst_ioctl(LSTIO_BATCH_STOP, &args, sizeof(args));
-}
-
-int
-jt_lst_stop_batch(int argc, char **argv)
-{
- struct list_head head;
- char *batch;
- int force = 0;
- int optidx;
- int count;
- int rc;
- int c;
-
- static struct option stop_batch_opts[] =
- {
- {"force", no_argument, 0, 'f' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
- c = getopt_long(argc, argv, "f",
- stop_batch_opts, &optidx);
-
- /* Detect the end of the options. */
- if (c == -1)
- break;
-
- switch (c) {
- case 'f':
- force = 1;
- break;
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (optind == argc) {
- batch = LST_DEFAULT_BATCH;
-
- } else if (optind == argc - 1) {
- batch = argv[optind];
-
- } else {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- rc = lst_get_node_count(LST_OPC_BATCHCLI, batch, &count, NULL);
- if (rc != 0) {
- fprintf(stderr, "Failed to get count of nodes from %s: %s\n",
- batch, strerror(errno));
- return -1;
- }
-
- CFS_INIT_LIST_HEAD(&head);
-
- rc = lst_alloc_rpcent(&head, count, 0);
- if (rc != 0) {
- fprintf(stderr, "Out of memory\n");
- return -1;
- }
-
- rc = lst_stop_batch_ioctl(batch, force, &head);
- if (rc != 0)
- goto out;
-
- while (1) {
- lst_reset_rpcent(&head);
-
- rc = lst_query_batch_ioctl(batch, 0, 0, 30, &head);
- if (rc != 0)
- goto out;
-
- if (lstcon_tsbqry_stat_run(&trans_stat, 0) == 0 &&
- lstcon_tsbqry_stat_failure(&trans_stat, 0) == 0)
- break;
-
- fprintf(stdout, "%d batch in stopping\n",
- lstcon_tsbqry_stat_run(&trans_stat, 0));
- sleep(1);
- }
-
- fprintf(stdout, "Batch is stopped\n");
- lst_free_rpcent(&head);
-
- return 0;
-out:
- if (rc == -1) {
- lst_print_error("batch", "Failed to stop batch: %s\n",
- strerror(errno));
- lst_free_rpcent(&head);
- return -1;
- }
-
- lst_print_transerr(&head, "stop batch");
-
- lst_free_rpcent(&head);
-
- return rc;
-}
-
-int
-lst_list_batch_ioctl(int len, char *name, int index)
-{
- lstio_batch_list_args_t args = {
- .lstio_bat_key = session_key,
- .lstio_bat_idx = index,
- .lstio_bat_nmlen = len,
- .lstio_bat_namep = name,
- };
-
- return lst_ioctl(LSTIO_BATCH_LIST, &args, sizeof(args));
-}
-
-int
-lst_info_batch_ioctl(char *batch, int test, int server,
- lstcon_test_batch_ent_t *entp, int *idxp,
- int *ndentp, lstcon_node_ent_t *dentsp)
-{
- lstio_batch_info_args_t args = {
- .lstio_bat_key = session_key,
- .lstio_bat_nmlen = strlen(batch),
- .lstio_bat_namep = batch,
- .lstio_bat_server = server,
- .lstio_bat_testidx = test,
- .lstio_bat_entp = entp,
- .lstio_bat_idxp = idxp,
- .lstio_bat_ndentp = ndentp,
- .lstio_bat_dentsp = dentsp,
- };
-
- return lst_ioctl(LSTIO_BATCH_INFO, &args, sizeof(args));
-}
-
-int
-lst_list_batch_all(void)
-{
- char name[LST_NAME_SIZE];
- int rc;
- int i;
-
- for (i = 0; ; i++) {
- rc = lst_list_batch_ioctl(LST_NAME_SIZE, name, i);
- if (rc == 0) {
- fprintf(stdout, "%d) %s\n", i + 1, name);
- continue;
- }
-
- if (errno == ENOENT)
- break;
-
- lst_print_error("batch", "Failed to list batch: %s\n",
- strerror(errno));
- return rc;
- }
-
- fprintf(stdout, "Total %d batches\n", i);
-
- return 0;
-}
-
-int
-lst_list_tsb_nodes(char *batch, int test, int server,
- int count, int active, int invalid)
-{
- lstcon_node_ent_t *dents;
- int index = 0;
- int rc;
- int c;
- int i;
-
- if (count == 0)
- return 0;
-
- /* verbose list, show nodes in batch or test */
- dents = malloc(count * sizeof(lstcon_node_ent_t));
- if (dents == NULL) {
- fprintf(stdout, "Can't allocate memory\n");
- return -1;
- }
-
- rc = lst_info_batch_ioctl(batch, test, server,
- NULL, &index, &count, dents);
- if (rc != 0) {
- free(dents);
- lst_print_error((test > 0) ? "test" : "batch",
- (test > 0) ? "Failed to query test: %s\n" :
- "Failed to query batch: %s\n",
- strerror(errno));
- return -1;
- }
-
- for (i = 0, c = 0; i < count; i++) {
- if ((!active && dents[i].nde_state == LST_NODE_ACTIVE) ||
- (!invalid && (dents[i].nde_state == LST_NODE_BUSY ||
- dents[i].nde_state == LST_NODE_DOWN ||
- dents[i].nde_state == LST_NODE_UNKNOWN)))
- continue;
-
- fprintf(stdout, "\t%s: %s\n",
- libcfs_id2str(dents[i].nde_id),
- lst_node_state2str(dents[i].nde_state));
- c++;
- }
-
- fprintf(stdout, "Total %d nodes\n", c);
- free(dents);
-
- return 0;
-}
-
-int
-jt_lst_list_batch(int argc, char **argv)
-{
- lstcon_test_batch_ent_t ent;
- char *batch = NULL;
- int optidx = 0;
- int verbose = 0; /* list nodes in batch or test */
- int invalid = 0;
- int active = 0;
- int server = 0;
- int ntest = 0;
- int test = 0;
- int c = 0;
- int rc;
-
- static struct option list_batch_opts[] =
- {
- {"test", required_argument, 0, 't' },
- {"invalid", no_argument, 0, 'i' },
- {"active", no_argument, 0, 'a' },
- {"all", no_argument, 0, 'l' },
- {"server", no_argument, 0, 's' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
- c = getopt_long(argc, argv, "ailst:",
- list_batch_opts, &optidx);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 'a':
- verbose = active = 1;
- break;
- case 'i':
- verbose = invalid = 1;
- break;
- case 'l':
- verbose = active = invalid = 1;
- break;
- case 's':
- server = 1;
- break;
- case 't':
- test = atoi(optarg);
- ntest = 1;
- break;
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (optind == argc) {
- /* list all batches */
- rc = lst_list_batch_all();
- return rc;
- }
-
- if (ntest == 1 && test <= 0) {
- fprintf(stderr, "Invalid test id, test id starts from 1\n");
- return -1;
- }
-
- if (optind != argc - 1) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- batch = argv[optind];
-
-loop:
- /* show detail of specified batch or test */
- rc = lst_info_batch_ioctl(batch, test, server,
- &ent, NULL, NULL, NULL);
- if (rc != 0) {
- lst_print_error((test > 0) ? "test" : "batch",
- (test > 0) ? "Failed to query test: %s\n" :
- "Failed to query batch: %s\n",
- strerror(errno));
- return -1;
- }
-
- if (verbose) {
- /* list nodes in test or batch */
- rc = lst_list_tsb_nodes(batch, test, server,
- server ? ent.tbe_srv_nle.nle_nnode :
- ent.tbe_cli_nle.nle_nnode,
- active, invalid);
- return rc;
- }
-
- /* only show number of hosts in batch or test */
- if (test == 0) {
- fprintf(stdout, "Batch: %s Tests: %d State: %d\n",
- batch, ent.u.tbe_batch.bae_ntest,
- ent.u.tbe_batch.bae_state);
- ntest = ent.u.tbe_batch.bae_ntest;
- test = 1; /* starting from test 1 */
-
- } else {
- fprintf(stdout,
- "\tTest %d(%s) (loop: %d, concurrency: %d)\n",
- test, lst_test_type2name(ent.u.tbe_test.tse_type),
- ent.u.tbe_test.tse_loop,
- ent.u.tbe_test.tse_concur);
- ntest --;
- test ++;
- }
-
- fprintf(stdout, LST_NODES_TITLE);
- fprintf(stdout, "client\t%d\t%d\t%d\t%d\t%d\n"
- "server\t%d\t%d\t%d\t%d\t%d\n",
- ent.tbe_cli_nle.nle_nactive,
- ent.tbe_cli_nle.nle_nbusy,
- ent.tbe_cli_nle.nle_ndown,
- ent.tbe_cli_nle.nle_nunknown,
- ent.tbe_cli_nle.nle_nnode,
- ent.tbe_srv_nle.nle_nactive,
- ent.tbe_srv_nle.nle_nbusy,
- ent.tbe_srv_nle.nle_ndown,
- ent.tbe_srv_nle.nle_nunknown,
- ent.tbe_srv_nle.nle_nnode);
-
- if (ntest != 0)
- goto loop;
-
- return 0;
-}
-
-int
-lst_query_batch_ioctl(char *batch, int test, int server,
- int timeout, struct list_head *head)
-{
- lstio_batch_query_args_t args = {
- .lstio_bat_key = session_key,
- .lstio_bat_testidx = test,
- .lstio_bat_client = !(server),
- .lstio_bat_timeout = timeout,
- .lstio_bat_nmlen = strlen(batch),
- .lstio_bat_namep = batch,
- .lstio_bat_resultp = head,
- };
-
- return lst_ioctl(LSTIO_BATCH_QUERY, &args, sizeof(args));
-}
-
-void
-lst_print_tsb_verbose(struct list_head *head,
- int active, int idle, int error)
-{
- lstcon_rpc_ent_t *ent;
-
- list_for_each_entry(ent, head, rpe_link) {
- if (ent->rpe_priv[0] == 0 && active)
- continue;
-
- if (ent->rpe_priv[0] != 0 && idle)
- continue;
-
- if (ent->rpe_fwk_errno == 0 && error)
- continue;
-
- fprintf(stdout, "%s [%s]: %s\n",
- libcfs_id2str(ent->rpe_peer),
- lst_node_state2str(ent->rpe_state),
- ent->rpe_rpc_errno != 0 ?
- strerror(ent->rpe_rpc_errno) :
- (ent->rpe_priv[0] > 0 ? "Running" : "Idle"));
- }
-}
-
-int
-jt_lst_query_batch(int argc, char **argv)
-{
- lstcon_test_batch_ent_t ent;
- struct list_head head;
- char *batch = NULL;
- time_t last = 0;
- int optidx = 0;
- int verbose = 0;
- int server = 0;
- int timeout = 5; /* default 5 seconds */
- int delay = 5; /* default 5 seconds */
- int loop = 1; /* default 1 loop */
- int active = 0;
- int error = 0;
- int idle = 0;
- int count = 0;
- int test = 0;
- int rc = 0;
- int c = 0;
- int i;
-
- static struct option query_batch_opts[] =
- {
- {"timeout", required_argument, 0, 'o' },
- {"delay", required_argument, 0, 'd' },
- {"loop", required_argument, 0, 'c' },
- {"test", required_argument, 0, 't' },
- {"server", no_argument, 0, 's' },
- {"active", no_argument, 0, 'a' },
- {"idle", no_argument, 0, 'i' },
- {"error", no_argument, 0, 'e' },
- {"all", no_argument, 0, 'l' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
- c = getopt_long(argc, argv, "o:d:c:t:saiel",
- query_batch_opts, &optidx);
-
- /* Detect the end of the options. */
- if (c == -1)
- break;
-
- switch (c) {
- case 'o':
- timeout = atoi(optarg);
- break;
- case 'd':
- delay = atoi(optarg);
- break;
- case 'c':
- loop = atoi(optarg);
- break;
- case 't':
- test = atoi(optarg);
- break;
- case 's':
- server = 1;
- break;
- case 'a':
- active = verbose = 1;
- break;
- case 'i':
- idle = verbose = 1;
- break;
- case 'e':
- error = verbose = 1;
- break;
- case 'l':
- verbose = 1;
- break;
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (test < 0 || timeout <= 0 || delay <= 0 || loop <= 0) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- if (optind == argc) {
- batch = LST_DEFAULT_BATCH;
-
- } else if (optind == argc - 1) {
- batch = argv[optind];
-
- } else {
- lst_print_usage(argv[0]);
- return -1;
- }
-
-
- CFS_INIT_LIST_HEAD(&head);
-
- if (verbose) {
- rc = lst_info_batch_ioctl(batch, test, server,
- &ent, NULL, NULL, NULL);
- if (rc != 0) {
- fprintf(stderr, "Failed to query %s [%d]: %s\n",
- batch, test, strerror(errno));
- return -1;
- }
-
- count = server ? ent.tbe_srv_nle.nle_nnode :
- ent.tbe_cli_nle.nle_nnode;
- if (count == 0) {
- fprintf(stdout, "Batch or test is empty\n");
- return 0;
- }
- }
-
- rc = lst_alloc_rpcent(&head, count, 0);
- if (rc != 0) {
- fprintf(stderr, "Out of memory\n");
- return rc;
- }
-
- for (i = 0; i < loop; i++) {
- time_t now = time(NULL);
-
- if (now - last < delay) {
- sleep(delay - now + last);
- time(&now);
- }
-
- last = now;
-
- rc = lst_query_batch_ioctl(batch, test,
- server, timeout, &head);
- if (rc == -1) {
- fprintf(stderr, "Failed to query batch: %s\n",
- strerror(errno));
- break;
- }
-
- if (verbose) {
- /* Verbose mode */
- lst_print_tsb_verbose(&head, active, idle, error);
- continue;
- }
-
- fprintf(stdout, "%s [%d] ", batch, test);
-
- if (lstcon_rpc_stat_failure(&trans_stat, 0) != 0) {
- fprintf(stdout, "%d of %d nodes are unknown, ",
- lstcon_rpc_stat_failure(&trans_stat, 0),
- lstcon_rpc_stat_total(&trans_stat, 0));
- }
-
- if (lstcon_rpc_stat_failure(&trans_stat, 0) == 0 &&
- lstcon_tsbqry_stat_run(&trans_stat, 0) == 0 &&
- lstcon_tsbqry_stat_failure(&trans_stat, 0) == 0) {
- fprintf(stdout, "is stopped\n");
- continue;
- }
-
- if (lstcon_rpc_stat_failure(&trans_stat, 0) == 0 &&
- lstcon_tsbqry_stat_idle(&trans_stat, 0) == 0 &&
- lstcon_tsbqry_stat_failure(&trans_stat, 0) == 0) {
- fprintf(stdout, "is running\n");
- continue;
- }
-
- fprintf(stdout, "stopped: %d , running: %d, failed: %d\n",
- lstcon_tsbqry_stat_idle(&trans_stat, 0),
- lstcon_tsbqry_stat_run(&trans_stat, 0),
- lstcon_tsbqry_stat_failure(&trans_stat, 0));
- }
-
- lst_free_rpcent(&head);
-
- return rc;
-}
-
-int
-lst_parse_distribute(char *dstr, int *dist, int *span)
-{
- *dist = atoi(dstr);
- if (*dist <= 0)
- return -1;
-
- dstr = strchr(dstr, ':');
- if (dstr == NULL)
- return -1;
-
- *span = atoi(dstr + 1);
- if (*span <= 0)
- return -1;
-
- return 0;
-}
-
-int
-lst_get_bulk_param(int argc, char **argv, lst_test_bulk_param_t *bulk)
-{
- char *tok = NULL;
- char *end = NULL;
- int rc = 0;
- int i = 0;
-
- bulk->blk_size = 4096;
- bulk->blk_opc = LST_BRW_READ;
- bulk->blk_flags = LST_BRW_CHECK_NONE;
-
- while (i < argc) {
- if (strcasestr(argv[i], "check=") == argv[i] ||
- strcasestr(argv[i], "c=") == argv[i]) {
- tok = strchr(argv[i], '=') + 1;
-
- if (strcasecmp(tok, "full") == 0) {
- bulk->blk_flags = LST_BRW_CHECK_FULL;
- } else if (strcasecmp(tok, "simple") == 0) {
- bulk->blk_flags = LST_BRW_CHECK_SIMPLE;
- } else {
- fprintf(stderr, "Unknow flag %s\n", tok);
- return -1;
- }
-
- } else if (strcasestr(argv[i], "size=") == argv[i] ||
- strcasestr(argv[i], "s=") == argv[i]) {
- tok = strchr(argv[i], '=') + 1;
-
- bulk->blk_size = strtol(tok, &end, 0);
- if (bulk->blk_size <= 0) {
- fprintf(stderr, "Invalid size %s\n", tok);
- return -1;
- }
-
- if (end == NULL)
- return 0;
-
- if (*end == 'k' || *end == 'K')
- bulk->blk_size *= 1024;
- else if (*end == 'm' || *end == 'M')
- bulk->blk_size *= 1024 * 1024;
-
- if (bulk->blk_size > CFS_PAGE_SIZE * LNET_MAX_IOV) {
- fprintf(stderr, "Size exceed limitation: %d bytes\n",
- bulk->blk_size);
- return -1;
- }
-
- } else if (strcasecmp(argv[i], "read") == 0 ||
- strcasecmp(argv[i], "r") == 0) {
- bulk->blk_opc = LST_BRW_READ;
-
- } else if (strcasecmp(argv[i], "write") == 0 ||
- strcasecmp(argv[i], "w") == 0) {
- bulk->blk_opc = LST_BRW_WRITE;
-
- } else {
- fprintf(stderr, "Unknow parameter: %s\n", argv[i]);
- return -1;
- }
-
- i++;
- }
-
- return rc;
-}
-
-int
-lst_get_test_param(char *test, int argc, char **argv, void **param, int *plen)
-{
- lst_test_bulk_param_t *bulk = NULL;
- int type;
-
- type = lst_test_name2type(test);
- if (type < 0) {
- fprintf(stderr, "Unknow test name %s\n", test);
- return -1;
- }
-
- switch (type) {
- case LST_TEST_PING:
- break;
-
- case LST_TEST_BULK:
- bulk = malloc(sizeof(*bulk));
- if (bulk == NULL) {
- fprintf(stderr, "Out of memory\n");
- return -1;
- }
-
- memset(bulk, 0, sizeof(*bulk));
-
- if (lst_get_bulk_param(argc, argv, bulk) != 0) {
- free(bulk);
- return -1;
- }
-
- *param = bulk;
- *plen = sizeof(*bulk);
-
- break;
-
- default:
- break;
- }
-
- /* TODO: parse more parameter */
- return type;
-}
-
-int
-lst_add_test_ioctl(char *batch, int type, int loop, int concur,
- int dist, int span, char *sgrp, char *dgrp,
- void *param, int plen, int *retp, struct list_head *resultp)
-{
- lstio_test_args_t args = {
- .lstio_tes_key = session_key,
- .lstio_tes_bat_nmlen = strlen(batch),
- .lstio_tes_bat_name = batch,
- .lstio_tes_type = type,
- .lstio_tes_loop = loop,
- .lstio_tes_concur = concur,
- .lstio_tes_dist = dist,
- .lstio_tes_span = span,
- .lstio_tes_sgrp_nmlen = strlen(sgrp),
- .lstio_tes_sgrp_name = sgrp,
- .lstio_tes_dgrp_nmlen = strlen(dgrp),
- .lstio_tes_dgrp_name = dgrp,
- .lstio_tes_param_len = plen,
- .lstio_tes_param = param,
- .lstio_tes_retp = retp,
- .lstio_tes_resultp = resultp,
- };
-
- return lst_ioctl(LSTIO_TEST_ADD, &args, sizeof(args));
-}
-
-int
-jt_lst_add_test(int argc, char **argv)
-{
- struct list_head head;
- char *batch = NULL;
- char *test = NULL;
- char *dstr = NULL;
- char *from = NULL;
- char *to = NULL;
- void *param = NULL;
- int optidx = 0;
- int concur = 1;
- int loop = -1;
- int dist = 1;
- int span = 1;
- int plen = 0;
- int fcount = 0;
- int tcount = 0;
- int ret = 0;
- int type;
- int rc;
- int c;
-
- static struct option add_test_opts[] =
- {
- {"batch", required_argument, 0, 'b' },
- {"concurrency", required_argument, 0, 'c' },
- {"distribute", required_argument, 0, 'd' },
- {"from", required_argument, 0, 'f' },
- {"to", required_argument, 0, 't' },
- {"loop", required_argument, 0, 'l' },
- {0, 0, 0, 0 }
- };
-
- if (session_key == 0) {
- fprintf(stderr,
- "Can't find env LST_SESSION or value is not valid\n");
- return -1;
- }
-
- while (1) {
- c = getopt_long(argc, argv, "b:c:d:f:l:t:",
- add_test_opts, &optidx);
-
- /* Detect the end of the options. */
- if (c == -1)
- break;
-
- switch (c) {
- case 'b':
- batch = optarg;
- break;
- case 'c':
- concur = atoi(optarg);
- break;
- case 'd':
- dstr = optarg;
- break;
- case 'f':
- from = optarg;
- break;
- case 'l':
- loop = atoi(optarg);
- break;
- case 't':
- to = optarg;
- break;
- default:
- lst_print_usage(argv[0]);
- return -1;
- }
- }
-
- if (optind == argc || from == NULL || to == NULL) {
- lst_print_usage(argv[0]);
- return -1;
- }
-
- if (concur <= 0 || concur > LST_MAX_CONCUR) {
- fprintf(stderr, "Invalid concurrency of test: %d\n", concur);
- return -1;
- }
-
- if (batch == NULL)
- batch = LST_DEFAULT_BATCH;
-
- if (dstr != NULL) {
- rc = lst_parse_distribute(dstr, &dist, &span);
- if (rc != 0) {
- fprintf(stderr, "Invalid distribution: %s\n", dstr);
- return -1;
- }
- }
-
- test = argv[optind++];
-
- argc -= optind;
- argv += optind;
-
- type = lst_get_test_param(test, argc, argv, ¶m, &plen);
- if (type < 0) {
- fprintf(stderr, "Failed to add test (%s)\n", test);
- return -1;
- }
-
- CFS_INIT_LIST_HEAD(&head);
-
- rc = lst_get_node_count(LST_OPC_GROUP, from, &fcount, NULL);
- if (rc != 0) {
- fprintf(stderr, "Can't get count of nodes from %s: %s\n",
- from, strerror(errno));
- goto out;
- }
-
- rc = lst_get_node_count(LST_OPC_GROUP, to, &tcount, NULL);
- if (rc != 0) {
- fprintf(stderr, "Can't get count of nodes from %s: %s\n",
- to, strerror(errno));
- goto out;
- }
-
- rc = lst_alloc_rpcent(&head, fcount > tcount ? fcount : tcount, 0);
- if (rc != 0) {
- fprintf(stderr, "Out of memory\n");
- goto out;
- }
-
- rc = lst_add_test_ioctl(batch, type, loop, concur,
- dist, span, from, to, param, plen, &ret, &head);
-
- if (rc == 0) {
- fprintf(stdout, "Test was added successfully\n");
- if (ret != 0) {
- fprintf(stdout, "Server group contains userland test "
- "nodes, old version of tcplnd can't accept "
- "connection request\n");
- }
-
- goto out;
- }
-
- if (rc == -1) {
- lst_print_error("test", "Failed to add test: %s\n",
- strerror(errno));
- goto out;
- }
-
- lst_print_transerr(&head, "add test");
-out:
- lst_free_rpcent(&head);
-
- if (param != NULL)
- free(param);
-
- return rc;
-}
-
-static command_t lst_cmdlist[] = {
- {"new_session", jt_lst_new_session, NULL,
- "Usage: lst new_session [--timeout TIME] [--force] [NAME]" },
- {"end_session", jt_lst_end_session, NULL,
- "Usage: lst end_session" },
- {"show_session", jt_lst_show_session, NULL,
- "Usage: lst show_session" },
- {"ping", jt_lst_ping , NULL,
- "Usage: lst ping [--group NAME] [--batch NAME] [--session] [--nodes IDS]" },
- {"add_group", jt_lst_add_group, NULL,
- "Usage: lst group NAME IDs [IDs]..." },
- {"del_group", jt_lst_del_group, NULL,
- "Usage: lst del_group NAME" },
- {"update_group", jt_lst_update_group, NULL,
- "Usage: lst update_group NAME [--clean] [--refresh] [--remove IDs]" },
- {"list_group", jt_lst_list_group, NULL,
- "Usage: lst list_group [--active] [--busy] [--down] [--unknown] GROUP ..." },
- {"stat", jt_lst_stat, NULL,
- "Usage: lst stat [--bw] [--rate] [--read] [--write] [--max] [--min] [--avg] "
- " [--timeout #] [--delay #] GROUP [GROUP]" },
- {"show_error", jt_lst_show_error, NULL,
- "Usage: lst show_error NAME | IDS ..." },
- {"add_batch", jt_lst_add_batch, NULL,
- "Usage: lst add_batch NAME" },
- {"run", jt_lst_start_batch, NULL,
- "Usage: lst run [--timeout TIME] [NAME]" },
- {"stop", jt_lst_stop_batch, NULL,
- "Usage: lst stop [--force] BATCH_NAME" },
- {"list_batch", jt_lst_list_batch, NULL,
- "Usage: lst list_batch NAME [--test ID] [--server]" },
- {"query", jt_lst_query_batch, NULL,
- "Usage: lst query [--test ID] [--server] [--timeout TIME] NAME" },
- {"add_test", jt_lst_add_test, NULL,
- "Usage: lst add_test [--batch BATCH] [--loop #] [--concurrency #] "
- " [--distribute #:#] [--from GROUP] [--to GROUP] TEST..." },
- {"help", Parser_help, 0, "help" },
- {0, 0, 0, NULL }
-};
-
-int
-lst_initialize(void)
-{
- char *key;
-
- key = getenv("LST_SESSION");
-
- if (key == NULL) {
- session_key = 0;
- return 0;
- }
-
- session_key = atoi(key);
-
- return 0;
-}
-
-int
-main(int argc, char **argv)
-{
- setlinebuf(stdout);
-
- if (lst_initialize() < 0)
- exit(0);
-
- if (ptl_initialize(argc, argv) < 0)
- exit(0);
-
- Parser_init("lst > ", lst_cmdlist);
-
- if (argc != 1)
- return Parser_execarg(argc - 1, argv + 1, lst_cmdlist);
-
- Parser_commands();
-
- return 0;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <errno.h>
-#include <pwd.h>
-#include <lnet/lnetctl.h>
-#include <lnet/lnetst.h>
-#include "../selftest/rpc.h"
-#include "../selftest/selftest.h"
-
-static int lstjn_stopping = 0;
-static int lstjn_intialized = 0;
-
-unsigned int libcfs_subsystem_debug = ~0 - (S_LNET | S_LND);
-unsigned int libcfs_debug = 0;
-
-static struct option lstjn_options[] =
-{
- {"sesid", required_argument, 0, 's' },
- {"group", required_argument, 0, 'g' },
- {"server_mode", no_argument, 0, 'm' },
- {0, 0, 0, 0 }
-};
-
-void
-lstjn_stop (int sig)
-{
- lstjn_stopping = 1;
-}
-
-void
-lstjn_rpc_done(srpc_client_rpc_t *rpc)
-{
- if (!lstjn_intialized)
- lstjn_intialized = 1;
-}
-
-int
-lstjn_join_session(char *ses, char *grp)
-{
- lnet_process_id_t sesid;
- srpc_client_rpc_t *rpc;
- srpc_join_reqst_t *req;
- srpc_join_reply_t *rep;
- srpc_mksn_reqst_t *sreq;
- srpc_mksn_reply_t *srep;
- int rc;
-
- sesid.pid = LUSTRE_LNET_PID;
- sesid.nid = libcfs_str2nid(ses);
- if (sesid.nid == LNET_NID_ANY) {
- fprintf(stderr, "Invalid session NID: %s\n", ses);
- return -1;
- }
-
- rpc = sfw_create_rpc(sesid, SRPC_SERVICE_JOIN, 0,
- 0, lstjn_rpc_done, NULL);
- if (rpc == NULL) {
- fprintf(stderr, "Out of memory\n");
- return -1;
- }
-
- req = &rpc->crpc_reqstmsg.msg_body.join_reqst;
-
- req->join_sid = LST_INVALID_SID;
- strncpy(req->join_group, grp, LST_NAME_SIZE);
-
- sfw_post_rpc(rpc);
-
- for (;;) {
- rc = selftest_wait_events();
-
- if (lstjn_intialized)
- break;
- }
-
- if (rpc->crpc_status != 0) {
- fprintf(stderr, "Failed to send RPC to console: %s\n",
- strerror(rpc->crpc_status));
- srpc_client_rpc_decref(rpc);
- return -1;
- }
-
- sfw_unpack_message(&rpc->crpc_replymsg);
-
- rep = &rpc->crpc_replymsg.msg_body.join_reply;
- if (rep->join_status != 0) {
- fprintf(stderr, "Can't join session %s group %s: %s\n",
- ses, grp, strerror(rep->join_status));
- srpc_client_rpc_decref(rpc);
- return -1;
- }
-
- sreq = &rpc->crpc_reqstmsg.msg_body.mksn_reqst;
- sreq->mksn_sid = rep->join_sid;
- sreq->mksn_force = 0;
- strcpy(sreq->mksn_name, rep->join_session);
-
- srep = &rpc->crpc_replymsg.msg_body.mksn_reply;
-
- rc = sfw_make_session(sreq, srep);
- if (rc != 0 || srep->mksn_status != 0) {
- fprintf(stderr, "Can't create session: %d, %s\n",
- rc, strerror(srep->mksn_status));
- srpc_client_rpc_decref(rpc);
- return -1;
- }
-
- fprintf(stdout, "Session %s, ID: %s, %Lu\n",
- ses, libcfs_nid2str(rep->join_sid.ses_nid),
- rep->join_sid.ses_stamp);
-
- srpc_client_rpc_decref(rpc);
-
- return 0;
-}
-
-int
-main(int argc, char **argv)
-{
- char *ses = NULL;
- char *grp = NULL;
- int server_mode_flag = 0;
- int optidx;
- int c;
- int rc;
-
- const char *usage_string =
- "Usage: lstclient --sesid ID --group GROUP [--server_mode]\n";
-
- while (1) {
- c = getopt_long(argc, argv, "s:g:m",
- lstjn_options, &optidx);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 's':
- ses = optarg;
- break;
- case 'g':
- grp = optarg;
- break;
- case 'm':
- server_mode_flag = 1;
- break;
- default:
- fprintf(stderr, usage_string);
- return -1;
- }
- }
-
- if (optind != argc || grp == NULL || ses == NULL) {
- fprintf(stderr, usage_string);
- return -1;
- }
-
- rc = libcfs_debug_init(5 * 1024 * 1024);
- if (rc != 0) {
- CERROR("libcfs_debug_init() failed: %d\n", rc);
- return -1;
- }
-
- rc = LNetInit();
- if (rc != 0) {
- CERROR("LNetInit() failed: %d\n", rc);
- libcfs_debug_cleanup();
- return -1;
- }
-
- if (server_mode_flag)
- lnet_server_mode();
-
- rc = lnet_selftest_init();
- if (rc != 0) {
- fprintf(stderr, "Can't startup selftest\n");
- LNetFini();
- libcfs_debug_cleanup();
-
- return -1;
- }
-
- rc = lstjn_join_session(ses, grp);
- if (rc != 0)
- goto out;
-
- signal(SIGINT, lstjn_stop);
-
- fprintf(stdout, "Start handling selftest requests, Ctl-C to stop\n");
-
- while (!lstjn_stopping) {
- selftest_wait_events();
-
- if (!sfw_session_removed())
- continue;
-
- fprintf(stdout, "Session ended\n");
- break;
- }
-
-out:
- lnet_selftest_fini();
-
- LNetFini();
-
- libcfs_debug_cleanup();
-
- return rc;
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-#include <stddef.h>
-#include <unistd.h>
-#include <sys/param.h>
-#include <assert.h>
-#include <lnet/api-support.h>
-
-#include "parser.h"
-
-static command_t * top_level; /* Top level of commands, initialized by
- * InitParser */
-static char * parser_prompt = NULL;/* Parser prompt, set by InitParser */
-static int done; /* Set to 1 if user types exit or quit */
-
-
-/* static functions */
-static char *skipwhitespace(char *s);
-static char *skiptowhitespace(char *s);
-static command_t *find_cmd(char *name, command_t cmds[], char **next);
-static int process(char *s, char **next, command_t *lookup, command_t **result,
- char **prev);
-static void print_commands(char *str, command_t *table);
-
-static char * skipwhitespace(char * s)
-{
- char * t;
- int len;
-
- len = (int)strlen(s);
- for (t = s; t <= s + len && isspace(*t); t++);
- return(t);
-}
-
-
-static char * skiptowhitespace(char * s)
-{
- char * t;
-
- for (t = s; *t && !isspace(*t); t++);
- return(t);
-}
-
-static int line2args(char *line, char **argv, int maxargs)
-{
- char *arg;
- int i = 0;
-
- arg = strtok(line, " \t");
- if ( arg ) {
- argv[i] = arg;
- i++;
- } else
- return 0;
-
- while( (arg = strtok(NULL, " \t")) && (i <= maxargs)) {
- argv[i] = arg;
- i++;
- }
- return i;
-}
-
-/* find a command -- return it if unique otherwise print alternatives */
-static command_t *Parser_findargcmd(char *name, command_t cmds[])
-{
- command_t *cmd;
-
- for (cmd = cmds; cmd->pc_name; cmd++) {
- if (strcmp(name, cmd->pc_name) == 0)
- return cmd;
- }
- return NULL;
-}
-
-int Parser_execarg(int argc, char **argv, command_t cmds[])
-{
- command_t *cmd;
-
- cmd = Parser_findargcmd(argv[0], cmds);
- if ( cmd ) {
- int rc = (cmd->pc_func)(argc, argv);
- if (rc == CMD_HELP)
- fprintf(stderr, "%s\n", cmd->pc_help);
- return rc;
- } else {
- printf("Try interactive use without arguments or use one of:\n");
- for (cmd = cmds; cmd->pc_name; cmd++)
- printf("\"%s\" ", cmd->pc_name);
- printf("\nas argument.\n");
- }
- return -1;
-}
-
-/* returns the command_t * (NULL if not found) corresponding to a
- _partial_ match with the first token in name. It sets *next to
- point to the following token. Does not modify *name. */
-static command_t * find_cmd(char * name, command_t cmds[], char ** next)
-{
- int i, len;
-
- if (!cmds || !name )
- return NULL;
-
- /* This sets name to point to the first non-white space character,
- and next to the first whitespace after name, len to the length: do
- this with strtok*/
- name = skipwhitespace(name);
- *next = skiptowhitespace(name);
- len = *next - name;
- if (len == 0)
- return NULL;
-
- for (i = 0; cmds[i].pc_name; i++) {
- if (strncasecmp(name, cmds[i].pc_name, len) == 0) {
- *next = skipwhitespace(*next);
- return(&cmds[i]);
- }
- }
- return NULL;
-}
-
-/* Recursively process a command line string s and find the command
- corresponding to it. This can be ambiguous, full, incomplete,
- non-existent. */
-static int process(char *s, char ** next, command_t *lookup,
- command_t **result, char **prev)
-{
- *result = find_cmd(s, lookup, next);
- *prev = s;
-
- /* non existent */
- if ( ! *result )
- return CMD_NONE;
-
- /* found entry: is it ambigous, i.e. not exact command name and
- more than one command in the list matches. Note that find_cmd
- points to the first ambiguous entry */
- if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) &&
- find_cmd(s, (*result) + 1, next))
- return CMD_AMBIG;
-
- /* found a unique command: component or full? */
- if ( (*result)->pc_func ) {
- return CMD_COMPLETE;
- } else {
- if ( *next == '\0' ) {
- return CMD_INCOMPLETE;
- } else {
- return process(*next, next, (*result)->pc_sub_cmd, result, prev);
- }
- }
-}
-
-#ifdef HAVE_LIBREADLINE
-static command_t * match_tbl; /* Command completion against this table */
-static char * command_generator(const char * text, int state)
-{
- static int index,
- len;
- char *name;
-
- /* Do we have a match table? */
- if (!match_tbl)
- return NULL;
-
- /* If this is the first time called on this word, state is 0 */
- if (!state) {
- index = 0;
- len = (int)strlen(text);
- }
-
- /* Return next name in the command list that paritally matches test */
- while ( (name = (match_tbl + index)->pc_name) ) {
- index++;
-
- if (strncasecmp(name, text, len) == 0) {
- return(strdup(name));
- }
- }
-
- /* No more matches */
- return NULL;
-}
-
-/* probably called by readline */
-static char **command_completion(char * text, int start, int end)
-{
- command_t * table;
- char * pos;
-
- match_tbl = top_level;
-
- for (table = find_cmd(rl_line_buffer, match_tbl, &pos);
- table; table = find_cmd(pos, match_tbl, &pos))
- {
-
- if (*(pos - 1) == ' ') match_tbl = table->pc_sub_cmd;
- }
-
- return completion_matches(text, command_generator);
-}
-#endif
-
-/* take a string and execute the function or print help */
-int execute_line(char * line)
-{
- command_t *cmd, *ambig;
- char *prev;
- char *next, *tmp;
- char *argv[MAXARGS];
- int i;
- int rc = 0;
-
- switch( process(line, &next, top_level, &cmd, &prev) ) {
- case CMD_AMBIG:
- fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
- while( (ambig = find_cmd(prev, cmd, &tmp)) ) {
- fprintf(stderr, "%s ", ambig->pc_name);
- cmd = ambig + 1;
- }
- fprintf(stderr, "\n");
- break;
- case CMD_NONE:
- fprintf(stderr, "No such command, type help\n");
- break;
- case CMD_INCOMPLETE:
- fprintf(stderr,
- "'%s' incomplete command. Use '%s x' where x is one of:\n",
- line, line);
- fprintf(stderr, "\t");
- for (i = 0; cmd->pc_sub_cmd[i].pc_name; i++) {
- fprintf(stderr, "%s ", cmd->pc_sub_cmd[i].pc_name);
- }
- fprintf(stderr, "\n");
- break;
- case CMD_COMPLETE:
- i = line2args(line, argv, MAXARGS);
- rc = (cmd->pc_func)(i, argv);
-
- if (rc == CMD_HELP)
- fprintf(stderr, "%s\n", cmd->pc_help);
-
- break;
- }
-
- return rc;
-}
-
-int
-noop_fn ()
-{
- return (0);
-}
-
-/* just in case you're ever in an airplane and discover you
- forgot to install readline-dev. :) */
-int init_input()
-{
- int interactive = isatty (fileno (stdin));
-
-#ifdef HAVE_LIBREADLINE
- using_history();
- stifle_history(HISTORY);
-
- if (!interactive)
- {
- rl_prep_term_function = (rl_vintfunc_t *)noop_fn;
- rl_deprep_term_function = (rl_voidfunc_t *)noop_fn;
- }
-
- rl_attempted_completion_function = (CPPFunction *)command_completion;
- rl_completion_entry_function = (void *)command_generator;
-#endif
- return interactive;
-}
-
-#ifndef HAVE_LIBREADLINE
-#define add_history(s)
-char * readline(char * prompt)
-{
- char line[2048];
- int n = 0;
- if (prompt)
- printf ("%s", prompt);
- if (fgets(line, sizeof(line), stdin) == NULL)
- return (NULL);
- n = strlen(line);
- if (n && line[n-1] == '\n')
- line[n-1] = '\0';
- return strdup(line);
-}
-#endif
-
-/* this is the command execution machine */
-int Parser_commands(void)
-{
- char *line, *s;
- int rc = 0;
- int interactive;
-
- interactive = init_input();
-
- while(!done) {
- line = readline(interactive ? parser_prompt : NULL);
-
- if (!line) break;
-
- s = skipwhitespace(line);
-
- if (*s) {
- add_history(s);
- rc = execute_line(s);
-
- /* reset optind to 0 to tell getopt
- * to reinitialize itself */
- optind = 0;
- }
-
- free(line);
- }
- return rc;
-}
-
-
-/* sets the parser prompt */
-void Parser_init(char * prompt, command_t * cmds)
-{
- done = 0;
- top_level = cmds;
- if (parser_prompt) free(parser_prompt);
- parser_prompt = strdup(prompt);
-}
-
-/* frees the parser prompt */
-void Parser_exit(int argc, char *argv[])
-{
- done = 1;
- free(parser_prompt);
- parser_prompt = NULL;
-}
-
-/* convert a string to an integer */
-int Parser_int(char *s, int *val)
-{
- int ret;
-
- if (*s != '0')
- ret = sscanf(s, "%d", val);
- else if (*(s+1) != 'x')
- ret = sscanf(s, "%o", val);
- else {
- s++;
- ret = sscanf(++s, "%x", val);
- }
-
- return(ret);
-}
-
-
-void Parser_qhelp(int argc, char *argv[]) {
-
- printf("Available commands are:\n");
-
- print_commands(NULL, top_level);
- printf("For more help type: help command-name\n");
-}
-
-int Parser_help(int argc, char **argv)
-{
- char line[1024];
- char *next, *prev, *tmp;
- command_t *result, *ambig;
- int i;
-
- if ( argc == 1 ) {
- Parser_qhelp(argc, argv);
- return 0;
- }
-
- line[0]='\0';
- for ( i = 1 ; i < argc ; i++ ) {
- strcat(line, argv[i]);
- }
-
- switch ( process(line, &next, top_level, &result, &prev) ) {
- case CMD_COMPLETE:
- fprintf(stderr, "%s: %s\n",line, result->pc_help);
- break;
- case CMD_NONE:
- fprintf(stderr, "%s: Unknown command.\n", line);
- break;
- case CMD_INCOMPLETE:
- fprintf(stderr,
- "'%s' incomplete command. Use '%s x' where x is one of:\n",
- line, line);
- fprintf(stderr, "\t");
- for (i = 0; result->pc_sub_cmd[i].pc_name; i++) {
- fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name);
- }
- fprintf(stderr, "\n");
- break;
- case CMD_AMBIG:
- fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
- while( (ambig = find_cmd(prev, result, &tmp)) ) {
- fprintf(stderr, "%s ", ambig->pc_name);
- result = ambig + 1;
- }
- fprintf(stderr, "\n");
- break;
- }
- return 0;
-}
-
-
-void Parser_printhelp(char *cmd)
-{
- char *argv[] = { "help", cmd };
- Parser_help(2, argv);
-}
-
-/*************************************************************************
- * COMMANDS *
- *************************************************************************/
-
-
-static void print_commands(char * str, command_t * table) {
- command_t * cmds;
- char buf[80];
-
- for (cmds = table; cmds->pc_name; cmds++) {
- if (cmds->pc_func) {
- if (str) printf("\t%s %s\n", str, cmds->pc_name);
- else printf("\t%s\n", cmds->pc_name);
- }
- if (cmds->pc_sub_cmd) {
- if (str) {
- sprintf(buf, "%s %s", str, cmds->pc_name);
- print_commands(buf, cmds->pc_sub_cmd);
- } else {
- print_commands(cmds->pc_name, cmds->pc_sub_cmd);
- }
- }
- }
-}
-
-char *Parser_getstr(const char *prompt, const char *deft, char *res,
- size_t len)
-{
- char *line = NULL;
- int size = strlen(prompt) + strlen(deft) + 8;
- char *theprompt;
- theprompt = malloc(size);
- assert(theprompt);
-
- sprintf(theprompt, "%s [%s]: ", prompt, deft);
-
- line = readline(theprompt);
- free(theprompt);
-
- if ( line == NULL || *line == '\0' ) {
- strncpy(res, deft, len);
- } else {
- strncpy(res, line, len);
- }
-
- if ( line ) {
- free(line);
- return res;
- } else {
- return NULL;
- }
-}
-
-/* get integer from prompt, loop forever to get it */
-int Parser_getint(const char *prompt, long min, long max, long deft, int base)
-{
- int rc;
- long result;
- char *line;
- int size = strlen(prompt) + 40;
- char *theprompt = malloc(size);
- assert(theprompt);
- sprintf(theprompt,"%s [%ld, (0x%lx)]: ", prompt, deft, deft);
-
- fflush(stdout);
-
- do {
- line = NULL;
- line = readline(theprompt);
- if ( !line ) {
- fprintf(stdout, "Please enter an integer.\n");
- fflush(stdout);
- continue;
- }
- if ( *line == '\0' ) {
- free(line);
- result = deft;
- break;
- }
- rc = Parser_arg2int(line, &result, base);
- free(line);
- if ( rc != 0 ) {
- fprintf(stdout, "Invalid string.\n");
- fflush(stdout);
- } else if ( result > max || result < min ) {
- fprintf(stdout, "Error: response must lie between %ld and %ld.\n",
- min, max);
- fflush(stdout);
- } else {
- break;
- }
- } while ( 1 ) ;
-
- if (theprompt)
- free(theprompt);
- return result;
-
-}
-
-/* get boolean (starting with YyNn; loop forever */
-int Parser_getbool(const char *prompt, int deft)
-{
- int result = 0;
- char *line;
- int size = strlen(prompt) + 8;
- char *theprompt = malloc(size);
- assert(theprompt);
-
- fflush(stdout);
-
- if ( deft != 0 && deft != 1 ) {
- fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n",
- deft);
- assert ( 0 );
- }
- sprintf(theprompt, "%s [%s]: ", prompt, (deft==0)? "N" : "Y");
-
- do {
- line = NULL;
- line = readline(theprompt);
- if ( line == NULL ) {
- result = deft;
- break;
- }
- if ( *line == '\0' ) {
- result = deft;
- break;
- }
- if ( *line == 'y' || *line == 'Y' ) {
- result = 1;
- break;
- }
- if ( *line == 'n' || *line == 'N' ) {
- result = 0;
- break;
- }
- if ( line )
- free(line);
- fprintf(stdout, "Invalid string. Must start with yY or nN\n");
- fflush(stdout);
- } while ( 1 );
-
- if ( line )
- free(line);
- if ( theprompt )
- free(theprompt);
- return result;
-}
-
-/* parse int out of a string or prompt for it */
-long Parser_intarg(const char *inp, const char *prompt, int deft,
- int min, int max, int base)
-{
- long result;
- int rc;
-
- rc = Parser_arg2int(inp, &result, base);
-
- if ( rc == 0 ) {
- return result;
- } else {
- return Parser_getint(prompt, deft, min, max, base);
- }
-}
-
-/* parse int out of a string or prompt for it */
-char *Parser_strarg(char *inp, const char *prompt, const char *deft,
- char *answer, int len)
-{
- if ( inp == NULL || *inp == '\0' ) {
- return Parser_getstr(prompt, deft, answer, len);
- } else
- return inp;
-}
-
-/* change a string into a number: return 0 on success. No invalid characters
- allowed. The processing of base and validity follows strtol(3)*/
-int Parser_arg2int(const char *inp, long *result, int base)
-{
- char *endptr;
-
- if ( (base !=0) && (base < 2 || base > 36) )
- return 1;
-
- *result = strtol(inp, &endptr, base);
-
- if ( *inp != '\0' && *endptr == '\0' )
- return 0;
- else
- return 1;
-}
-
-int Parser_quit(int argc, char **argv)
-{
- argc = argc;
- argv = argv;
- done = 1;
- return 0;
-}
+++ /dev/null
-#ifndef _PARSER_H_
-#define _PARSER_H_
-
-#define HISTORY 100 /* Don't let history grow unbounded */
-#define MAXARGS 512
-
-#define CMD_COMPLETE 0
-#define CMD_INCOMPLETE 1
-#define CMD_NONE 2
-#define CMD_AMBIG 3
-#define CMD_HELP 4
-
-typedef struct parser_cmd {
- char *pc_name;
- int (* pc_func)(int, char **);
- struct parser_cmd * pc_sub_cmd;
- char *pc_help;
-} command_t;
-
-typedef struct argcmd {
- char *ac_name;
- int (*ac_func)(int, char **);
- char *ac_help;
-} argcmd_t;
-
-typedef struct network {
- char *type;
- char *server;
- int port;
-} network_t;
-
-int Parser_quit(int argc, char **argv);
-void Parser_init(char *, command_t *); /* Set prompt and load command list */
-int Parser_commands(void); /* Start the command parser */
-void Parser_qhelp(int, char **); /* Quick help routine */
-int Parser_help(int, char **); /* Detailed help routine */
-void Parser_printhelp(char *); /* Detailed help routine */
-void Parser_exit(int, char **); /* Shuts down command parser */
-int Parser_execarg(int argc, char **argv, command_t cmds[]);
-int execute_line(char * line);
-
-/* Converts a string to an integer */
-int Parser_int(char *, int *);
-
-/* Prompts for a string, with default values and a maximum length */
-char *Parser_getstr(const char *prompt, const char *deft, char *res,
- size_t len);
-
-/* Prompts for an integer, with minimum, maximum and default values and base */
-int Parser_getint(const char *prompt, long min, long max, long deft,
- int base);
-
-/* Prompts for a yes/no, with default */
-int Parser_getbool(const char *prompt, int deft);
-
-/* Extracts an integer from a string, or prompts if it cannot get one */
-long Parser_intarg(const char *inp, const char *prompt, int deft,
- int min, int max, int base);
-
-/* Extracts a word from the input, or propmts if it cannot get one */
-char *Parser_strarg(char *inp, const char *prompt, const char *deft,
- char *answer, int len);
-
-/* Extracts an integer from a string with a base */
-int Parser_arg2int(const char *inp, long *result, int base);
-
-#endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <stdio.h>
-#include <sys/types.h>
-#ifdef HAVE_NETDB_H
-#include <netdb.h>
-#endif
-#include <sys/socket.h>
-#ifdef HAVE_NETINET_TCP_H
-#include <netinet/tcp.h>
-#endif
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#ifdef HAVE_SYS_IOCTL_H
-#include <sys/ioctl.h>
-#endif
-#ifndef _IOWR
-#include "ioctl.h"
-#endif
-#include <errno.h>
-#include <unistd.h>
-#include <time.h>
-#include <stdarg.h>
-#ifdef HAVE_ENDIAN_H
-#include <endian.h>
-#endif
-
-#include <libcfs/portals_utils.h>
-#include <lnet/api-support.h>
-#include <lnet/lnetctl.h>
-#include <lnet/socklnd.h>
-#include "parser.h"
-
-unsigned int libcfs_debug;
-unsigned int libcfs_printk = D_CANTMASK;
-
-static int g_net_set;
-static __u32 g_net;
-
-/* Convert a string boolean to an int; "enable" -> 1 */
-int
-lnet_parse_bool (int *b, char *str)
-{
- if (!strcasecmp (str, "no") ||
- !strcasecmp (str, "n") ||
- !strcasecmp (str, "off") ||
- !strcasecmp (str, "down") ||
- !strcasecmp (str, "disable"))
- {
- *b = 0;
- return (0);
- }
-
- if (!strcasecmp (str, "yes") ||
- !strcasecmp (str, "y") ||
- !strcasecmp (str, "on") ||
- !strcasecmp (str, "up") ||
- !strcasecmp (str, "enable"))
- {
- *b = 1;
- return (0);
- }
-
- return (-1);
-}
-
-int
-lnet_parse_port (int *port, char *str)
-{
- char *end;
-
- *port = strtol (str, &end, 0);
-
- if (*end == 0 && /* parsed whole string */
- *port > 0 && *port < 65536) /* minimal sanity check */
- return (0);
-
- return (-1);
-}
-
-#ifdef HAVE_GETHOSTBYNAME
-static struct hostent *
-ptl_gethostbyname(char * hname) {
- struct hostent *he;
- he = gethostbyname(hname);
- if (!he) {
- switch(h_errno) {
- case HOST_NOT_FOUND:
- case NO_ADDRESS:
- fprintf(stderr, "Unable to resolve hostname: %s\n",
- hname);
- break;
- default:
- fprintf(stderr, "gethostbyname error for %s: %s\n",
- hname, strerror(h_errno));
- break;
- }
- return NULL;
- }
- return he;
-}
-#endif
-
-int
-lnet_parse_ipquad (__u32 *ipaddrp, char *str)
-{
- int a;
- int b;
- int c;
- int d;
-
- if (sscanf (str, "%d.%d.%d.%d", &a, &b, &c, &d) == 4 &&
- (a & ~0xff) == 0 && (b & ~0xff) == 0 &&
- (c & ~0xff) == 0 && (d & ~0xff) == 0)
- {
- *ipaddrp = (a<<24)|(b<<16)|(c<<8)|d;
- return (0);
- }
-
- return (-1);
-}
-
-int
-lnet_parse_ipaddr (__u32 *ipaddrp, char *str)
-{
-#ifdef HAVE_GETHOSTBYNAME
- struct hostent *he;
-#endif
-
- if (!strcmp (str, "_all_")) {
- *ipaddrp = 0;
- return (0);
- }
-
- if (lnet_parse_ipquad(ipaddrp, str) == 0)
- return (0);
-
-#ifdef HAVE_GETHOSTBYNAME
- if ((('a' <= str[0] && str[0] <= 'z') ||
- ('A' <= str[0] && str[0] <= 'Z')) &&
- (he = ptl_gethostbyname (str)) != NULL) {
- __u32 addr = *(__u32 *)he->h_addr;
-
- *ipaddrp = ntohl(addr); /* HOST byte order */
- return (0);
- }
-#endif
-
- return (-1);
-}
-
-char *
-ptl_ipaddr_2_str (__u32 ipaddr, char *str, int lookup)
-{
-#ifdef HAVE_GETHOSTBYNAME
- __u32 net_ip;
- struct hostent *he;
-
- if (lookup) {
- net_ip = htonl (ipaddr);
- he = gethostbyaddr (&net_ip, sizeof (net_ip), AF_INET);
- if (he != NULL) {
- strcpy(str, he->h_name);
- return (str);
- }
- }
-#endif
-
- sprintf (str, "%d.%d.%d.%d",
- (ipaddr >> 24) & 0xff, (ipaddr >> 16) & 0xff,
- (ipaddr >> 8) & 0xff, ipaddr & 0xff);
- return (str);
-}
-
-int
-lnet_parse_time (time_t *t, char *str)
-{
- char *end;
- int n;
- struct tm tm;
-
- *t = strtol (str, &end, 0);
- if (*end == 0) /* parsed whole string */
- return (0);
-
- memset (&tm, 0, sizeof (tm));
- n = sscanf (str, "%d-%d-%d-%d:%d:%d",
- &tm.tm_year, &tm.tm_mon, &tm.tm_mday,
- &tm.tm_hour, &tm.tm_min, &tm.tm_sec);
- if (n != 6)
- return (-1);
-
- tm.tm_mon--; /* convert to 0 == Jan */
- tm.tm_year -= 1900; /* y2k quirk */
- tm.tm_isdst = -1; /* dunno if it's daylight savings... */
-
- *t = mktime (&tm);
- if (*t == (time_t)-1)
- return (-1);
-
- return (0);
-}
-
-int g_net_is_set (char *cmd)
-{
- if (g_net_set)
- return 1;
-
- if (cmd != NULL)
- fprintf(stderr,
- "You must run the 'network' command before '%s'.\n",
- cmd);
- return 0;
-}
-
-int g_net_is_compatible (char *cmd, ...)
-{
- va_list ap;
- int nal;
-
- if (!g_net_is_set(cmd))
- return 0;
-
- va_start(ap, cmd);
-
- do {
- nal = va_arg (ap, int);
- if (nal == LNET_NETTYP(g_net)) {
- va_end (ap);
- return 1;
- }
- } while (nal != 0);
-
- va_end (ap);
-
- if (cmd != NULL)
- fprintf (stderr,
- "Command %s not compatible with %s NAL\n",
- cmd,
- libcfs_lnd2str(LNET_NETTYP(g_net)));
- return 0;
-}
-
-int ptl_initialize(int argc, char **argv)
-{
- register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH,
- LNET_DEV_MAJOR, LNET_DEV_MINOR);
- return 0;
-}
-
-
-int jt_ptl_network(int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- __u32 net = LNET_NIDNET(LNET_NID_ANY);
- int rc;
-
- if (argc < 2) {
- fprintf(stderr, "usage: %s <net>|up|down\n", argv[0]);
- return 0;
- }
-
- if (!strcmp(argv[1], "unconfigure") ||
- !strcmp(argv[1], "down")) {
- LIBCFS_IOC_INIT(data);
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_UNCONFIGURE, &data);
-
- if (rc == 0) {
- printf ("LNET ready to unload\n");
- return 0;
- }
-
- if (errno == EBUSY)
- fprintf(stderr, "LNET busy\n");
- else
- fprintf(stderr, "LNET unconfigure error %d: %s\n",
- errno, strerror(errno));
- return -1;
- }
-
- if (!strcmp(argv[1], "configure") ||
- !strcmp(argv[1], "up")) {
- LIBCFS_IOC_INIT(data);
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_CONFIGURE, &data);
-
- if (rc == 0) {
- printf ("LNET configured\n");
- return 0;
- }
-
- fprintf(stderr, "LNET configure error %d: %s\n",
- errno, strerror(errno));
- return -1;
- }
-
- net = libcfs_str2net(argv[1]);
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- fprintf(stderr, "Can't parse net %s\n", argv[1]);
- return -1;
- }
-
- g_net_set = 1;
- g_net = net;
- return 0;
-}
-
-int
-jt_ptl_list_nids(int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- int all = 0, return_nid = 0;
- int count;
- int rc;
-
- all = (argc == 2) && (strcmp(argv[1], "all") == 0);
- /* Hack to pass back value */
- return_nid = (argc == 2) && (argv[1][0] == 1);
-
- if ((argc > 2) && !(all || return_nid)) {
- fprintf(stderr, "usage: %s [all]\n", argv[0]);
- return 0;
- }
-
- for (count = 0;; count++) {
- LIBCFS_IOC_INIT (data);
- data.ioc_count = count;
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_NI, &data);
-
- if (rc < 0) {
- if ((count > 0) && (errno == ENOENT))
- /* We found them all */
- break;
- fprintf(stderr,"IOC_LIBCFS_GET_NI error %d: %s\n",
- errno, strerror(errno));
- return -1;
- }
-
- if (all || (LNET_NETTYP(LNET_NIDNET(data.ioc_nid)) != LOLND)) {
- printf("%s\n", libcfs_nid2str(data.ioc_nid));
- if (return_nid) {
- *(__u64 *)(argv[1]) = data.ioc_nid;
- return_nid--;
- }
- }
- }
-
- return 0;
-}
-
-int
-jt_ptl_which_nid (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- int best_dist = 0;
- int best_order = 0;
- lnet_nid_t best_nid = LNET_NID_ANY;
- int dist;
- int order;
- lnet_nid_t nid;
- char *nidstr;
- int rc;
- int i;
-
- if (argc < 2) {
- fprintf(stderr, "usage: %s NID [NID...]\n", argv[0]);
- return 0;
- }
-
- for (i = 1; i < argc; i++) {
- nidstr = argv[i];
- nid = libcfs_str2nid(nidstr);
- if (nid == LNET_NID_ANY) {
- fprintf(stderr, "Can't parse NID %s\n", nidstr);
- return -1;
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_nid = nid;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNET_DIST, &data);
- if (rc != 0) {
- fprintf(stderr, "Can't get distance to %s: %s\n",
- nidstr, strerror(errno));
- return -1;
- }
-
- dist = data.ioc_u32[0];
- order = data.ioc_u32[1];
-
- if (dist < 0) {
- if (dist == -EHOSTUNREACH)
- continue;
-
- fprintf(stderr, "Unexpected distance to %s: %d\n",
- nidstr, dist);
- return -1;
- }
-
- if (best_nid == LNET_NID_ANY ||
- dist < best_dist ||
- (dist == best_dist && order < best_order)) {
- best_dist = dist;
- best_order = order;
- best_nid = nid;
- }
- }
-
- if (best_nid == LNET_NID_ANY) {
- fprintf(stderr, "No reachable NID\n");
- return -1;
- }
-
- printf("%s\n", libcfs_nid2str(best_nid));
- return 0;
-}
-
-int
-jt_ptl_print_interfaces (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- char buffer[3][64];
- int index;
- int rc;
-
- if (!g_net_is_compatible (argv[0], SOCKLND, 0))
- return -1;
-
- for (index = 0;;index++) {
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_count = index;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_INTERFACE, &data);
- if (rc != 0)
- break;
-
- printf ("%s: (%s/%s) npeer %d nroute %d\n",
- ptl_ipaddr_2_str(data.ioc_u32[0], buffer[2], 1),
- ptl_ipaddr_2_str(data.ioc_u32[0], buffer[0], 0),
- ptl_ipaddr_2_str(data.ioc_u32[1], buffer[1], 0),
- data.ioc_u32[2], data.ioc_u32[3]);
- }
-
- if (index == 0) {
- if (errno == ENOENT) {
- printf ("<no interfaces>\n");
- } else {
- fprintf(stderr, "Error getting interfaces: %s: "
- "check dmesg.\n",
- strerror(errno));
- }
- }
-
- return 0;
-}
-
-int
-jt_ptl_add_interface (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- __u32 ipaddr;
- int rc;
- __u32 netmask = 0xffffff00;
- int i;
- int count;
- char *end;
-
- if (argc < 2 || argc > 3) {
- fprintf (stderr, "usage: %s ipaddr [netmask]\n", argv[0]);
- return 0;
- }
-
- if (!g_net_is_compatible(argv[0], SOCKLND, 0))
- return -1;
-
- if (lnet_parse_ipaddr(&ipaddr, argv[1]) != 0) {
- fprintf (stderr, "Can't parse ip: %s\n", argv[1]);
- return -1;
- }
-
- if (argc > 2 ) {
- count = strtol(argv[2], &end, 0);
- if (count > 0 && count < 32 && *end == 0) {
- netmask = 0;
- for (i = count; i > 0; i--)
- netmask = netmask|(1<<(32-i));
- } else if (lnet_parse_ipquad(&netmask, argv[2]) != 0) {
- fprintf (stderr, "Can't parse netmask: %s\n", argv[2]);
- return -1;
- }
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_u32[0] = ipaddr;
- data.ioc_u32[1] = netmask;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_INTERFACE, &data);
- if (rc != 0) {
- fprintf (stderr, "failed to add interface: %s\n",
- strerror (errno));
- return -1;
- }
-
- return 0;
-}
-
-int
-jt_ptl_del_interface (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- int rc;
- __u32 ipaddr = 0;
-
- if (argc > 2) {
- fprintf (stderr, "usage: %s [ipaddr]\n", argv[0]);
- return 0;
- }
-
- if (!g_net_is_compatible(argv[0], SOCKLND, 0))
- return -1;
-
- if (argc == 2 &&
- lnet_parse_ipaddr(&ipaddr, argv[1]) != 0) {
- fprintf (stderr, "Can't parse ip: %s\n", argv[1]);
- return -1;
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_u32[0] = ipaddr;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_INTERFACE, &data);
- if (rc != 0) {
- fprintf (stderr, "failed to delete interface: %s\n",
- strerror (errno));
- return -1;
- }
-
- return 0;
-}
-
-int
-jt_ptl_print_peers (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- lnet_process_id_t id;
- char buffer[2][64];
- int index;
- int rc;
-
- if (!g_net_is_compatible (argv[0], SOCKLND, RALND, PTLLND, MXLND,
- OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0))
- return -1;
-
- for (index = 0;;index++) {
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_count = index;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_PEER, &data);
- if (rc != 0)
- break;
-
- if (g_net_is_compatible(NULL, SOCKLND, 0)) {
- id.nid = data.ioc_nid;
- id.pid = data.ioc_u32[4];
- printf ("%-20s [%d]%s->%s:%d #%d\n",
- libcfs_id2str(id),
- data.ioc_count, /* persistence */
- ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* my ip */
- ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */
- data.ioc_u32[1], /* peer port */
- data.ioc_u32[3]); /* conn_count */
- } else if (g_net_is_compatible(NULL, PTLLND, 0)) {
- id.nid = data.ioc_nid;
- id.pid = data.ioc_u32[4];
- printf ("%-20s s %d%s [%d] "LPD64".%06d"
- " m "LPD64"/"LPD64" q %d/%d c %d/%d\n",
- libcfs_id2str(id),
- data.ioc_net, /* state */
- data.ioc_flags ? "" : " ~!h", /* sent_hello */
- data.ioc_count, /* refcount */
- data.ioc_u64[0]/1000000, /* incarnation secs */
- (int)(data.ioc_u64[0]%1000000), /* incarnation usecs */
- (((__u64)data.ioc_u32[1])<<32) |
- ((__u64)data.ioc_u32[0]), /* next_matchbits */
- (((__u64)data.ioc_u32[3])<<32) |
- ((__u64)data.ioc_u32[2]), /* last_matchbits_seen */
- data.ioc_u32[5] >> 16, /* nsendq */
- data.ioc_u32[5] & 0xffff, /* nactiveq */
- data.ioc_u32[6] >> 16, /* credits */
- data.ioc_u32[6] & 0xffff); /* outstanding_credits */
- } else if (g_net_is_compatible(NULL, RALND, OPENIBLND, CIBLND, VIBLND, 0)) {
- printf ("%-20s [%d]@%s:%d\n",
- libcfs_nid2str(data.ioc_nid), /* peer nid */
- data.ioc_count, /* peer persistence */
- ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */
- data.ioc_u32[1]); /* peer port */
- } else {
- printf ("%-20s [%d]\n",
- libcfs_nid2str(data.ioc_nid), data.ioc_count);
- }
- }
-
- if (index == 0) {
- if (errno == ENOENT) {
- printf ("<no peers>\n");
- } else {
- fprintf(stderr, "Error getting peer list: %s: "
- "check dmesg.\n",
- strerror(errno));
- }
- }
- return 0;
-}
-
-int
-jt_ptl_add_peer (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- lnet_nid_t nid;
- __u32 ip = 0;
- int port = 0;
- int rc;
-
- if (!g_net_is_compatible (argv[0], SOCKLND, RALND,
- OPENIBLND, CIBLND, IIBLND, VIBLND, 0))
- return -1;
-
- if (g_net_is_compatible(NULL, SOCKLND, OPENIBLND, CIBLND, RALND, 0)) {
- if (argc != 4) {
- fprintf (stderr, "usage(tcp,openib,cib,ra): %s nid ipaddr port\n",
- argv[0]);
- return 0;
- }
- } else if (g_net_is_compatible(NULL, VIBLND, 0)) {
- if (argc != 3) {
- fprintf (stderr, "usage(vib): %s nid ipaddr\n",
- argv[0]);
- return 0;
- }
- } else if (argc != 2) {
- fprintf (stderr, "usage(iib): %s nid\n", argv[0]);
- return 0;
- }
-
- nid = libcfs_str2nid(argv[1]);
- if (nid == LNET_NID_ANY) {
- fprintf (stderr, "Can't parse NID: %s\n", argv[1]);
- return -1;
- }
-
- if (g_net_is_compatible (NULL, SOCKLND, OPENIBLND, CIBLND, VIBLND, RALND, 0) &&
- lnet_parse_ipaddr (&ip, argv[2]) != 0) {
- fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]);
- return -1;
- }
-
- if (g_net_is_compatible (NULL, SOCKLND, OPENIBLND, CIBLND, RALND, 0) &&
- lnet_parse_port (&port, argv[3]) != 0) {
- fprintf (stderr, "Can't parse port: %s\n", argv[3]);
- return -1;
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_nid = nid;
- data.ioc_u32[0] = ip;
- data.ioc_u32[1] = port;
-
- rc = l_ioctl (LNET_DEV_ID, IOC_LIBCFS_ADD_PEER, &data);
- if (rc != 0) {
- fprintf (stderr, "failed to add peer: %s\n",
- strerror (errno));
- return -1;
- }
-
- return 0;
-}
-
-int
-jt_ptl_del_peer (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- lnet_nid_t nid = LNET_NID_ANY;
- lnet_pid_t pid = LNET_PID_ANY;
- __u32 ip = 0;
- char *end;
- int rc;
-
- if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, PTLLND,
- OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0))
- return -1;
-
- if (g_net_is_compatible(NULL, SOCKLND, 0)) {
- if (argc > 3) {
- fprintf (stderr, "usage: %s [nid] [ipaddr]\n",
- argv[0]);
- return 0;
- }
- } else if (g_net_is_compatible(NULL, PTLLND, 0)) {
- if (argc > 3) {
- fprintf (stderr, "usage: %s [nid] [pid]\n",
- argv[0]);
- return 0;
- }
- } else if (argc > 2) {
- fprintf (stderr, "usage: %s [nid]\n", argv[0]);
- return 0;
- }
-
- if (argc > 1 &&
- !libcfs_str2anynid(&nid, argv[1])) {
- fprintf (stderr, "Can't parse nid: %s\n", argv[1]);
- return -1;
- }
-
- if (g_net_is_compatible(NULL, SOCKLND, 0)) {
- if (argc > 2 &&
- lnet_parse_ipaddr (&ip, argv[2]) != 0) {
- fprintf (stderr, "Can't parse ip addr: %s\n",
- argv[2]);
- return -1;
- }
- } else if (g_net_is_compatible(NULL, PTLLND, 0)) {
- if (argc > 2) {
- pid = strtol(argv[2], &end, 0);
- if (end == argv[2] || *end == 0) {
- fprintf(stderr, "Can't parse pid %s\n",
- argv[2]);
- return -1;
- }
- }
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_nid = nid;
- data.ioc_u32[0] = ip;
- data.ioc_u32[1] = pid;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_PEER, &data);
- if (rc != 0) {
- fprintf (stderr, "failed to remove peer: %s\n",
- strerror (errno));
- return -1;
- }
-
- return 0;
-}
-
-int
-jt_ptl_print_connections (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- lnet_process_id_t id;
- char buffer[2][64];
- int index;
- int rc;
-
- if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND,
- OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0))
- return -1;
-
- for (index = 0; ; index++) {
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_count = index;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_CONN, &data);
- if (rc != 0)
- break;
-
- if (g_net_is_compatible (NULL, SOCKLND, 0)) {
- id.nid = data.ioc_nid;
- id.pid = data.ioc_u32[6];
- printf ("%-20s %s[%d]%s->%s:%d %d/%d %s\n",
- libcfs_id2str(id),
- (data.ioc_u32[3] == SOCKLND_CONN_ANY) ? "A" :
- (data.ioc_u32[3] == SOCKLND_CONN_CONTROL) ? "C" :
- (data.ioc_u32[3] == SOCKLND_CONN_BULK_IN) ? "I" :
- (data.ioc_u32[3] == SOCKLND_CONN_BULK_OUT) ? "O" : "?",
- data.ioc_u32[4], /* scheduler */
- ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* local IP addr */
- ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* remote IP addr */
- data.ioc_u32[1], /* remote port */
- data.ioc_count, /* tx buffer size */
- data.ioc_u32[5], /* rx buffer size */
- data.ioc_flags ? "nagle" : "nonagle");
- } else if (g_net_is_compatible (NULL, RALND, 0)) {
- printf ("%-20s [%d]\n",
- libcfs_nid2str(data.ioc_nid),
- data.ioc_u32[0] /* device id */);
- } else {
- printf ("%s\n", libcfs_nid2str(data.ioc_nid));
- }
- }
-
- if (index == 0) {
- if (errno == ENOENT) {
- printf ("<no connections>\n");
- } else {
- fprintf(stderr, "Error getting connection list: %s: "
- "check dmesg.\n",
- strerror(errno));
- }
- }
- return 0;
-}
-
-int jt_ptl_disconnect(int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- lnet_nid_t nid = LNET_NID_ANY;
- __u32 ipaddr = 0;
- int rc;
-
- if (argc > 3) {
- fprintf(stderr, "usage: %s [nid] [ipaddr]\n", argv[0]);
- return 0;
- }
-
- if (!g_net_is_compatible (NULL, SOCKLND, RALND, MXLND,
- OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0))
- return 0;
-
- if (argc >= 2 &&
- !libcfs_str2anynid(&nid, argv[1])) {
- fprintf (stderr, "Can't parse nid %s\n", argv[1]);
- return -1;
- }
-
- if (g_net_is_compatible (NULL, SOCKLND, 0) &&
- argc >= 3 &&
- lnet_parse_ipaddr (&ipaddr, argv[2]) != 0) {
- fprintf (stderr, "Can't parse ip addr %s\n", argv[2]);
- return -1;
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_nid = nid;
- data.ioc_u32[0] = ipaddr;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_CLOSE_CONNECTION, &data);
- if (rc != 0) {
- fprintf(stderr, "failed to remove connection: %s\n",
- strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
-int jt_ptl_push_connection (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- int rc;
- lnet_nid_t nid = LNET_NID_ANY;
-
- if (argc > 2) {
- fprintf(stderr, "usage: %s [nid]\n", argv[0]);
- return 0;
- }
-
- if (!g_net_is_compatible (argv[0], SOCKLND, 0))
- return -1;
-
- if (argc > 1 &&
- !libcfs_str2anynid(&nid, argv[1])) {
- fprintf(stderr, "Can't parse nid: %s\n", argv[1]);
- return -1;
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_nid = nid;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PUSH_CONNECTION, &data);
- if (rc != 0) {
- fprintf(stderr, "failed to push connection: %s\n",
- strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
-int
-jt_ptl_print_active_txs (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- int index;
- int rc;
-
- if (!g_net_is_compatible (argv[0], QSWLND, 0))
- return -1;
-
- for (index = 0;;index++) {
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_count = index;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_TXDESC, &data);
- if (rc != 0)
- break;
-
- printf ("type %u payload %6d to %s via %s by pid %6d: "
- "%s, %s, state %d\n",
- data.ioc_u32[0],
- data.ioc_count,
- libcfs_nid2str(data.ioc_nid),
- libcfs_nid2str(data.ioc_u64[0]),
- data.ioc_u32[1],
- (data.ioc_flags & 1) ? "delayed" : "immediate",
- (data.ioc_flags & 2) ? "nblk" : "normal",
- data.ioc_flags >> 2);
- }
-
- if (index == 0) {
- if (errno == ENOENT) {
- printf ("<no active descs>\n");
- } else {
- fprintf(stderr, "Error getting active transmits list: "
- "%s: check dmesg.\n",
- strerror(errno));
- }
- }
- return 0;
-}
-
-int jt_ptl_ping(int argc, char **argv)
-{
- int rc;
- int timeout;
- lnet_process_id_t id;
- lnet_process_id_t ids[16];
- int maxids = sizeof(ids)/sizeof(ids[0]);
- struct libcfs_ioctl_data data;
- char *sep;
- int i;
-
- if (argc < 2) {
- fprintf(stderr, "usage: %s id [timeout (secs)]\n", argv[0]);
- return 0;
- }
-
- sep = strchr(argv[1], '-');
- if (sep == NULL) {
- id.pid = LNET_PID_ANY;
- id.nid = libcfs_str2nid(argv[1]);
- if (id.nid == LNET_NID_ANY) {
- fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
- return -1;
- }
- } else {
- char *end;
-
- if (argv[1][0] == 'u' ||
- argv[1][0] == 'U')
- id.pid = strtoul(&argv[1][1], &end, 0) | LNET_PID_USERFLAG;
- else
- id.pid = strtoul(argv[1], &end, 0);
-
- id.nid = libcfs_str2nid(sep + 1);
-
- if (end != sep ||
- id.nid == LNET_NID_ANY) {
- fprintf(stderr, "Can't parse process id \"%s\"\n", argv[1]);
- return -1;
- }
- }
-
- if (argc > 2)
- timeout = 1000 * atol(argv[2]);
- else
- timeout = 1000; /* default 1 second timeout */
-
- LIBCFS_IOC_INIT (data);
- data.ioc_nid = id.nid;
- data.ioc_u32[0] = id.pid;
- data.ioc_u32[1] = timeout;
- data.ioc_plen1 = sizeof(ids);
- data.ioc_pbuf1 = (char *)ids;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PING, &data);
- if (rc != 0) {
- fprintf(stderr, "failed to ping %s: %s\n",
- id.pid == LNET_PID_ANY ?
- libcfs_nid2str(id.nid) : libcfs_id2str(id),
- strerror(errno));
- return -1;
- }
-
- for (i = 0; i < data.ioc_count && i < maxids; i++)
- printf("%s\n", libcfs_id2str(ids[i]));
-
- if (data.ioc_count > maxids)
- printf("%d out of %d ids listed\n", maxids, data.ioc_count);
-
- return 0;
-}
-
-int jt_ptl_mynid(int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- lnet_nid_t nid;
- int rc;
-
- if (argc != 2) {
- fprintf(stderr, "usage: %s NID\n", argv[0]);
- return 0;
- }
-
- nid = libcfs_str2nid(argv[1]);
- if (nid == LNET_NID_ANY) {
- fprintf(stderr, "Can't parse NID '%s'\n", argv[1]);
- return -1;
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_net = LNET_NIDNET(nid);
- data.ioc_nid = nid;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_REGISTER_MYNID, &data);
- if (rc < 0)
- fprintf(stderr, "setting my NID failed: %s\n",
- strerror(errno));
- else
- printf("registered my nid %s\n", libcfs_nid2str(nid));
-
- return 0;
-}
-
-int
-jt_ptl_fail_nid (int argc, char **argv)
-{
- int rc;
- lnet_nid_t nid;
- unsigned int threshold;
- struct libcfs_ioctl_data data;
-
- if (argc < 2 || argc > 3)
- {
- fprintf (stderr, "usage: %s nid|\"*\" [count (0 == mend)]\n", argv[0]);
- return (0);
- }
-
- if (!libcfs_str2anynid(&nid, argv[1]))
- {
- fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
- return (-1);
- }
-
- if (argc < 3) {
- threshold = LNET_MD_THRESH_INF;
- } else if (sscanf (argv[2], "%i", &threshold) != 1) {
- fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]);
- return (-1);
- }
-
- LIBCFS_IOC_INIT (data);
- data.ioc_nid = nid;
- data.ioc_count = threshold;
-
- rc = l_ioctl (LNET_DEV_ID, IOC_LIBCFS_FAIL_NID, &data);
- if (rc < 0)
- fprintf (stderr, "IOC_LIBCFS_FAIL_NID failed: %s\n",
- strerror (errno));
- else
- printf ("%s %s\n", threshold == 0 ? "Unfailing" : "Failing", argv[1]);
-
- return (0);
-}
-
-int
-jt_ptl_add_route (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- lnet_nid_t gateway_nid;
- unsigned int hops = 1;
- char *end;
- int rc;
-
- if (argc < 2 || argc > 3)
- {
- fprintf (stderr, "usage: %s gateway [hopcount]\n", argv[0]);
- return (0);
- }
-
- if (!g_net_is_set(argv[0]))
- return (-1);
-
- gateway_nid = libcfs_str2nid(argv[1]);
- if (gateway_nid == LNET_NID_ANY) {
- fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]);
- return (-1);
- }
-
- if (argc == 3) {
- hops = strtoul(argv[2], &end, 0);
- if (hops >= 256 || *end != 0) {
- fprintf (stderr, "Can't parse hopcount \"%s\"\n", argv[2]);
- return -1;
- }
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net;
- data.ioc_count = hops;
- data.ioc_nid = gateway_nid;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_ROUTE, &data);
- if (rc != 0) {
- fprintf (stderr, "IOC_LIBCFS_ADD_ROUTE failed: %s\n", strerror (errno));
- return (-1);
- }
-
- return (0);
-}
-
-int
-jt_ptl_del_route (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- lnet_nid_t nid;
- int rc;
-
- if (argc != 2) {
- fprintf (stderr, "usage: %s gatewayNID\n", argv[0]);
- return (0);
- }
-
- if (!libcfs_str2anynid(&nid, argv[1])) {
- fprintf (stderr, "Can't parse gateway NID "
- "\"%s\"\n", argv[1]);
- return -1;
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_net = g_net_set ? g_net : LNET_NIDNET(LNET_NID_ANY);
- data.ioc_nid = nid;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_ROUTE, &data);
- if (rc != 0) {
- fprintf (stderr, "IOC_LIBCFS_DEL_ROUTE (%s) failed: %s\n",
- libcfs_nid2str(nid), strerror (errno));
- return (-1);
- }
-
- return (0);
-}
-
-int
-jt_ptl_notify_router (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- int enable;
- lnet_nid_t nid;
- int rc;
- struct timeval now;
- time_t when;
-
- if (argc < 3)
- {
- fprintf (stderr, "usage: %s targetNID <up/down> [<time>]\n",
- argv[0]);
- return (0);
- }
-
- nid = libcfs_str2nid(argv[1]);
- if (nid == LNET_NID_ANY) {
- fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[1]);
- return (-1);
- }
-
- if (lnet_parse_bool (&enable, argv[2]) != 0) {
- fprintf (stderr, "Can't parse boolean %s\n", argv[2]);
- return (-1);
- }
-
- gettimeofday(&now, NULL);
-
- if (argc < 4) {
- when = now.tv_sec;
- } else if (lnet_parse_time (&when, argv[3]) != 0) {
- fprintf(stderr, "Can't parse time %s\n"
- "Please specify either 'YYYY-MM-DD-HH:MM:SS'\n"
- "or an absolute unix time in seconds\n", argv[3]);
- return (-1);
- } else if (when > now.tv_sec) {
- fprintf (stderr, "%s specifies a time in the future\n",
- argv[3]);
- return (-1);
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_nid = nid;
- data.ioc_flags = enable;
- /* Yeuch; 'cept I need a __u64 on 64 bit machines... */
- data.ioc_u64[0] = (__u64)when;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_NOTIFY_ROUTER, &data);
- if (rc != 0) {
- fprintf (stderr, "IOC_LIBCFS_NOTIFY_ROUTER (%s) failed: %s\n",
- libcfs_nid2str(nid), strerror (errno));
- return (-1);
- }
-
- return (0);
-}
-
-int
-jt_ptl_print_routes (int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- int rc;
- int index;
- __u32 net;
- lnet_nid_t nid;
- unsigned int hops;
- int alive;
-
- for (index = 0;;index++)
- {
- LIBCFS_IOC_INIT(data);
- data.ioc_count = index;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_ROUTE, &data);
- if (rc != 0)
- break;
-
- net = data.ioc_net;
- hops = data.ioc_count;
- nid = data.ioc_nid;
- alive = data.ioc_flags;
-
- printf ("net %18s hops %u gw %32s %s\n",
- libcfs_net2str(net), hops,
- libcfs_nid2str(nid), alive ? "up" : "down");
- }
-
- if (errno != ENOENT)
- fprintf(stderr, "Error getting routes: %s: check dmesg.\n",
- strerror(errno));
-
- return (0);
-}
-
-static int
-lwt_control(int enable, int clear)
-{
- struct libcfs_ioctl_data data;
- int rc;
-
- LIBCFS_IOC_INIT(data);
- data.ioc_flags = (enable ? 1 : 0) | (clear ? 2 : 0);
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_CONTROL, &data);
- if (rc == 0)
- return (0);
-
- fprintf(stderr, "IOC_LIBCFS_LWT_CONTROL failed: %s\n",
- strerror(errno));
- return (-1);
-}
-
-static int
-lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize,
- lwt_event_t *events, int size)
-{
- struct libcfs_ioctl_data data;
- int rc;
-
- LIBCFS_IOC_INIT(data);
- data.ioc_pbuf1 = (char *)events;
- data.ioc_plen1 = size;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_SNAPSHOT, &data);
- if (rc != 0) {
- fprintf(stderr, "IOC_LIBCFS_LWT_SNAPSHOT failed: %s\n",
- strerror(errno));
- return (-1);
- }
-
- /* crappy overloads */
- if (data.ioc_u32[2] != sizeof(lwt_event_t) ||
- data.ioc_u32[3] != offsetof(lwt_event_t, lwte_where)) {
- fprintf(stderr,"kernel/user LWT event mismatch %d(%d),%d(%d)\n",
- (int)data.ioc_u32[2], (int)sizeof(lwt_event_t),
- (int)data.ioc_u32[3],
- (int)offsetof(lwt_event_t, lwte_where));
- return (-1);
- }
-
- if (now != NULL)
- *now = data.ioc_u64[0];
-
- LASSERT (data.ioc_u32[0] != 0);
- if (ncpu != NULL)
- *ncpu = data.ioc_u32[0];
-
- LASSERT (data.ioc_u32[1] != 0);
- if (totalsize != NULL)
- *totalsize = data.ioc_u32[1];
-
- return (0);
-}
-
-static char *
-lwt_get_string(char *kstr)
-{
- char *ustr;
- struct libcfs_ioctl_data data;
- int size;
- int rc;
-
- /* FIXME: this could maintain a symbol table since we expect to be
- * looking up the same strings all the time... */
-
- LIBCFS_IOC_INIT(data);
- data.ioc_pbuf1 = kstr;
- data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */
- data.ioc_pbuf2 = NULL;
- data.ioc_plen2 = 0;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_LOOKUP_STRING, &data);
- if (rc != 0) {
- fprintf(stderr, "IOC_LIBCFS_LWT_LOOKUP_STRING failed: %s\n",
- strerror(errno));
- return (NULL);
- }
-
- size = data.ioc_count;
- ustr = (char *)malloc(size);
- if (ustr == NULL) {
- fprintf(stderr, "Can't allocate string storage of size %d\n",
- size);
- return (NULL);
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_pbuf1 = kstr;
- data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */
- data.ioc_pbuf2 = ustr;
- data.ioc_plen2 = size;
-
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_LOOKUP_STRING, &data);
- if (rc != 0) {
- fprintf(stderr, "IOC_LIBCFS_LWT_LOOKUP_STRING failed: %s\n",
- strerror(errno));
- return (NULL);
- }
-
- LASSERT(strlen(ustr) == size - 1);
- return (ustr);
-}
-
-static void
-lwt_put_string(char *ustr)
-{
- free(ustr);
-}
-
-static int
-lwt_print(FILE *f, cycles_t t0, cycles_t tlast, double mhz, int cpu, lwt_event_t *e)
-{
-#ifndef __WORDSIZE
-# error "__WORDSIZE not defined"
-#elif __WORDSIZE == 32
-# define XFMT "%#010lx"
-#elif __WORDSIZE== 64
-# define XFMT "%#018lx"
-#else
-# error "Unexpected __WORDSIZE"
-#endif
- char *where = lwt_get_string(e->lwte_where);
-
- if (where == NULL)
- return (-1);
-
- fprintf(f, XFMT" "XFMT" "XFMT" "XFMT": "XFMT" %2d %10.6f %10.2f %s\n",
- e->lwte_p1, e->lwte_p2, e->lwte_p3, e->lwte_p4,
- (long)e->lwte_task, cpu, (e->lwte_when - t0) / (mhz * 1000000.0),
- (t0 == e->lwte_when) ? 0.0 : (e->lwte_when - tlast) / mhz,
- where);
-
- lwt_put_string(where);
-
- return (0);
-#undef XFMT
-}
-
-double
-get_cycles_per_usec ()
-{
- FILE *f = fopen ("/proc/cpuinfo", "r");
- double mhz;
- char line[64];
-
- if (f != NULL) {
- while (fgets (line, sizeof (line), f) != NULL)
- if (sscanf (line, "cpu MHz : %lf", &mhz) == 1) {
- fclose (f);
- return (mhz);
- }
- fclose (f);
- }
-
- fprintf (stderr, "Can't read/parse /proc/cpuinfo\n");
- return (1000.0);
-}
-
-int
-jt_ptl_lwt(int argc, char **argv)
-{
- const int lwt_max_cpus = 32;
- int ncpus;
- int totalspace;
- int nevents_per_cpu;
- lwt_event_t *events;
- lwt_event_t *cpu_event[lwt_max_cpus + 1];
- lwt_event_t *next_event[lwt_max_cpus];
- lwt_event_t *first_event[lwt_max_cpus];
- int cpu;
- lwt_event_t *e;
- int rc;
- int i;
- double mhz;
- cycles_t t0;
- cycles_t tlast;
- cycles_t tnow;
- struct timeval tvnow;
- int printed_date = 0;
- int nlines = 0;
- FILE *f = stdout;
-
- if (argc < 2 ||
- (strcmp(argv[1], "start") &&
- strcmp(argv[1], "stop"))) {
- fprintf(stderr,
- "usage: %s start\n"
- " %s stop [fname]\n", argv[0], argv[0]);
- return (-1);
- }
-
- if (!strcmp(argv[1], "start")) {
- /* disable */
- if (lwt_control(0, 0) != 0)
- return (-1);
-
- /* clear */
- if (lwt_control(0, 1) != 0)
- return (-1);
-
- /* enable */
- if (lwt_control(1, 0) != 0)
- return (-1);
-
- return (0);
- }
-
- if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0)
- return (-1);
-
- if (ncpus > lwt_max_cpus) {
- fprintf(stderr, "Too many cpus: %d (%d)\n",
- ncpus, lwt_max_cpus);
- return (-1);
- }
-
- events = (lwt_event_t *)malloc(totalspace);
- if (events == NULL) {
- fprintf(stderr, "Can't allocate %d\n", totalspace);
- return (-1);
- }
-
- if (lwt_control(0, 0) != 0) { /* disable */
- free(events);
- return (-1);
- }
-
- if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) {
- free(events);
- return (-1);
- }
-
- /* we want this time to be sampled at snapshot time */
- gettimeofday(&tvnow, NULL);
-
- if (argc > 2) {
- f = fopen (argv[2], "w");
- if (f == NULL) {
- fprintf(stderr, "Can't open %s for writing: %s\n", argv[2], strerror (errno));
- free(events);
- return (-1);
- }
- }
-
- mhz = get_cycles_per_usec();
-
- /* carve events into per-cpu slices */
- nevents_per_cpu = totalspace / (ncpus * sizeof(lwt_event_t));
- for (cpu = 0; cpu <= ncpus; cpu++)
- cpu_event[cpu] = &events[cpu * nevents_per_cpu];
-
- /* find the earliest event on each cpu */
- for (cpu = 0; cpu < ncpus; cpu++) {
- first_event[cpu] = NULL;
-
- for (e = cpu_event[cpu]; e < cpu_event[cpu + 1]; e++) {
-
- if (e->lwte_where == NULL) /* not an event */
- continue;
-
- if (first_event[cpu] == NULL ||
- first_event[cpu]->lwte_when > e->lwte_when)
- first_event[cpu] = e;
- }
-
- next_event[cpu] = first_event[cpu];
- }
-
- t0 = tlast = 0;
- for (cpu = 0; cpu < ncpus; cpu++) {
- e = first_event[cpu];
- if (e == NULL) /* no events this cpu */
- continue;
-
- if (e == cpu_event[cpu])
- e = cpu_event[cpu + 1] - 1;
- else
- e = e - 1;
-
- /* If there's an event immediately before the first one, this
- * cpu wrapped its event buffer */
- if (e->lwte_where == NULL)
- continue;
-
- /* We should only start outputting events from the most recent
- * first event in any wrapped cpu. Events before this time on
- * other cpus won't have any events from this CPU to interleave
- * with. */
- if (t0 < first_event[cpu]->lwte_when)
- t0 = first_event[cpu]->lwte_when;
- }
-
- for (;;) {
- /* find which cpu has the next event */
- cpu = -1;
- for (i = 0; i < ncpus; i++) {
-
- if (next_event[i] == NULL) /* this cpu exhausted */
- continue;
-
- if (cpu < 0 ||
- next_event[i]->lwte_when < next_event[cpu]->lwte_when)
- cpu = i;
- }
-
- if (cpu < 0) /* all cpus exhausted */
- break;
-
- if (t0 == 0) {
- /* no wrapped cpus and this is he first ever event */
- t0 = next_event[cpu]->lwte_when;
- }
-
- if (t0 <= next_event[cpu]->lwte_when) {
- /* on or after the first event */
- if (!printed_date) {
- cycles_t du = (tnow - t0) / mhz;
- time_t then = tvnow.tv_sec - du/1000000;
-
- if (du % 1000000 > tvnow.tv_usec)
- then--;
-
- fprintf(f, "%s", ctime(&then));
- printed_date = 1;
- }
-
- rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]);
- if (rc != 0)
- break;
-
- if (++nlines % 10000 == 0 && f != stdout) {
- /* show some activity... */
- printf(".");
- fflush (stdout);
- }
- }
-
- tlast = next_event[cpu]->lwte_when;
-
- next_event[cpu]++;
- if (next_event[cpu] == cpu_event[cpu + 1])
- next_event[cpu] = cpu_event[cpu];
-
- if (next_event[cpu]->lwte_where == NULL ||
- next_event[cpu] == first_event[cpu])
- next_event[cpu] = NULL;
- }
-
- if (f != stdout) {
- printf("\n");
- fclose(f);
- }
-
- free(events);
- return (0);
-}
-
-int jt_ptl_memhog(int argc, char **argv)
-{
- static int gfp = 0; /* sticky! */
-
- struct libcfs_ioctl_data data;
- int rc;
- int count;
- char *end;
-
- if (argc < 2) {
- fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]);
- return 0;
- }
-
- count = strtol(argv[1], &end, 0);
- if (count < 0 || *end != 0) {
- fprintf(stderr, "Can't parse page count '%s'\n", argv[1]);
- return -1;
- }
-
- if (argc >= 3) {
- rc = strtol(argv[2], &end, 0);
- if (*end != 0) {
- fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]);
- return -1;
- }
- gfp = rc;
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_count = count;
- data.ioc_flags = gfp;
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_MEMHOG, &data);
-
- if (rc != 0) {
- fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno));
- return -1;
- }
-
- printf("memhog %d OK\n", count);
- return 0;
-}
-
-int jt_ptl_testprotocompat(int argc, char **argv)
-{
- struct libcfs_ioctl_data data;
- int rc;
- int flags;
- char *end;
-
- if (argc < 2) {
- fprintf(stderr, "usage: %s <number>\n", argv[0]);
- return 0;
- }
-
- flags = strtol(argv[1], &end, 0);
- if (flags < 0 || *end != 0) {
- fprintf(stderr, "Can't parse flags '%s'\n", argv[1]);
- return -1;
- }
-
- LIBCFS_IOC_INIT(data);
- data.ioc_flags = flags;
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_TESTPROTOCOMPAT, &data);
-
- if (rc != 0) {
- fprintf(stderr, "test proto compat %x failed: %s\n",
- flags, strerror(errno));
- return -1;
- }
-
- printf("test proto compat %x OK\n", flags);
- return 0;
-}
-
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <lnet/api-support.h>
-#include <lnet/lnetctl.h>
-
-#include "parser.h"
-
-
-command_t list[] = {
- {"network", jt_ptl_network, 0,"select/configure network (args: up|down|LND name)"},
- {"net", jt_ptl_network, 0,"select/configure network (args: up|down|LND name)"},
- {"list_nids", jt_ptl_list_nids, 0,"list local NIDs"},
- {"which_nid", jt_ptl_which_nid, 0,"select the closest NID"},
- {"print_interfaces", jt_ptl_print_interfaces, 0, "print interface entries (no args)"},
- {"add_interface", jt_ptl_add_interface, 0, "add interface entry (args: ip [netmask])"},
- {"del_interface", jt_ptl_del_interface, 0, "delete interface entries (args: [ip])"},
- {"print_peers", jt_ptl_print_peers, 0, "print peer entries (no args)"},
- {"add_peer", jt_ptl_add_peer, 0, "add peer entry (args: nid host port)"},
- {"del_peer", jt_ptl_del_peer, 0, "delete peer entry (args: [nid] [host])"},
- {"print_conns", jt_ptl_print_connections, 0, "print connections (no args)"},
- {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"},
- {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"},
- {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"},
- {"ping", jt_ptl_ping, 0, "ping (args: nid [timeout] [pid])"},
- {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"},
- {"add_route", jt_ptl_add_route, 0,
- "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"},
- {"del_route", jt_ptl_del_route, 0,
- "delete all routes via a gateway from the routing table (args: gatewayNID"},
- {"set_route", jt_ptl_notify_router, 0,
- "enable/disable a route in the routing table (args: gatewayNID up/down [time]"},
- {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"},
- {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
- {"fail", jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"},
- {"testprotocompat", jt_ptl_testprotocompat, 0, "usage: testprotocompat count"},
- {"help", Parser_help, 0, "help"},
- {"exit", Parser_quit, 0, "quit"},
- {"quit", Parser_quit, 0, "quit"},
- { 0, 0, 0, NULL }
-};
-
-int main(int argc, char **argv)
-{
- if (ptl_initialize(argc, argv) < 0)
- exit(1);
-
- Parser_init("ptlctl > ", list);
- if (argc > 1)
- return Parser_execarg(argc - 1, &argv[1], list);
-
- Parser_commands();
-
- return 0;
-}
+++ /dev/null
-#include <stdio.h>
-#include <errno.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/time.h>
-
-double
-timenow ()
-{
- struct timeval tv;
-
- gettimeofday (&tv, NULL);
- return (tv.tv_sec + tv.tv_usec / 1000000.0);
-}
-
-typedef struct {
- unsigned long msgs_alloc;
- unsigned long msgs_max;
- unsigned long errors;
- unsigned long send_count;
- unsigned long recv_count;
- unsigned long route_count;
- unsigned long drop_count;
- unsigned long long send_length;
- unsigned long long recv_length;
- unsigned long long route_length;
- unsigned long long drop_length;
-} counters_t;
-
-unsigned long long subull(unsigned long long a, unsigned long long b)
-{
- if (a < b)
- return -1ULL - b + a + 1;
-
- return a - b;
-}
-
-unsigned long long subul(unsigned long a, unsigned long b)
-{
- if (a < b)
- return -1UL - b + a + 1;
-
- return a - b;
-}
-
-double rul(unsigned long a, double secs)
-{
- return (double)a/secs;
-}
-
-double rull(unsigned long long a, double secs)
-{
- return (double)a/secs;
-}
-
-void
-do_stat (int fd)
-{
- static char buffer[1024];
- static double last = 0.0;
- static counters_t old_counter;
- double now;
- double t;
- counters_t new_counter;
- counters_t counter;
- int n;
-
- lseek (fd, 0, SEEK_SET);
- now = timenow();
- n = read (fd, buffer, sizeof (buffer));
- if (n < 0)
- {
- fprintf (stderr, "Can't read statfile\n");
- exit (1);
- }
- buffer[n] = 0;
-
- n = sscanf (buffer, "%lu %lu %lu %lu %lu %lu %lu %Lu %Lu %Lu %Lu",
- &new_counter.msgs_alloc, &new_counter.msgs_max,
- &new_counter.errors,
- &new_counter.send_count, &new_counter.recv_count,
- &new_counter.route_count, &new_counter.drop_count,
- &new_counter.send_length, &new_counter.recv_length,
- &new_counter.route_length, &new_counter.drop_length);
- if (n < 11)
- {
- fprintf (stderr, "Can't parse statfile\n");
- exit (1);
- }
-
- if (last == 0.0) {
- printf ("M %lu(%lu) E %lu S %lu/%llu R %lu/%llu F %lu/%llu D %lu/%llu\n",
- new_counter.msgs_alloc, new_counter.msgs_max,
- new_counter.errors,
- new_counter.send_count, new_counter.send_length,
- new_counter.recv_count, new_counter.recv_length,
- new_counter.route_count, new_counter.route_length,
- new_counter.drop_count, new_counter.drop_length);
- } else {
- t = now - last;
-
- counter.msgs_alloc = new_counter.msgs_alloc;
- counter.msgs_max = new_counter.msgs_max;
-
- counter.errors = subul(new_counter.errors, old_counter.errors);
- counter.send_count = subul(new_counter.send_count, old_counter.send_count);
- counter.recv_count = subul(new_counter.recv_count, old_counter.recv_count);
- counter.route_count = subul(new_counter.route_count, old_counter.route_count);
- counter.drop_count = subul(new_counter.drop_count, old_counter.drop_count);
- counter.send_length = subull(new_counter.send_length, old_counter.send_length);
- counter.recv_length = subull(new_counter.recv_length, old_counter.recv_length);
- counter.route_length = subull(new_counter.route_length, old_counter.route_length);
- counter.drop_length = subull(new_counter.drop_length, old_counter.drop_length);
-
- printf ("M %3lu(%3lu) E %0.0f S %7.2f/%6.0f R %7.2f/%6.0f F %7.2f/%6.0f D %4.2f/%0.0f\n",
- counter.msgs_alloc, counter.msgs_max,
- rul(counter.errors,t),
- rull(counter.send_length,t*1024.0*1024.0), rul(counter.send_count, t),
- rull(counter.recv_length,t*1024.0*1024.0), rul(counter.recv_count, t),
- rull(counter.route_length,t*1024.0*1024.0), rul(counter.route_count, t),
- rull(counter.drop_length,t*1024.0*1024.0), rul(counter.drop_count, t));
- }
-
- old_counter = new_counter;
- fflush (stdout);
-
- lseek (fd, 0, SEEK_SET);
- last = timenow();
-}
-
-int main (int argc, char **argv)
-{
- int interval = 0;
- int fd;
-
- if (argc > 1)
- interval = atoi (argv[1]);
-
- fd = open ("/proc/sys/lnet/stats", O_RDONLY);
- if (fd < 0)
- {
- fprintf (stderr, "Can't open stat: %s\n", strerror (errno));
- return (1);
- }
-
- do_stat (fd);
- if (interval == 0)
- return (0);
-
- for (;;)
- {
- sleep (interval);
- do_stat (fd);
- }
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <lnet/lib-lnet.h>
-
-#include <string.h>
-
-#ifndef HAVE_STRNLEN
-#define strnlen(s, i) strlen(s)
-#endif
-
-#define BLANK_LINE() \
-do { \
- printf ("\n"); \
-} while (0)
-
-#define COMMENT(c) \
-do { \
- printf (" /* "c" */\n"); \
-} while (0)
-
-#define STRINGIFY(a) #a
-
-#define CHECK_DEFINE(a) \
-do { \
- printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \
-} while (0)
-
-#define CHECK_VALUE(a) \
-do { \
- printf (" CLASSERT ("#a" == %d);\n", a); \
-} while (0)
-
-#define CHECK_MEMBER_OFFSET(s,m) \
-do { \
- CHECK_VALUE((int)offsetof(s, m)); \
-} while (0)
-
-#define CHECK_MEMBER_SIZEOF(s,m) \
-do { \
- CHECK_VALUE((int)sizeof(((s *)0)->m)); \
-} while (0)
-
-#define CHECK_MEMBER(s,m) \
-do { \
- CHECK_MEMBER_OFFSET(s, m); \
- CHECK_MEMBER_SIZEOF(s, m); \
-} while (0)
-
-#define CHECK_STRUCT(s) \
-do { \
- BLANK_LINE (); \
- COMMENT ("Checks for struct "#s); \
- CHECK_VALUE((int)sizeof(s)); \
-} while (0)
-
-void
-check_lnet_handle_wire (void)
-{
- CHECK_STRUCT (lnet_handle_wire_t);
- CHECK_MEMBER (lnet_handle_wire_t, wh_interface_cookie);
- CHECK_MEMBER (lnet_handle_wire_t, wh_object_cookie);
-}
-
-void
-check_lnet_magicversion (void)
-{
- CHECK_STRUCT (lnet_magicversion_t);
- CHECK_MEMBER (lnet_magicversion_t, magic);
- CHECK_MEMBER (lnet_magicversion_t, version_major);
- CHECK_MEMBER (lnet_magicversion_t, version_minor);
-}
-
-void
-check_lnet_hdr (void)
-{
- CHECK_STRUCT (lnet_hdr_t);
- CHECK_MEMBER (lnet_hdr_t, dest_nid);
- CHECK_MEMBER (lnet_hdr_t, src_nid);
- CHECK_MEMBER (lnet_hdr_t, dest_pid);
- CHECK_MEMBER (lnet_hdr_t, src_pid);
- CHECK_MEMBER (lnet_hdr_t, type);
- CHECK_MEMBER (lnet_hdr_t, payload_length);
- CHECK_MEMBER (lnet_hdr_t, msg);
-
- BLANK_LINE ();
- COMMENT ("Ack");
- CHECK_MEMBER (lnet_hdr_t, msg.ack.dst_wmd);
- CHECK_MEMBER (lnet_hdr_t, msg.ack.match_bits);
- CHECK_MEMBER (lnet_hdr_t, msg.ack.mlength);
-
- BLANK_LINE ();
- COMMENT ("Put");
- CHECK_MEMBER (lnet_hdr_t, msg.put.ack_wmd);
- CHECK_MEMBER (lnet_hdr_t, msg.put.match_bits);
- CHECK_MEMBER (lnet_hdr_t, msg.put.hdr_data);
- CHECK_MEMBER (lnet_hdr_t, msg.put.ptl_index);
- CHECK_MEMBER (lnet_hdr_t, msg.put.offset);
-
- BLANK_LINE ();
- COMMENT ("Get");
- CHECK_MEMBER (lnet_hdr_t, msg.get.return_wmd);
- CHECK_MEMBER (lnet_hdr_t, msg.get.match_bits);
- CHECK_MEMBER (lnet_hdr_t, msg.get.ptl_index);
- CHECK_MEMBER (lnet_hdr_t, msg.get.src_offset);
- CHECK_MEMBER (lnet_hdr_t, msg.get.sink_length);
-
- BLANK_LINE ();
- COMMENT ("Reply");
- CHECK_MEMBER (lnet_hdr_t, msg.reply.dst_wmd);
-
- BLANK_LINE ();
- COMMENT ("Hello");
- CHECK_MEMBER (lnet_hdr_t, msg.hello.incarnation);
- CHECK_MEMBER (lnet_hdr_t, msg.hello.type);
-}
-
-void
-system_string (char *cmdline, char *str, int len)
-{
- int fds[2];
- int rc;
- pid_t pid;
-
- rc = pipe (fds);
- if (rc != 0)
- abort ();
-
- pid = fork ();
- if (pid == 0) {
- /* child */
- int fd = fileno(stdout);
-
- rc = dup2(fds[1], fd);
- if (rc != fd)
- abort();
-
- exit(system(cmdline));
- /* notreached */
- } else if ((int)pid < 0) {
- abort();
- } else {
- FILE *f = fdopen (fds[0], "r");
-
- if (f == NULL)
- abort();
-
- close(fds[1]);
-
- if (fgets(str, len, f) == NULL)
- abort();
-
- if (waitpid(pid, &rc, 0) != pid)
- abort();
-
- if (!WIFEXITED(rc) ||
- WEXITSTATUS(rc) != 0)
- abort();
-
- if (strnlen(str, len) == len)
- str[len - 1] = 0;
-
- if (str[strlen(str) - 1] == '\n')
- str[strlen(str) - 1] = 0;
-
- fclose(f);
- }
-}
-
-int
-main (int argc, char **argv)
-{
- char unameinfo[256];
- char gccinfo[256];
-
- system_string("uname -a", unameinfo, sizeof(unameinfo));
- system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo));
-
- printf ("void lnet_assert_wire_constants (void)\n"
- "{\n"
- " /* Wire protocol assertions generated by 'wirecheck'\n"
- " * running on %s\n"
- " * with %s */\n"
- "\n", unameinfo, gccinfo);
-
- BLANK_LINE ();
-
- COMMENT ("Constants...");
-
- CHECK_DEFINE (LNET_PROTO_OPENIB_MAGIC);
- CHECK_DEFINE (LNET_PROTO_RA_MAGIC);
-
- CHECK_DEFINE (LNET_PROTO_TCP_MAGIC);
- CHECK_DEFINE (LNET_PROTO_TCP_VERSION_MAJOR);
- CHECK_DEFINE (LNET_PROTO_TCP_VERSION_MINOR);
-
- CHECK_VALUE (LNET_MSG_ACK);
- CHECK_VALUE (LNET_MSG_PUT);
- CHECK_VALUE (LNET_MSG_GET);
- CHECK_VALUE (LNET_MSG_REPLY);
- CHECK_VALUE (LNET_MSG_HELLO);
-
- check_lnet_handle_wire ();
- check_lnet_magicversion ();
- check_lnet_hdr ();
-
- printf ("}\n\n");
-
- return (0);
-}