X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Finclude%2Flnet%2Flib-types.h;h=2060f928f304e2ff17c5f9edd3517d4cc3c260cc;hp=f6302ab851b04e8ec5746b6ffe47a2e3fa14fcbe;hb=5c61559c099f9343a36886f4746ac966e4b4b70f;hpb=35a27554e5ca9e6e26638f17fbadd64034b59f2a diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index f6302ab..2060f92 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -1,37 +1,60 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * p30/lib-types.h + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see [sun.com URL with a + * copy of GPLv2]. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/include/lnet/lib-types.h * * Types used by the library side routines that do not need to be * exposed to the user application */ -#ifndef __PORTALS_LIB_TYPES_H__ -#define __PORTALS_LIB_TYPES_H__ - -#include "build_check.h" +#ifndef __LNET_LIB_TYPES_H__ +#define __LNET_LIB_TYPES_H__ #if defined(__linux__) -#include +#include #elif defined(__APPLE__) -#include +#include +#elif defined(__WINNT__) +#include #else #error Unsupported Operating System #endif #include #include -#include -#include - -typedef char *user_ptr; -typedef struct lib_msg_t lib_msg_t; -typedef struct lib_ptl_t lib_ptl_t; -typedef struct lib_ac_t lib_ac_t; -typedef struct lib_me_t lib_me_t; -typedef struct lib_md_t lib_md_t; -typedef struct lib_eq_t lib_eq_t; +#include #define WIRE_ATTR __attribute__((packed)) @@ -42,336 +65,531 @@ typedef struct lib_eq_t lib_eq_t; typedef struct { __u64 wh_interface_cookie; __u64 wh_object_cookie; -} WIRE_ATTR ptl_handle_wire_t; +} WIRE_ATTR lnet_handle_wire_t; /* byte-flip insensitive! */ -#define PTL_WIRE_HANDLE_NONE \ -((const ptl_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1}) +#define LNET_WIRE_HANDLE_NONE \ +((const lnet_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1}) typedef enum { - PTL_MSG_ACK = 0, - PTL_MSG_PUT, - PTL_MSG_GET, - PTL_MSG_REPLY, - PTL_MSG_HELLO, -} ptl_msg_type_t; + LNET_MSG_ACK = 0, + LNET_MSG_PUT, + LNET_MSG_GET, + LNET_MSG_REPLY, + LNET_MSG_HELLO, +} lnet_msg_type_t; /* The variant fields of the portals message header are aligned on an 8 * byte boundary in the message header. Note that all types used in these * wire structs MUST be fixed size and the smaller types are placed at the * end. */ -typedef struct ptl_ack { - ptl_handle_wire_t dst_wmd; - ptl_match_bits_t match_bits; - ptl_size_t mlength; -} WIRE_ATTR ptl_ack_t; - -typedef struct ptl_put { - ptl_handle_wire_t ack_wmd; - ptl_match_bits_t match_bits; - ptl_hdr_data_t hdr_data; - ptl_pt_index_t ptl_index; - ptl_size_t offset; -} WIRE_ATTR ptl_put_t; - -typedef struct ptl_get { - ptl_handle_wire_t return_wmd; - ptl_match_bits_t match_bits; - ptl_pt_index_t ptl_index; - ptl_size_t src_offset; - ptl_size_t sink_length; -} WIRE_ATTR ptl_get_t; - -typedef struct ptl_reply { - ptl_handle_wire_t dst_wmd; -} WIRE_ATTR ptl_reply_t; - -typedef struct ptl_hello { +typedef struct lnet_ack { + lnet_handle_wire_t dst_wmd; + __u64 match_bits; + __u32 mlength; +} WIRE_ATTR lnet_ack_t; + +typedef struct lnet_put { + lnet_handle_wire_t ack_wmd; + __u64 match_bits; + __u64 hdr_data; + __u32 ptl_index; + __u32 offset; +} WIRE_ATTR lnet_put_t; + +typedef struct lnet_get { + lnet_handle_wire_t return_wmd; + __u64 match_bits; + __u32 ptl_index; + __u32 src_offset; + __u32 sink_length; +} WIRE_ATTR lnet_get_t; + +typedef struct lnet_reply { + lnet_handle_wire_t dst_wmd; +} WIRE_ATTR lnet_reply_t; + +typedef struct lnet_hello { __u64 incarnation; __u32 type; -} WIRE_ATTR ptl_hello_t; +} WIRE_ATTR lnet_hello_t; typedef struct { - ptl_nid_t dest_nid; - ptl_nid_t src_nid; - ptl_pid_t dest_pid; - ptl_pid_t src_pid; - __u32 type; /* ptl_msg_type_t */ + lnet_nid_t dest_nid; + lnet_nid_t src_nid; + lnet_pid_t dest_pid; + lnet_pid_t src_pid; + __u32 type; /* lnet_msg_type_t */ __u32 payload_length; /* payload data to follow */ /*<------__u64 aligned------->*/ union { - ptl_ack_t ack; - ptl_put_t put; - ptl_get_t get; - ptl_reply_t reply; - ptl_hello_t hello; + lnet_ack_t ack; + lnet_put_t put; + lnet_get_t get; + lnet_reply_t reply; + lnet_hello_t hello; } msg; -} WIRE_ATTR ptl_hdr_t; +} WIRE_ATTR lnet_hdr_t; -/* A HELLO message contains the portals magic number and protocol version +/* A HELLO message contains a magic number and protocol version * code in the header's dest_nid, the peer's NID in the src_nid, and - * PTL_MSG_HELLO in the type field. All other common fields are zero + * LNET_MSG_HELLO in the type field. All other common fields are zero * (including payload_size; i.e. no payload). - * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is - * running the same protocol and to find out its NID, so that hosts with - * multiple IP interfaces can have a single NID. These NALs should exchange - * HELLO messages when a connection is first established. - * Individual NALs can put whatever else they fancy in ptl_hdr_t::msg. + * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is + * running the same protocol and to find out its NID. These LNDs should + * exchange HELLO messages when a connection is first established. Individual + * LNDs can put whatever else they fancy in lnet_hdr_t::msg. */ typedef struct { - __u32 magic; /* PORTALS_PROTO_MAGIC */ + __u32 magic; /* LNET_PROTO_TCP_MAGIC */ __u16 version_major; /* increment on incompatible change */ __u16 version_minor; /* increment on compatible change */ -} WIRE_ATTR ptl_magicversion_t; - -#define PORTALS_PROTO_MAGIC 0xeebc0ded - -#define PORTALS_PROTO_VERSION_MAJOR 1 -#define PORTALS_PROTO_VERSION_MINOR 0 - -typedef struct { - long recv_count, recv_length, send_count, send_length, drop_count, - drop_length, msgs_alloc, msgs_max; -} lib_counters_t; - -/* temporary expedient: limit number of entries in discontiguous MDs */ -#define PTL_MTU (1<<20) -#define PTL_MD_MAX_IOV 256 - -struct lib_msg_t { - struct list_head msg_list; - lib_md_t *md; - ptl_handle_wire_t ack_wmd; - ptl_event_t ev; -}; - -struct lib_ptl_t { - ptl_pt_index_t size; - struct list_head *tbl; -}; - -struct lib_ac_t { - int next_free; -}; - +} WIRE_ATTR lnet_magicversion_t; + +/* PROTO MAGIC for LNDs */ +#define LNET_PROTO_IB_MAGIC 0x0be91b91 +#define LNET_PROTO_OPENIB_MAGIC LNET_PROTO_IB_MAGIC +#define LNET_PROTO_IIB_MAGIC LNET_PROTO_IB_MAGIC +#define LNET_PROTO_VIB_MAGIC LNET_PROTO_IB_MAGIC +#define LNET_PROTO_RA_MAGIC 0x0be91b92 +#define LNET_PROTO_QSW_MAGIC 0x0be91b93 +#define LNET_PROTO_TCP_MAGIC 0xeebc0ded +#define LNET_PROTO_PTL_MAGIC 0x50746C4E /* 'PtlN' unique magic */ +#define LNET_PROTO_GM_MAGIC 0x6d797269 /* 'myri'! */ +#define LNET_PROTO_MX_MAGIC 0x4d583130 /* 'MX10'! */ +#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100 +#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */ + +/* Placeholder for a future "unified" protocol across all LNDs */ +/* Current LNDs that receive a request with this magic will respond with a + * "stub" reply using their current protocol */ +#define LNET_PROTO_MAGIC 0x45726963 /* ! */ + + +#define LNET_PROTO_TCP_VERSION_MAJOR 1 +#define LNET_PROTO_TCP_VERSION_MINOR 0 + +/* Acceptor connection request */ typedef struct { + __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */ + __u32 acr_version; /* protocol version */ + __u64 acr_nid; /* target NID */ +} WIRE_ATTR lnet_acceptor_connreq_t; + +#define LNET_PROTO_ACCEPTOR_VERSION 1 + +/* forward refs */ +struct lnet_libmd; + +typedef struct lnet_msg { + struct list_head msg_activelist; + struct list_head msg_list; /* Q for credits/MD */ + + lnet_process_id_t msg_target; + __u32 msg_type; + + unsigned int msg_target_is_router:1; /* sending to a router */ + unsigned int msg_routing:1; /* being forwarded */ + unsigned int msg_ack:1; /* ack on finalize (PUT) */ + unsigned int msg_sending:1; /* outgoing message */ + unsigned int msg_receiving:1; /* being received */ + unsigned int msg_delayed:1; /* had to Q for buffer or tx credit */ + unsigned int msg_txcredit:1; /* taken an NI send credit */ + unsigned int msg_peertxcredit:1; /* taken a peer send credit */ + unsigned int msg_rtrcredit:1; /* taken a globel router credit */ + unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ + unsigned int msg_onactivelist:1; /* on the activelist */ + + struct lnet_peer *msg_txpeer; /* peer I'm sending to */ + struct lnet_peer *msg_rxpeer; /* peer I received from */ + + void *msg_private; + struct lnet_libmd *msg_md; + + unsigned int msg_len; + unsigned int msg_wanted; + unsigned int msg_offset; + unsigned int msg_niov; + struct iovec *msg_iov; + lnet_kiov_t *msg_kiov; + + lnet_event_t msg_ev; + lnet_hdr_t msg_hdr; +} lnet_msg_t; + + +typedef struct lnet_libhandle { struct list_head lh_hash_chain; __u64 lh_cookie; -} lib_handle_t; +} lnet_libhandle_t; #define lh_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) -struct lib_eq_t { +typedef struct lnet_eq { struct list_head eq_list; - lib_handle_t eq_lh; - ptl_seq_t eq_enq_seq; - ptl_seq_t eq_deq_seq; - ptl_size_t eq_size; - ptl_event_t *eq_events; + lnet_libhandle_t eq_lh; + lnet_seq_t eq_enq_seq; + lnet_seq_t eq_deq_seq; + unsigned int eq_size; + lnet_event_t *eq_events; int eq_refcount; - ptl_eq_handler_t eq_callback; - void *eq_addrkey; -}; - -struct lib_me_t { - struct list_head me_list; - lib_handle_t me_lh; - ptl_process_id_t match_id; - ptl_match_bits_t match_bits, ignore_bits; - ptl_unlink_t unlink; - lib_md_t *md; -}; - -struct lib_md_t { + lnet_eq_handler_t eq_callback; +} lnet_eq_t; + +typedef struct lnet_me { + struct list_head me_list; + lnet_libhandle_t me_lh; + lnet_process_id_t me_match_id; + unsigned int me_portal; + __u64 me_match_bits; + __u64 me_ignore_bits; + lnet_unlink_t me_unlink; + struct lnet_libmd *me_md; +} lnet_me_t; + +typedef struct lnet_libmd { struct list_head md_list; - lib_handle_t md_lh; - lib_me_t *me; - user_ptr start; - ptl_size_t offset; - ptl_size_t length; - ptl_size_t max_size; - int threshold; - int pending; - unsigned int options; + lnet_libhandle_t md_lh; + lnet_me_t *md_me; + char *md_start; + unsigned int md_offset; + unsigned int md_length; + unsigned int md_max_size; + int md_threshold; + int md_refcount; + unsigned int md_options; unsigned int md_flags; - void *user_ptr; - lib_eq_t *eq; + void *md_user_ptr; + lnet_eq_t *md_eq; void *md_addrkey; unsigned int md_niov; /* # frags */ union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; + struct iovec iov[LNET_MAX_IOV]; + lnet_kiov_t kiov[LNET_MAX_IOV]; } md_iov; -}; - -#define PTL_MD_FLAG_ZOMBIE (1 << 0) -#define PTL_MD_FLAG_AUTO_UNLINK (1 << 1) +} lnet_libmd_t; -static inline int lib_md_exhausted (lib_md_t *md) -{ - return (md->threshold == 0 || - ((md->options & PTL_MD_MAX_SIZE) != 0 && - md->offset + md->max_size > md->length)); -} +#define LNET_MD_FLAG_ZOMBIE (1 << 0) +#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) -#ifdef PTL_USE_LIB_FREELIST +#ifdef LNET_USE_LIB_FREELIST typedef struct { void *fl_objs; /* single contiguous array of objects */ int fl_nobjs; /* the number of them */ int fl_objsize; /* the size (including overhead) of each of them */ struct list_head fl_list; /* where they are enqueued */ -} lib_freelist_t; +} lnet_freelist_t; typedef struct { struct list_head fo_list; /* enqueue on fl_list */ void *fo_contents; /* aligned contents */ -} lib_freeobj_t; +} lnet_freeobj_t; #endif typedef struct { /* info about peers we are trying to fail */ - struct list_head tp_list; /* stash in ni.ni_test_peers */ - ptl_nid_t tp_nid; /* matching nid */ - unsigned int tp_threshold; /* # failures to simulate */ -} lib_test_peer_t; - -#define PTL_COOKIE_TYPE_MD 1 -#define PTL_COOKIE_TYPE_ME 2 -#define PTL_COOKIE_TYPE_EQ 3 -#define PTL_COOKIE_TYPES 4 -/* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be - * extracted by masking with (PTL_COOKIE_TYPES - 1) */ - -typedef struct lib_ni + struct list_head tp_list; /* ln_test_peers */ + lnet_nid_t tp_nid; /* matching nid */ + unsigned int tp_threshold; /* # failures to simulate */ +} lnet_test_peer_t; + +#define LNET_COOKIE_TYPE_MD 1 +#define LNET_COOKIE_TYPE_ME 2 +#define LNET_COOKIE_TYPE_EQ 3 +#define LNET_COOKIE_TYPES 4 +/* LNET_COOKIE_TYPES must be a power of 2, so the cookie type can be + * extracted by masking with (LNET_COOKIE_TYPES - 1) */ + +struct lnet_ni; /* forward ref */ + +typedef struct lnet_lnd { - nal_t *ni_api; - ptl_process_id_t ni_pid; - lib_ptl_t ni_portals; - lib_counters_t ni_counters; - ptl_ni_limits_t ni_actual_limits; - - int ni_lh_hash_size; /* size of lib handle hash table */ - struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */ - __u64 ni_next_object_cookie; /* cookie generator */ - __u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */ - - struct list_head ni_test_peers; - int ni_loopback; /* loopback shortcircuits NAL */ + /* fields managed by portals */ + struct list_head lnd_list; /* stash in the LND table */ + int lnd_refcount; /* # active instances */ + + /* fields initialised by the LND */ + unsigned int lnd_type; -#ifdef PTL_USE_LIB_FREELIST - lib_freelist_t ni_free_mes; - lib_freelist_t ni_free_msgs; - lib_freelist_t ni_free_mds; - lib_freelist_t ni_free_eqs; + int (*lnd_startup) (struct lnet_ni *ni); + void (*lnd_shutdown) (struct lnet_ni *ni); + int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); + + /* In data movement APIs below, payload buffers are described as a set + * of 'niov' fragments which are... + * EITHER + * in virtual memory (struct iovec *iov != NULL) + * OR + * in pages (kernel only: plt_kiov_t *kiov != NULL). + * The LND may NOT overwrite these fragment descriptors. + * An 'offset' and may specify a byte offset within the set of + * fragments to start from + */ + + /* Start sending a preformatted message. 'private' is NULL for PUT and + * GET messages; otherwise this is a response to an incoming message + * and 'private' is the 'private' passed to lnet_parse(). Return + * non-zero for immediate failure, otherwise complete later with + * lnet_finalize() */ + int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg); + + /* Start receiving 'mlen' bytes of payload data, skipping the following + * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to + * lnet_parse(). Return non-zero for immedaite failure, otherwise + * complete later with lnet_finalize(). This also gives back a receive + * credit if the LND does flow control. */ + int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, + int delayed, unsigned int niov, + struct iovec *iov, lnet_kiov_t *kiov, + unsigned int offset, unsigned int mlen, unsigned int rlen); + + /* lnet_parse() has had to delay processing of this message + * (e.g. waiting for a forwarding buffer or send credits). Give the + * LND a chance to free urgently needed resources. If called, return 0 + * for success and do NOT give back a receive credit; that has to wait + * until lnd_recv() gets called. On failure return < 0 and + * release resources; lnd_recv() will not be called. */ + int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, + void **new_privatep); + + /* notification of peer health */ + void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); + +#ifdef __KERNEL__ + /* accept a new connection */ + int (*lnd_accept)(struct lnet_ni *ni, cfs_socket_t *sock); +#else + /* wait for something to happen */ + void (*lnd_wait)(struct lnet_ni *ni, int milliseconds); + + /* ensure non-RDMA messages can be received outside liblustre */ + int (*lnd_setasync)(struct lnet_ni *ni, lnet_process_id_t id, int nasync); + +#ifdef HAVE_LIBPTHREAD + int (*lnd_accept)(struct lnet_ni *ni, int sock); #endif +#endif +} lnd_t; + +#define LNET_MAX_INTERFACES 16 + +typedef struct lnet_ni { + struct list_head ni_list; /* chain on ln_nis */ + struct list_head ni_txq; /* messages waiting for tx credits */ + int ni_maxtxcredits; /* # tx credits */ + int ni_txcredits; /* # tx credits free */ + int ni_mintxcredits; /* lowest it's been */ + int ni_peertxcredits; /* # per-peer send credits */ + lnet_nid_t ni_nid; /* interface's NID */ + void *ni_data; /* instance-specific data */ + lnd_t *ni_lnd; /* procedural interface */ + int ni_refcount; /* reference count */ + char *ni_interfaces[LNET_MAX_INTERFACES]; /* equivalent interfaces to use */ +} lnet_ni_t; + +typedef struct lnet_peer { + struct list_head lp_hashlist; /* chain on peer hash */ + struct list_head lp_txq; /* messages blocking for tx credits */ + struct list_head lp_rtrq; /* messages blocking for router credits */ + struct list_head lp_rtr_list; /* chain on router list */ + int lp_txcredits; /* # tx credits available */ + int lp_mintxcredits; /* low water mark */ + int lp_rtrcredits; /* # router credits */ + int lp_minrtrcredits; /* low water mark */ + unsigned int lp_alive:1; /* alive/dead? */ + unsigned int lp_notify:1; /* notification outstanding? */ + unsigned int lp_notifylnd:1; /* outstanding notification for LND? */ + unsigned int lp_notifying:1; /* some thread is handling notification */ + unsigned int lp_ping_notsent; /* SEND event outstanding from ping */ + int lp_alive_count; /* # times router went dead<->alive */ + long lp_txqnob; /* bytes queued for sending */ + time_t lp_timestamp; /* time of last aliveness news */ + time_t lp_ping_timestamp; /* time of last ping attempt */ + time_t lp_ping_deadline; /* != 0 if ping reply expected */ + lnet_ni_t *lp_ni; /* interface peer is on */ + lnet_nid_t lp_nid; /* peer's NID */ + int lp_refcount; /* # refs */ + int lp_rtr_refcount; /* # refs from lnet_route_t::lr_gateway */ +} lnet_peer_t; + +typedef struct { + struct list_head lr_list; /* chain on net */ + lnet_peer_t *lr_gateway; /* router node */ +} lnet_route_t; + +typedef struct { + struct list_head lrn_list; /* chain on ln_remote_nets */ + struct list_head lrn_routes; /* routes to me */ + __u32 lrn_net; /* my net number */ + unsigned int lrn_hops; /* how far I am */ +} lnet_remotenet_t; - struct list_head ni_active_msgs; - struct list_head ni_active_mds; - struct list_head ni_active_eqs; +typedef struct { + struct list_head rbp_bufs; /* my free buffer pool */ + struct list_head rbp_msgs; /* messages blocking for a buffer */ + int rbp_npages; /* # pages in each buffer */ + int rbp_nbuffers; /* # buffers */ + int rbp_credits; /* # free buffers / blocked messages */ + int rbp_mincredits; /* low water mark */ +} lnet_rtrbufpool_t; + +typedef struct { + struct list_head rb_list; /* chain on rbp_bufs */ + lnet_rtrbufpool_t *rb_pool; /* owning pool */ + lnet_kiov_t rb_kiov[0]; /* the buffer space */ +} lnet_rtrbuf_t; + +typedef struct { + __u32 msgs_alloc; + __u32 msgs_max; + __u32 errors; + __u32 send_count; + __u32 recv_count; + __u32 route_count; + __u32 drop_count; + __u64 send_length; + __u64 recv_length; + __u64 route_length; + __u64 drop_length; +} lnet_counters_t; + +#define LNET_PEER_HASHSIZE 503 /* prime! */ + +#define LNET_NRBPOOLS 3 /* # different router buffer pools */ + +#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL +#define LNET_PROTO_PING_VERSION 1 +typedef struct { + __u32 pi_magic; + __u32 pi_version; + lnet_pid_t pi_pid; + __u32 pi_nnids; + lnet_nid_t pi_nid[0]; +} WIRE_ATTR lnet_ping_info_t; + +/* Options for lnet_portal_t::ptl_options */ +#define LNET_PTL_LAZY (1 << 0) +typedef struct { + struct list_head ptl_ml; /* match list */ + struct list_head ptl_msgq; /* messages blocking for MD */ + __u64 ptl_msgq_version; /* validity stamp */ + unsigned int ptl_options; +} lnet_portal_t; + +/* Router Checker */ +/* < 0 == startup error */ +#define LNET_RC_STATE_SHUTDOWN 0 /* not started */ +#define LNET_RC_STATE_RUNNING 1 /* started up OK */ +#define LNET_RC_STATE_STOPTHREAD 2 /* telling thread to stop */ +#define LNET_RC_STATE_UNLINKING 3 /* unlinking RC MD */ +#define LNET_RC_STATE_UNLINKED 4 /* RC's MD has been unlinked */ + +typedef struct +{ + /* Stuff initialised at LNetInit() */ + int ln_init; /* LNetInit() called? */ + int ln_refcount; /* LNetNIInit/LNetNIFini counter */ + int ln_niinit_self; /* Have I called LNetNIInit myself? */ + + struct list_head ln_lnds; /* registered LNDs */ #ifdef __KERNEL__ - spinlock_t ni_lock; - cfs_waitq_t ni_waitq; + spinlock_t ln_lock; + cfs_waitq_t ln_waitq; + struct semaphore ln_api_mutex; + struct semaphore ln_lnd_mutex; #else - pthread_mutex_t ni_mutex; - pthread_cond_t ni_cond; +# ifndef HAVE_LIBPTHREAD + int ln_lock; + int ln_api_mutex; + int ln_lnd_mutex; +# else + pthread_cond_t ln_cond; + pthread_mutex_t ln_lock; + pthread_mutex_t ln_api_mutex; + pthread_mutex_t ln_lnd_mutex; +# endif #endif -} lib_ni_t; + /* Stuff initialised at LNetNIInit() */ -typedef struct lib_nal -{ - /* lib-level interface state */ - lib_ni_t libnal_ni; - - /* NAL-private data */ - void *libnal_data; - - /* - * send: Sends a preformatted header and payload data to a - * specified remote process. The payload is scattered over 'niov' - * fragments described by iov, starting at 'offset' for 'mlen' - * bytes. - * NB the NAL may NOT overwrite iov. - * PTL_OK on success => NAL has committed to send and will call - * lib_finalize on completion - */ - ptl_err_t (*libnal_send) - (struct lib_nal *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, struct iovec *iov, - size_t offset, size_t mlen); + int ln_shutdown; /* shutdown in progress */ + int ln_nportals; /* # portals */ + lnet_portal_t *ln_portals; /* the vector of portals */ + + lnet_pid_t ln_pid; /* requested pid */ + + struct list_head ln_nis; /* LND instances */ + lnet_ni_t *ln_loni; /* the loopback NI */ + lnet_ni_t *ln_eqwaitni; /* NI to wait for events in */ + struct list_head ln_zombie_nis; /* dying LND instances */ + int ln_nzombie_nis; /* # of NIs to wait for */ + + struct list_head ln_remote_nets; /* remote networks with routes to them */ + __u64 ln_remote_nets_version; /* validity stamp */ + + struct list_head ln_routers; /* list of all known routers */ + __u64 ln_routers_version; /* validity stamp */ + + struct list_head *ln_peer_hash; /* NID->peer hash */ + int ln_npeers; /* # peers extant */ + int ln_peertable_version; /* /proc validity stamp */ - /* as send, but with a set of page fragments (NULL if not supported) */ - ptl_err_t (*libnal_send_pages) - (struct lib_nal *nal, void *private, lib_msg_t * cookie, - ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, ptl_kiov_t *iov, - size_t offset, size_t mlen); - /* - * recv: Receives an incoming message from a remote process. The - * payload is to be received into the scattered buffer of 'niov' - * fragments described by iov, starting at 'offset' for 'mlen' - * bytes. Payload bytes after 'mlen' up to 'rlen' are to be - * discarded. - * NB the NAL may NOT overwrite iov. - * PTL_OK on success => NAL has committed to receive and will call - * lib_finalize on completion - */ - ptl_err_t (*libnal_recv) - (struct lib_nal *nal, void *private, lib_msg_t * cookie, - unsigned int niov, struct iovec *iov, - size_t offset, size_t mlen, size_t rlen); - - /* as recv, but with a set of page fragments (NULL if not supported) */ - ptl_err_t (*libnal_recv_pages) - (struct lib_nal *nal, void *private, lib_msg_t * cookie, - unsigned int niov, ptl_kiov_t *iov, - size_t offset, size_t mlen, size_t rlen); - - /* - * (un)map: Tell the NAL about some memory it will access. - * *addrkey passed to libnal_unmap() is what libnal_map() set it to. - * type of *iov depends on options. - * Set to NULL if not required. - */ - ptl_err_t (*libnal_map) - (struct lib_nal *nal, unsigned int niov, struct iovec *iov, - void **addrkey); - void (*libnal_unmap) - (struct lib_nal *nal, unsigned int niov, struct iovec *iov, - void **addrkey); - - /* as (un)map, but with a set of page fragments */ - ptl_err_t (*libnal_map_pages) - (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - void (*libnal_unmap_pages) - (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - - void (*libnal_printf)(struct lib_nal *nal, const char *fmt, ...); - - /* Calculate a network "distance" to given node */ - int (*libnal_dist) (struct lib_nal *nal, ptl_nid_t nid, unsigned long *dist); -} lib_nal_t; - -typedef struct /* loopback descriptor */ -{ - unsigned int lod_type; - unsigned int lod_niov; - size_t lod_offset; - size_t lod_nob; - union { - struct iovec *iov; - ptl_kiov_t *kiov; - } lod_iov; -} lo_desc_t; + int ln_routing; /* am I a router? */ + lnet_rtrbufpool_t ln_rtrpools[LNET_NRBPOOLS]; /* router buffer pools */ + + int ln_lh_hash_size; /* size of lib handle hash table */ + struct list_head *ln_lh_hash_table; /* all extant lib handles, this interface */ + __u64 ln_next_object_cookie; /* cookie generator */ + __u64 ln_interface_cookie; /* uniquely identifies this ni in this epoch */ + + char *ln_network_tokens; /* space for network names */ + int ln_network_tokens_nob; + + int ln_testprotocompat; /* test protocol compatibility flags */ + + struct list_head ln_finalizeq; /* msgs waiting to complete finalizing */ +#ifdef __KERNEL__ + void **ln_finalizers; /* threads doing finalization */ + int ln_nfinalizers; /* max # threads finalizing */ +#else + int ln_finalizing; +#endif + struct list_head ln_test_peers; /* failure simulation */ + + lnet_handle_md_t ln_ping_target_md; + lnet_handle_eq_t ln_ping_target_eq; + lnet_ping_info_t *ln_ping_info; + +#ifdef __KERNEL__ + int ln_rc_state; /* router checker startup/shutdown state */ + struct semaphore ln_rc_signal; /* serialise startup/shutdown */ + lnet_handle_eq_t ln_rc_eqh; /* router checker's event queue */ +#endif + +#ifdef LNET_USE_LIB_FREELIST + lnet_freelist_t ln_free_mes; + lnet_freelist_t ln_free_msgs; + lnet_freelist_t ln_free_mds; + lnet_freelist_t ln_free_eqs; +#endif + struct list_head ln_active_msgs; + struct list_head ln_active_mds; + struct list_head ln_active_eqs; + + lnet_counters_t ln_counters; + +#ifndef __KERNEL__ + /* Temporary workaround to allow uOSS and test programs force + * server mode in userspace. The only place where we use it is + * lnet_prepare(). The only way to turn this flag on is to + * call lnet_server_mode() */ -#define LOD_IOV 0xeb105 -#define LOD_KIOV 0xeb106 + int ln_server_mode_flag; +#endif +} lnet_t; #endif