X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Finclude%2Flnet%2Flib-types.h;h=ddfc7f4304dc840cde5470fbbdea0c4f872591ca;hp=3cd61b855b5ebbfe28b0dfd0b0869e7c6c015756;hb=c079e8c57a09419b1e5978dba2a469d6ae4c712c;hpb=75a8f4b4aa9ad6bf697aedece539e62111e9029a diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 3cd61b8..ddfc7f4 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -26,6 +26,8 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2012, 2013, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -44,8 +46,6 @@ #include #elif defined(__APPLE__) #include -#elif defined(__WINNT__) -#include #else #error Unsupported Operating System #endif @@ -57,7 +57,6 @@ #define WIRE_ATTR __attribute__((packed)) /* Packed version of lnet_process_id_t to transfer via network */ -#include typedef struct { lnet_nid_t nid; lnet_pid_t pid; /* node id / process id */ @@ -71,7 +70,6 @@ typedef struct { __u64 wh_interface_cookie; __u64 wh_object_cookie; } WIRE_ATTR lnet_handle_wire_t; -#include typedef enum { LNET_MSG_ACK = 0, @@ -85,7 +83,6 @@ typedef enum { * byte boundary in the message header. Note that all types used in these * wire structs MUST be fixed size and the smaller types are placed at the * end. */ -#include typedef struct lnet_ack { lnet_handle_wire_t dst_wmd; __u64 match_bits; @@ -175,7 +172,6 @@ typedef struct { __u32 acr_version; /* protocol version */ __u64 acr_nid; /* target NID */ } WIRE_ATTR lnet_acceptor_connreq_t; -#include #define LNET_PROTO_ACCEPTOR_VERSION 1 @@ -183,16 +179,28 @@ typedef struct { struct lnet_libmd; typedef struct lnet_msg { - cfs_list_t msg_activelist; - cfs_list_t msg_list; /* Q for credits/MD */ + struct list_head msg_activelist; + struct list_head msg_list; /* Q for credits/MD */ - lnet_process_id_t msg_target; + lnet_process_id_t msg_target; /* where is it from, it's only for building event */ lnet_nid_t msg_from; __u32 msg_type; - unsigned int msg_rx_committed:1; + /* commited for sending */ unsigned int msg_tx_committed:1; + /* CPT # this message committed for sending */ + unsigned int msg_tx_cpt:15; + /* commited for receiving */ + unsigned int msg_rx_committed:1; + /* CPT # this message committed for receiving */ + unsigned int msg_rx_cpt:15; + /* queued for tx credit */ + unsigned int msg_tx_delayed:1; + /* queued for RX buffer */ + unsigned int msg_rx_delayed:1; + /* ready for pending on RX delay list */ + unsigned int msg_rx_ready_delay:1; unsigned int msg_vmflush:1; /* VM trying to free memory */ unsigned int msg_target_is_router:1; /* sending to a router */ @@ -200,7 +208,6 @@ typedef struct lnet_msg { unsigned int msg_ack:1; /* ack on finalize (PUT) */ unsigned int msg_sending:1; /* outgoing message */ unsigned int msg_receiving:1; /* being received */ - unsigned int msg_delayed:1; /* had to Q for buffer or tx credit */ unsigned int msg_txcredit:1; /* taken an NI send credit */ unsigned int msg_peertxcredit:1; /* taken a peer send credit */ unsigned int msg_rtrcredit:1; /* taken a globel router credit */ @@ -226,97 +233,101 @@ typedef struct lnet_msg { typedef struct lnet_libhandle { - cfs_list_t lh_hash_chain; - __u64 lh_cookie; + struct list_head lh_hash_chain; + __u64 lh_cookie; } lnet_libhandle_t; #define lh_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) typedef struct lnet_eq { - cfs_list_t eq_list; - lnet_libhandle_t eq_lh; - lnet_seq_t eq_enq_seq; - lnet_seq_t eq_deq_seq; - unsigned int eq_size; - lnet_event_t *eq_events; - int eq_refcount; - lnet_eq_handler_t eq_callback; + struct list_head eq_list; + lnet_libhandle_t eq_lh; + lnet_seq_t eq_enq_seq; + lnet_seq_t eq_deq_seq; + unsigned int eq_size; + lnet_eq_handler_t eq_callback; + lnet_event_t *eq_events; + int **eq_refs; /* percpt refcount for EQ */ } lnet_eq_t; typedef struct lnet_me { - cfs_list_t me_list; - lnet_libhandle_t me_lh; - lnet_process_id_t me_match_id; - unsigned int me_portal; - __u64 me_match_bits; - __u64 me_ignore_bits; - lnet_unlink_t me_unlink; - struct lnet_libmd *me_md; + struct list_head me_list; + lnet_libhandle_t me_lh; + lnet_process_id_t me_match_id; + unsigned int me_portal; + unsigned int me_pos; /* hash offset in mt_hash */ + __u64 me_match_bits; + __u64 me_ignore_bits; + lnet_unlink_t me_unlink; + struct lnet_libmd *me_md; } lnet_me_t; typedef struct lnet_libmd { - cfs_list_t md_list; - lnet_libhandle_t md_lh; - lnet_me_t *md_me; - char *md_start; - unsigned int md_offset; - unsigned int md_length; - unsigned int md_max_size; - int md_threshold; - int md_refcount; - unsigned int md_options; - unsigned int md_flags; - void *md_user_ptr; - lnet_eq_t *md_eq; - unsigned int md_niov; /* # frags */ - union { - struct iovec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; - } md_iov; + struct list_head md_list; + lnet_libhandle_t md_lh; + lnet_me_t *md_me; + char *md_start; + unsigned int md_offset; + unsigned int md_length; + unsigned int md_max_size; + int md_threshold; + int md_refcount; + unsigned int md_options; + unsigned int md_flags; + void *md_user_ptr; + lnet_eq_t *md_eq; + unsigned int md_niov; /* # frags */ + union { + struct iovec iov[LNET_MAX_IOV]; + lnet_kiov_t kiov[LNET_MAX_IOV]; + } md_iov; } lnet_libmd_t; -#define LNET_MD_FLAG_ZOMBIE (1 << 0) -#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) +#define LNET_MD_FLAG_ZOMBIE (1 << 0) +#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) +#define LNET_MD_FLAG_ABORTED (1 << 2) #ifdef LNET_USE_LIB_FREELIST typedef struct { - void *fl_objs; /* single contiguous array of objects */ - int fl_nobjs; /* the number of them */ - int fl_objsize; /* the size (including overhead) of each of them */ - cfs_list_t fl_list; /* where they are enqueued */ + /* single contiguous array of objects */ + void *fl_objs; + /* the number of them */ + int fl_nobjs; + /* the size (including overhead) of each of them */ + int fl_objsize; + /* where they are enqueued */ + struct list_head fl_list; } lnet_freelist_t; typedef struct { - cfs_list_t fo_list; /* enqueue on fl_list */ - void *fo_contents; /* aligned contents */ + struct list_head fo_list; /* enqueue on fl_list */ + void *fo_contents; /* aligned contents */ } lnet_freeobj_t; #endif typedef struct { - /* info about peers we are trying to fail */ - cfs_list_t tp_list; /* ln_test_peers */ - lnet_nid_t tp_nid; /* matching nid */ - unsigned int tp_threshold; /* # failures to simulate */ + /* info about peers we are trying to fail */ + struct list_head tp_list; /* ln_test_peers */ + lnet_nid_t tp_nid; /* matching nid */ + unsigned int tp_threshold; /* # failures to simulate */ } lnet_test_peer_t; #define LNET_COOKIE_TYPE_MD 1 #define LNET_COOKIE_TYPE_ME 2 #define LNET_COOKIE_TYPE_EQ 3 #define LNET_COOKIE_TYPE_BITS 2 -#define LNET_COOKIE_TYPES (1 << LNET_COOKIE_TYPE_BITS) -/* LNET_COOKIE_TYPES must be a power of 2, so the cookie type can be - * extracted by masking with (LNET_COOKIE_TYPES - 1) */ +#define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) struct lnet_ni; /* forward ref */ typedef struct lnet_lnd { - /* fields managed by portals */ - cfs_list_t lnd_list; /* stash in the LND table */ - int lnd_refcount; /* # active instances */ + /* fields managed by portals */ + struct list_head lnd_list; /* stash in the LND table */ + int lnd_refcount; /* # active instances */ /* fields initialised by the LND */ unsigned int lnd_type; @@ -391,41 +402,65 @@ typedef struct { __u32 ns_unused; } WIRE_ATTR lnet_ni_status_t; +struct lnet_tx_queue { + int tq_credits; /* # tx credits free */ + int tq_credits_min; /* lowest it's been */ + int tq_credits_max; /* total # tx credits */ + struct list_head tq_delayed; /* delayed TXs */ +}; + #define LNET_MAX_INTERFACES 16 typedef struct lnet_ni { - cfs_list_t ni_list; /* chain on ln_nis */ - cfs_list_t ni_txq; /* messages waiting for tx credits */ - int ni_maxtxcredits; /* # tx credits */ - int ni_txcredits; /* # tx credits free */ - int ni_mintxcredits; /* lowest it's been */ - int ni_peertxcredits; /* # per-peer send credits */ - int ni_peerrtrcredits; /* # per-peer router buffer credits */ - int ni_peertimeout; /* seconds to consider peer dead */ - lnet_nid_t ni_nid; /* interface's NID */ - void *ni_data; /* instance-specific data */ - lnd_t *ni_lnd; /* procedural interface */ - int ni_refcount; /* reference count */ - /* when I was last alive */ - long ni_last_alive; +#ifdef __KERNEL__ + spinlock_t ni_lock; +#else +# ifndef HAVE_LIBPTHREAD + int ni_lock; +# else + pthread_mutex_t ni_lock; +# endif +#endif + struct list_head ni_list; /* chain on ln_nis */ + struct list_head ni_cptlist; /* chain on ln_nis_cpt */ + int ni_maxtxcredits; /* # tx credits */ + /* # per-peer send credits */ + int ni_peertxcredits; + /* # per-peer router buffer credits */ + int ni_peerrtrcredits; + /* seconds to consider peer dead */ + int ni_peertimeout; + int ni_ncpts; /* number of CPTs */ + __u32 *ni_cpts; /* bond NI on some CPTs */ + lnet_nid_t ni_nid; /* interface's NID */ + void *ni_data; /* instance-specific data */ + lnd_t *ni_lnd; /* procedural interface */ + struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */ + int **ni_refs; /* percpt reference count */ + long ni_last_alive; /* when I was last alive */ lnet_ni_status_t *ni_status; /* my health status */ /* equivalent interfaces to use */ char *ni_interfaces[LNET_MAX_INTERFACES]; } lnet_ni_t; -#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL -enum { - LNET_PROTO_PING_UNKNOWN = 0, /* unknown */ - LNET_PROTO_PING_VERSION_1 = 1, /* old version */ - LNET_PROTO_PING_VERSION = 2, /* current version */ -}; +#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL + +/* NB: value of these features equal to LNET_PROTO_PING_VERSION_x + * of old LNet, so there shouldn't be any compatibility issue */ +#define LNET_PING_FEAT_INVAL (0) /* no feature */ +#define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */ +#define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */ +#define LNET_PING_FEAT_RTE_DISABLED (1 << 2) /* Routing enabled */ + +#define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \ + LNET_PING_FEAT_NI_STATUS) typedef struct { - __u32 pi_magic; - __u32 pi_version; - lnet_pid_t pi_pid; - __u32 pi_nnis; - lnet_ni_status_t pi_ni[0]; + __u32 pi_magic; + __u32 pi_features; + lnet_pid_t pi_pid; + __u32 pi_nnis; + lnet_ni_status_t pi_ni[0]; } WIRE_ATTR lnet_ping_info_t; /* router checker data, per router */ @@ -433,44 +468,66 @@ typedef struct { #define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS]) typedef struct { /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ - cfs_list_t rcd_list; + struct list_head rcd_list; lnet_handle_md_t rcd_mdh; /* ping buffer MD */ struct lnet_peer *rcd_gateway; /* reference to gateway */ lnet_ping_info_t *rcd_pinginfo; /* ping buffer */ } lnet_rc_data_t; typedef struct lnet_peer { - cfs_list_t lp_hashlist; /* chain on peer hash */ - cfs_list_t lp_txq; /* messages blocking for tx credits */ - cfs_list_t lp_rtrq; /* messages blocking for router credits */ - cfs_list_t lp_rtr_list; /* chain on router list */ - int lp_txcredits; /* # tx credits available */ - int lp_mintxcredits; /* low water mark */ - int lp_rtrcredits; /* # router credits */ - int lp_minrtrcredits; /* low water mark */ - unsigned int lp_alive:1; /* alive/dead? */ - unsigned int lp_notify:1; /* notification outstanding? */ - unsigned int lp_notifylnd:1; /* outstanding notification for LND? */ - unsigned int lp_notifying:1; /* some thread is handling notification */ - unsigned int lp_ping_notsent; /* SEND event outstanding from ping */ - int lp_alive_count; /* # times router went dead<->alive */ - long lp_txqnob; /* bytes queued for sending */ - cfs_time_t lp_timestamp; /* time of last aliveness news */ - cfs_time_t lp_ping_timestamp; /* time of last ping attempt */ - cfs_time_t lp_ping_deadline; /* != 0 if ping reply expected */ - cfs_time_t lp_last_alive; /* when I was last alive */ - cfs_time_t lp_last_query; /* when lp_ni was queried last time */ - lnet_ni_t *lp_ni; /* interface peer is on */ - lnet_nid_t lp_nid; /* peer's NID */ - int lp_refcount; /* # refs */ - int lp_rtr_refcount; /* # refs from lnet_route_t::lr_gateway */ - /* returned RC ping version */ - unsigned int lp_ping_version; - cfs_list_t lp_routes; /* routers on this peer */ + /* chain on peer hash */ + struct list_head lp_hashlist; + /* messages blocking for tx credits */ + struct list_head lp_txq; + /* messages blocking for router credits */ + struct list_head lp_rtrq; + /* chain on router list */ + struct list_head lp_rtr_list; + /* # tx credits available */ + int lp_txcredits; + /* low water mark */ + int lp_mintxcredits; + /* # router credits */ + int lp_rtrcredits; + /* low water mark */ + int lp_minrtrcredits; + /* alive/dead? */ + unsigned int lp_alive:1; + /* notification outstanding? */ + unsigned int lp_notify:1; + /* outstanding notification for LND? */ + unsigned int lp_notifylnd:1; + /* some thread is handling notification */ + unsigned int lp_notifying:1; + /* SEND event outstanding from ping */ + unsigned int lp_ping_notsent; + /* # times router went dead<->alive */ + int lp_alive_count; + /* bytes queued for sending */ + long lp_txqnob; + /* time of last aliveness news */ + cfs_time_t lp_timestamp; + /* time of last ping attempt */ + cfs_time_t lp_ping_timestamp; + /* != 0 if ping reply expected */ + cfs_time_t lp_ping_deadline; + /* when I was last alive */ + cfs_time_t lp_last_alive; + /* when lp_ni was queried last time */ + cfs_time_t lp_last_query; + /* interface peer is on */ + lnet_ni_t *lp_ni; + lnet_nid_t lp_nid; /* peer's NID */ + int lp_refcount; /* # refs */ + int lp_cpt; /* CPT this peer attached on */ + /* # refs from lnet_route_t::lr_gateway */ + int lp_rtr_refcount; + /* returned RC ping features */ + unsigned int lp_ping_feats; + struct list_head lp_routes; /* routers on this peer */ lnet_rc_data_t *lp_rcd; /* router checker state */ } lnet_peer_t; - /* peer hash size */ #define LNET_PEER_HASH_BITS 9 #define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS) @@ -479,43 +536,62 @@ typedef struct lnet_peer { struct lnet_peer_table { int pt_version; /* /proc validity stamp */ int pt_number; /* # peers extant */ - cfs_list_t pt_deathrow; /* zombie peers */ - cfs_list_t *pt_hash; /* NID->peer hash */ + int pt_zombies; /* # zombies to go to deathrow + * (and not there yet) */ + struct list_head pt_deathrow; /* zombie peers */ + struct list_head *pt_hash; /* NID->peer hash */ }; -#define lnet_peer_aliveness_enabled(lp) ((lp)->lp_ni->ni_peertimeout > 0) +/* peer aliveness is enabled only on routers for peers in a network where the + * lnet_ni_t::ni_peertimeout has been set to a positive value */ +#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \ + (lp)->lp_ni->ni_peertimeout > 0) typedef struct { - cfs_list_t lr_list; /* chain on net */ - cfs_list_t lr_gwlist; /* chain on gateway */ + struct list_head lr_list; /* chain on net */ + struct list_head lr_gwlist; /* chain on gateway */ lnet_peer_t *lr_gateway; /* router node */ __u32 lr_net; /* remote network number */ + int lr_seq; /* sequence for round-robin */ unsigned int lr_downis; /* number of down NIs */ unsigned int lr_hops; /* how far I am */ + unsigned int lr_priority; /* route priority */ } lnet_route_t; +#define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) +#define LNET_REMOTE_NETS_HASH_MAX (1U << 16) +#define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) + typedef struct { - cfs_list_t lrn_list; /* chain on ln_remote_nets */ - cfs_list_t lrn_routes; /* routes to me */ - __u32 lrn_net; /* my net number */ + /* chain on ln_remote_nets_hash */ + struct list_head lrn_list; + /* routes to me */ + struct list_head lrn_routes; + /* my net number */ + __u32 lrn_net; } lnet_remotenet_t; typedef struct { - cfs_list_t rbp_bufs; /* my free buffer pool */ - cfs_list_t rbp_msgs; /* messages blocking for a buffer */ - int rbp_npages; /* # pages in each buffer */ - int rbp_nbuffers; /* # buffers */ - int rbp_credits; /* # free buffers / blocked messages */ - int rbp_mincredits; /* low water mark */ + /* my free buffer pool */ + struct list_head rbp_bufs; + /* messages blocking for a buffer */ + struct list_head rbp_msgs; + /* # pages in each buffer */ + int rbp_npages; + /* # buffers */ + int rbp_nbuffers; + /* # free buffers / blocked messages */ + int rbp_credits; + /* low water mark */ + int rbp_mincredits; } lnet_rtrbufpool_t; typedef struct { - cfs_list_t rb_list; /* chain on rbp_bufs */ - lnet_rtrbufpool_t *rb_pool; /* owning pool */ - lnet_kiov_t rb_kiov[0]; /* the buffer space */ + struct list_head rb_list; /* chain on rbp_bufs */ + lnet_rtrbufpool_t *rb_pool; /* owning pool */ + lnet_kiov_t rb_kiov[0]; /* the buffer space */ } lnet_rtrbuf_t; -#include typedef struct { __u32 msgs_alloc; __u32 msgs_max; @@ -529,11 +605,15 @@ typedef struct { __u64 route_length; __u64 drop_length; } WIRE_ATTR lnet_counters_t; -#include #define LNET_PEER_HASHSIZE 503 /* prime! */ -#define LNET_NRBPOOLS 3 /* # different router buffer pools */ +#define LNET_TINY_BUF_IDX 0 +#define LNET_SMALL_BUF_IDX 1 +#define LNET_LARGE_BUF_IDX 2 + +/* # different router buffer pools */ +#define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1) enum { /* Didn't match anything */ @@ -542,6 +622,10 @@ enum { LNET_MATCHMD_OK = (1 << 1), /* Must be discarded */ LNET_MATCHMD_DROP = (1 << 2), + /* match and buffer is exhausted */ + LNET_MATCHMD_EXHAUSTED = (1 << 3), + /* match or drop */ + LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), }; /* Options for lnet_portal_t::ptl_options */ @@ -549,18 +633,78 @@ enum { #define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ #define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, request portal */ +/* parameter for matching operations (GET, PUT) */ +struct lnet_match_info { + __u64 mi_mbits; + lnet_process_id_t mi_id; + unsigned int mi_opc; + unsigned int mi_portal; + unsigned int mi_rlength; + unsigned int mi_roffset; +}; + /* ME hash of RDMA portal */ -#define LNET_PORTAL_HASH_BITS 8 -#define LNET_PORTAL_HASH_SIZE (1 << LNET_PORTAL_HASH_BITS) +#define LNET_MT_HASH_BITS 8 +#define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS) +#define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1) +/* we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_hash, + * the last entry is reserved for MEs with ignore-bits */ +#define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE +/* __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which + * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the + * ME-list with ignore-bits, which is mtable::mt_hash[LNET_MT_HASH_IGNORE] */ +#define LNET_MT_BITS_U64 6 /* 2^6 bits */ +#define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64) +#define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1) + +/* portal match table */ +struct lnet_match_table { + /* reserved for upcoming patches, CPU partition ID */ + unsigned int mt_cpt; + unsigned int mt_portal; /* portal index */ + /* match table is set as "enabled" if there's non-exhausted MD + * attached on mt_mhash, it's only valide for wildcard portal */ + unsigned int mt_enabled; + /* bitmap to flag whether MEs on mt_hash are exhausted or not */ + __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP]; + struct list_head *mt_mhash; /* matching hash */ +}; + +/* these are only useful for wildcard portal */ +/* Turn off message rotor for wildcard portals */ +#define LNET_PTL_ROTOR_OFF 0 +/* round-robin dispatch all PUT messages for wildcard portals */ +#define LNET_PTL_ROTOR_ON 1 +/* round-robin dispatch routed PUT message for wildcard portals */ +#define LNET_PTL_ROTOR_RR_RT 2 +/* dispatch routed PUT message by hashing source NID for wildcard portals */ +#define LNET_PTL_ROTOR_HASH_RT 3 typedef struct lnet_portal { +#ifdef __KERNEL__ + spinlock_t ptl_lock; +#else +# ifndef HAVE_LIBPTHREAD + int ptl_lock; +# else + pthread_mutex_t ptl_lock; +# endif +#endif unsigned int ptl_index; /* portal ID, reserved */ - cfs_list_t *ptl_mhash; /* match hash */ - cfs_list_t ptl_mlist; /* match list */ - cfs_list_t ptl_msgq; /* messages blocking for MD */ - __u64 ptl_ml_version; /* validity stamp, only changed for new attached MD */ - __u64 ptl_msgq_version; /* validity stamp */ - unsigned int ptl_options; + /* flags on this portal: lazy, unique... */ + unsigned int ptl_options; + /* list of messags which are stealing buffer */ + struct list_head ptl_msg_stealing; + /* messages blocking for MD */ + struct list_head ptl_msg_delayed; + /* Match table for each CPT */ + struct lnet_match_table **ptl_mtables; + /* spread rotor of incoming "PUT" */ + int ptl_rotor; + /* # active entries for this portal */ + int ptl_mt_nmaps; + /* array of active entries' cpu-partition-id */ + int ptl_mt_maps[0]; } lnet_portal_t; #define LNET_LH_HASH_BITS 12 @@ -571,8 +715,8 @@ typedef struct lnet_portal { struct lnet_res_container { unsigned int rec_type; /* container type */ __u64 rec_lh_cookie; /* cookie generator */ - cfs_list_t rec_active; /* active resource list */ - cfs_list_t *rec_lh_hash; /* handle hash */ + struct list_head rec_active; /* active resource list */ + struct list_head *rec_lh_hash; /* handle hash */ #ifdef LNET_USE_LIB_FREELIST lnet_freelist_t rec_freelist; /* freelist for resources */ #endif @@ -584,8 +728,8 @@ struct lnet_msg_container { /* max # threads finalizing */ int msc_nfinalizers; /* msgs waiting to complete finalizing */ - cfs_list_t msc_finalizing; - cfs_list_t msc_active; /* active message list */ + struct list_head msc_finalizing; + struct list_head msc_active; /* active message list */ /* threads doing finalization */ void **msc_finalizers; #ifdef LNET_USE_LIB_FREELIST @@ -600,101 +744,118 @@ struct lnet_msg_container { typedef struct { - /* Stuff initialised at LNetInit() */ - int ln_init; /* LNetInit() called? */ - int ln_refcount; /* LNetNIInit/LNetNIFini counter */ - int ln_niinit_self; /* Have I called LNetNIInit myself? */ - /* shutdown in progress */ - int ln_shutdown; - - cfs_list_t ln_lnds; /* registered LNDs */ + /* CPU partition table of LNet */ + struct cfs_cpt_table *ln_cpt_table; + /* number of CPTs in ln_cpt_table */ + unsigned int ln_cpt_number; + unsigned int ln_cpt_bits; + + /* protect LNet resources (ME/MD/EQ) */ + struct cfs_percpt_lock *ln_res_lock; + /* # portals */ + int ln_nportals; + /* the vector of portals */ + lnet_portal_t **ln_portals; + /* percpt ME containers */ + struct lnet_res_container **ln_me_containers; + /* percpt MD container */ + struct lnet_res_container **ln_md_containers; + /* Event Queue container */ + struct lnet_res_container ln_eq_container; #ifdef __KERNEL__ - cfs_spinlock_t ln_lock; - cfs_mutex_t ln_api_mutex; - cfs_mutex_t ln_lnd_mutex; - cfs_waitq_t ln_eq_waitq; + wait_queue_head_t ln_eq_waitq; + spinlock_t ln_eq_wait_lock; #else # ifndef HAVE_LIBPTHREAD - int ln_lock; - int ln_api_mutex; - int ln_lnd_mutex; + int ln_eq_wait_lock; # else - pthread_mutex_t ln_lock; - pthread_mutex_t ln_api_mutex; - pthread_mutex_t ln_lnd_mutex; pthread_cond_t ln_eq_cond; + pthread_mutex_t ln_eq_wait_lock; # endif #endif - /* ME container */ - struct lnet_res_container ln_me_container; - /* MD container */ - struct lnet_res_container ln_md_container; - /* Event Queue container */ - struct lnet_res_container ln_eq_container; - - /* # portals */ - int ln_nportals; - /* the vector of portals */ - lnet_portal_t **ln_portals; - - lnet_pid_t ln_pid; /* requested pid */ - - cfs_list_t ln_nis; /* LND instances */ - lnet_ni_t *ln_loni; /* the loopback NI */ + unsigned int ln_remote_nets_hbits; + + /* protect NI, peer table, credits, routers, rtrbuf... */ + struct cfs_percpt_lock *ln_net_lock; + /* percpt message containers for active/finalizing/freed message */ + struct lnet_msg_container **ln_msg_containers; + lnet_counters_t **ln_counters; + struct lnet_peer_table **ln_peer_tables; + /* failure simulation */ + struct list_head ln_test_peers; + + struct list_head ln_nis; /* LND instances */ + /* NIs bond on specific CPT(s) */ + struct list_head ln_nis_cpt; + /* dying LND instances */ + struct list_head ln_nis_zombie; + lnet_ni_t *ln_loni; /* the loopback NI */ /* NI to wait for events in */ lnet_ni_t *ln_eq_waitni; - cfs_list_t ln_zombie_nis; /* dying LND instances */ - int ln_nzombie_nis; /* # of NIs to wait for */ - - cfs_list_t ln_remote_nets; /* remote networks with routes to them */ - __u64 ln_remote_nets_version; /* validity stamp */ - - cfs_list_t ln_routers; /* list of all known routers */ - __u64 ln_routers_version; /* validity stamp */ - - int ln_routing; /* am I a router? */ - lnet_rtrbufpool_t ln_rtrpools[LNET_NRBPOOLS]; /* router buffer pools */ - - __u64 ln_interface_cookie; /* uniquely identifies this ni in this epoch */ - - char *ln_network_tokens; /* space for network names */ - int ln_network_tokens_nob; + /* remote networks with routes to them */ + struct list_head *ln_remote_nets_hash; + /* validity stamp */ + __u64 ln_remote_nets_version; + /* list of all known routers */ + struct list_head ln_routers; + /* validity stamp */ + __u64 ln_routers_version; + /* percpt router buffer pools */ + lnet_rtrbufpool_t **ln_rtrpools; + + lnet_handle_md_t ln_ping_target_md; + lnet_handle_eq_t ln_ping_target_eq; + lnet_ping_info_t *ln_ping_info; - int ln_testprotocompat; /* test protocol compatibility flags */ - - cfs_list_t ln_test_peers; /* failure simulation */ - - /* message container */ - struct lnet_peer_table *ln_peer_table; - struct lnet_msg_container ln_msg_container; - - lnet_handle_md_t ln_ping_target_md; - lnet_handle_eq_t ln_ping_target_eq; - lnet_ping_info_t *ln_ping_info; - -#ifdef __KERNEL__ - cfs_semaphore_t ln_rc_signal; /* serialise startup/shutdown */ -#endif /* router checker startup/shutdown state */ int ln_rc_state; /* router checker's event queue */ lnet_handle_eq_t ln_rc_eqh; /* rcd still pending on net */ - cfs_list_t ln_rcd_deathrow; + struct list_head ln_rcd_deathrow; /* rcd ready for free */ - cfs_list_t ln_rcd_zombie; + struct list_head ln_rcd_zombie; +#ifdef __KERNEL__ + /* serialise startup/shutdown */ + struct semaphore ln_rc_signal; - lnet_counters_t ln_counters; + struct mutex ln_api_mutex; + struct mutex ln_lnd_mutex; +#else +# ifndef HAVE_LIBPTHREAD + int ln_api_mutex; + int ln_lnd_mutex; +# else + pthread_mutex_t ln_api_mutex; + pthread_mutex_t ln_lnd_mutex; +# endif +#endif + int ln_init; /* LNetInit() called? */ + /* Have I called LNetNIInit myself? */ + int ln_niinit_self; + /* LNetNIInit/LNetNIFini counter */ + int ln_refcount; + /* shutdown in progress */ + int ln_shutdown; -#ifndef __KERNEL__ - /* Temporary workaround to allow uOSS and test programs force - * server mode in userspace. The only place where we use it is - * lnet_prepare(). The only way to turn this flag on is to - * call lnet_server_mode() */ + int ln_routing; /* am I a router? */ + lnet_pid_t ln_pid; /* requested pid */ + /* uniquely identifies this ni in this epoch */ + __u64 ln_interface_cookie; + /* registered LNDs */ + struct list_head ln_lnds; - int ln_server_mode_flag; + /* test protocol compatibility flags */ + int ln_testprotocompat; + +#ifndef __KERNEL__ + /* Temporary workaround to allow uOSS and test programs force + * server mode in userspace. The only place where we use it is + * lnet_prepare(). The only way to turn this flag on is to + * call lnet_server_mode() */ + int ln_server_mode_flag; #endif } lnet_t;