X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Finclude%2Flnet%2Flib-types.h;h=ed52e0a5e4f8c086d1e9e9f2b820707fcef60cc8;hp=fdb17844e466704586d6a58848f5c928e575451f;hb=0131d39a622f1efc07dc49df7bceed1bbe16357d;hpb=6b1571209a9938719b081465f1ee327380a70554 diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index fdb1784..ed52e0a 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lnet/include/lnet/lib-types.h * @@ -46,20 +45,24 @@ #include #include #include +#include #include #include +#include /* Max payload size */ #define LNET_MAX_PAYLOAD LNET_MTU -#define LNET_MAX_IOV (LNET_MAX_PAYLOAD >> PAGE_SHIFT) +/** limit on the number of fragments in discontiguous MDs */ +#define LNET_MAX_IOV 256 /* * This is the maximum health value. * All local and peer NIs created have their health default to this value. */ #define LNET_MAX_HEALTH_VALUE 1000 +#define LNET_MAX_SELECTION_PRIORITY UINT_MAX /* forward refs */ struct lnet_libmd; @@ -75,7 +78,8 @@ enum lnet_msg_hstatus { LNET_MSG_STATUS_REMOTE_ERROR, LNET_MSG_STATUS_REMOTE_DROPPED, LNET_MSG_STATUS_REMOTE_TIMEOUT, - LNET_MSG_STATUS_NETWORK_TIMEOUT + LNET_MSG_STATUS_NETWORK_TIMEOUT, + LNET_MSG_STATUS_END, }; struct lnet_rsp_tracker { @@ -83,6 +87,8 @@ struct lnet_rsp_tracker { struct list_head rspt_on_list; /* cpt to lock */ int rspt_cpt; + /* nid of next hop */ + lnet_nid_t rspt_next_hop_nid; /* deadline of the REPLY/ACK */ ktime_t rspt_deadline; /* parent MD */ @@ -163,8 +169,7 @@ struct lnet_msg { unsigned int msg_wanted; unsigned int msg_offset; unsigned int msg_niov; - struct kvec *msg_iov; - lnet_kiov_t *msg_kiov; + struct bio_vec *msg_kiov; struct lnet_event msg_ev; struct lnet_hdr msg_hdr; @@ -178,20 +183,9 @@ struct lnet_libhandle { #define lh_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) -struct lnet_eq { - struct list_head eq_list; - struct lnet_libhandle eq_lh; - unsigned long eq_enq_seq; - unsigned long eq_deq_seq; - unsigned int eq_size; - lnet_eq_handler_t eq_callback; - struct lnet_event *eq_events; - int **eq_refs; /* percpt refcount for EQ */ -}; - struct lnet_me { struct list_head me_list; - struct lnet_libhandle me_lh; + int me_cpt; struct lnet_process_id me_match_id; unsigned int me_portal; unsigned int me_pos; /* hash offset in mt_hash */ @@ -216,17 +210,24 @@ struct lnet_libmd { unsigned int md_niov; /* # frags at end of struct */ void *md_user_ptr; struct lnet_rsp_tracker *md_rspt_ptr; - struct lnet_eq *md_eq; + lnet_handler_t md_handler; struct lnet_handle_md md_bulk_handle; - union { - struct kvec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; - } md_iov; + struct bio_vec md_kiov[LNET_MAX_IOV]; }; -#define LNET_MD_FLAG_ZOMBIE (1 << 0) -#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) -#define LNET_MD_FLAG_ABORTED (1 << 2) +#define LNET_MD_FLAG_ZOMBIE BIT(0) +#define LNET_MD_FLAG_AUTO_UNLINK BIT(1) +#define LNET_MD_FLAG_ABORTED BIT(2) +/* LNET_MD_FLAG_HANDLING is set when a non-unlink event handler + * is being called for an event relating to the md. + * It ensures only one such handler runs at a time. + * The final "unlink" event is only called once the + * md_refcount has reached zero, and this flag has been cleared, + * ensuring that it doesn't race with any other event handler + * call. + */ +#define LNET_MD_FLAG_HANDLING BIT(3) +#define LNET_MD_FLAG_DISCARD BIT(4) struct lnet_test_peer { /* info about peers we are trying to fail */ @@ -241,14 +242,25 @@ struct lnet_test_peer { #define LNET_COOKIE_TYPE_BITS 2 #define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) +struct netstrfns { + u32 nf_type; + char *nf_name; + char *nf_modname; + void (*nf_addr2str)(u32 addr, char *str, size_t size); + int (*nf_str2addr)(const char *str, int nob, u32 *addr); + int (*nf_parse_addrlist)(char *str, int len, + struct list_head *list); + int (*nf_print_addrlist)(char *buffer, int count, + struct list_head *list); + int (*nf_match_addr)(u32 addr, struct list_head *list); + int (*nf_min_max)(struct list_head *nidlist, u32 *min_nid, + u32 *max_nid); +}; + struct lnet_ni; /* forward ref */ struct socket; struct lnet_lnd { - /* fields managed by portals */ - struct list_head lnd_list; /* stash in the LND table */ - int lnd_refcount; /* # active instances */ - /* fields initialized by the LND */ __u32 lnd_type; @@ -257,11 +269,7 @@ struct lnet_lnd { int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); /* In data movement APIs below, payload buffers are described as a set - * of 'niov' fragments which are... - * EITHER - * in virtual memory (struct kvec *iov != NULL) - * OR - * in pages (kernel only: plt_kiov_t *kiov != NULL). + * of 'niov' fragments which are in pages. * The LND may NOT overwrite these fragment descriptors. * An 'offset' and may specify a byte offset within the set of * fragments to start from @@ -282,7 +290,7 @@ struct lnet_lnd { * credit if the LND does flow control. */ int (*lnd_recv)(struct lnet_ni *ni, void *private, struct lnet_msg *msg, int delayed, unsigned int niov, - struct kvec *iov, lnet_kiov_t *kiov, + struct bio_vec *kiov, unsigned int offset, unsigned int mlen, unsigned int rlen); /* lnet_parse() has had to delay processing of this message @@ -294,11 +302,8 @@ struct lnet_lnd { int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, struct lnet_msg *msg, void **new_privatep); - /* notification of peer health */ - void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); - - /* query of peer aliveness */ - void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, time64_t *when); + /* notification of peer down */ + void (*lnd_notify_peer_down)(lnet_nid_t peer); /* accept a new connection */ int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock); @@ -322,11 +327,17 @@ enum lnet_net_state { LNET_NET_STATE_DELETING }; -#define LNET_NI_STATE_INIT (1 << 0) -#define LNET_NI_STATE_ACTIVE (1 << 1) -#define LNET_NI_STATE_FAILED (1 << 2) -#define LNET_NI_STATE_RECOVERY_PENDING (1 << 3) -#define LNET_NI_STATE_DELETING (1 << 4) +enum lnet_ni_state { + /* initial state when NI is created */ + LNET_NI_STATE_INIT = 0, + /* set when NI is brought up */ + LNET_NI_STATE_ACTIVE, + /* set when NI is being shutdown */ + LNET_NI_STATE_DELETING, +}; + +#define LNET_NI_RECOVERY_PENDING BIT(0) +#define LNET_NI_RECOVERY_FAILED BIT(1) enum lnet_stats_type { LNET_STATS_TYPE_SEND = 0, @@ -348,6 +359,22 @@ struct lnet_element_stats { struct lnet_comm_count el_drop_stats; }; +struct lnet_health_local_stats { + atomic_t hlt_local_interrupt; + atomic_t hlt_local_dropped; + atomic_t hlt_local_aborted; + atomic_t hlt_local_no_route; + atomic_t hlt_local_timeout; + atomic_t hlt_local_error; +}; + +struct lnet_health_remote_stats { + atomic_t hlt_remote_dropped; + atomic_t hlt_remote_timeout; + atomic_t hlt_remote_error; + atomic_t hlt_network_timeout; +}; + struct lnet_net { /* chain on the ln_nets */ struct list_head net_list; @@ -358,8 +385,8 @@ struct lnet_net { * lnet/include/lnet/nidstr.h */ __u32 net_id; - /* priority of the network */ - __u32 net_prio; + /* round robin selection */ + __u32 net_seq; /* total number of CPTs in the array */ __u32 net_ncpts; @@ -367,6 +394,9 @@ struct lnet_net { /* cumulative CPTs of all NIs in this net */ __u32 *net_cpts; + /* relative net selection priority */ + __u32 net_sel_priority; + /* network tunables */ struct lnet_ioctl_config_lnd_cmn_tunables net_tunables; @@ -377,7 +407,7 @@ struct lnet_net { bool net_tunables_set; /* procedural interface */ - struct lnet_lnd *net_lnd; + const struct lnet_lnd *net_lnd; /* list of NIs on this net */ struct list_head net_ni_list; @@ -388,17 +418,20 @@ struct lnet_net { /* dying LND instances */ struct list_head net_ni_zombie; - /* network state */ - enum lnet_net_state net_state; + /* when I was last alive */ + time64_t net_last_alive; + + /* protects access to net_last_alive */ + spinlock_t net_lock; + + /* list of router nids preferred for this network */ + struct list_head net_rtr_pref_nids; }; struct lnet_ni { /* chain on the lnet_net structure */ struct list_head ni_netlist; - /* chain on net_ni_cpt */ - struct list_head ni_cptlist; - /* chain on the recovery queue */ struct list_head ni_recovery; @@ -428,17 +461,24 @@ struct lnet_ni { /* percpt reference count */ int **ni_refs; - /* when I was last alive */ - time64_t ni_last_alive; - /* pointer to parent network */ struct lnet_net *ni_net; /* my health status */ struct lnet_ni_status *ni_status; - /* NI FSM */ - __u32 ni_state; + /* NI FSM. Protected by lnet_ni_lock() */ + enum lnet_ni_state ni_state; + + /* Recovery state. Protected by lnet_ni_lock() */ + __u32 ni_recovery_state; + + /* When to send the next recovery ping */ + time64_t ni_next_ping; + /* How many pings sent during current recovery period did not receive + * a reply. NB: reset whenever _any_ message arrives on this NI + */ + unsigned int ni_ping_count; /* per NI LND tunables */ struct lnet_lnd_tunables ni_lnd_tunables; @@ -448,6 +488,7 @@ struct lnet_ni { /* NI statistics */ struct lnet_element_stats ni_stats; + struct lnet_health_local_stats ni_hstats; /* physical device CPT */ int ni_dev_cpt; @@ -471,11 +512,13 @@ struct lnet_ni { */ atomic_t ni_fatal_error_on; + /* the relative selection priority of this NI */ + __u32 ni_sel_priority; + /* - * equivalent interfaces to use - * This is an array because socklnd bonding can still be configured + * equivalent interface to use */ - char *ni_interfaces[LNET_INTERFACES_NUM]; + char *ni_interface; struct net *ni_net_ns; /* original net namespace */ }; @@ -490,6 +533,7 @@ struct lnet_ni { struct lnet_ping_buffer { int pb_nnis; atomic_t pb_refcnt; + bool pb_needs_post; struct lnet_ping_info pb_info; }; @@ -501,14 +545,9 @@ struct lnet_ping_buffer { #define LNET_PING_INFO_TO_BUFFER(PINFO) \ container_of((PINFO), struct lnet_ping_buffer, pb_info) -/* router checker data, per router */ -struct lnet_rc_data { - /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ - struct list_head rcd_list; - struct lnet_handle_md rcd_mdh; /* ping buffer MD */ - struct lnet_peer_ni *rcd_gateway; /* reference to gateway */ - struct lnet_ping_buffer *rcd_pingbuffer;/* ping buffer */ - int rcd_nnis; /* desired size of buffer */ +struct lnet_nid_list { + struct list_head nl_list; + lnet_nid_t nl_nid; }; struct lnet_peer_ni { @@ -522,91 +561,78 @@ struct lnet_peer_ni { struct list_head lpni_hashlist; /* messages blocking for tx credits */ struct list_head lpni_txq; - /* messages blocking for router credits */ - struct list_head lpni_rtrq; - /* chain on router list */ - struct list_head lpni_rtr_list; /* pointer to peer net I'm part of */ struct lnet_peer_net *lpni_peer_net; /* statistics kept on each peer NI */ struct lnet_element_stats lpni_stats; - /* spin lock protecting credits and lpni_txq / lpni_rtrq */ + struct lnet_health_remote_stats lpni_hstats; + /* spin lock protecting credits and lpni_txq */ spinlock_t lpni_lock; /* # tx credits available */ int lpni_txcredits; /* low water mark */ int lpni_mintxcredits; + /* + * Each peer_ni in a gateway maintains its own credits. This + * allows more traffic to gateways that have multiple interfaces. + */ /* # router credits */ int lpni_rtrcredits; /* low water mark */ int lpni_minrtrcredits; /* bytes queued for sending */ long lpni_txqnob; - /* alive/dead? */ - bool lpni_alive; - /* notification outstanding? */ - bool lpni_notify; - /* outstanding notification for LND? */ - bool lpni_notifylnd; - /* some thread is handling notification */ - bool lpni_notifying; - /* SEND event outstanding from ping */ - bool lpni_ping_notsent; - /* # times router went dead<->alive. Protected with lpni_lock */ - int lpni_alive_count; - /* time of last aliveness news */ - time64_t lpni_timestamp; - /* time of last ping attempt */ - time64_t lpni_ping_timestamp; - /* != 0 if ping reply expected */ - time64_t lpni_ping_deadline; - /* when I was last alive */ - time64_t lpni_last_alive; - /* when lpni_ni was queried last time */ - time64_t lpni_last_query; /* network peer is on */ struct lnet_net *lpni_net; /* peer's NID */ lnet_nid_t lpni_nid; /* # refs */ - atomic_t lpni_refcount; + struct kref lpni_kref; /* health value for the peer */ atomic_t lpni_healthv; /* recovery ping mdh */ struct lnet_handle_md lpni_recovery_ping_mdh; + /* When to send the next recovery ping */ + time64_t lpni_next_ping; + /* How many pings sent during current recovery period did not receive + * a reply. NB: reset whenever _any_ message arrives from this peer NI + */ + unsigned int lpni_ping_count; /* CPT this peer attached on */ int lpni_cpt; /* state flags -- protected by lpni_lock */ unsigned lpni_state; - /* # refs from lnet_route_t::lr_gateway */ - int lpni_rtr_refcount; + /* status of the peer NI as reported by the peer */ + __u32 lpni_ns_status; /* sequence number used to round robin over peer nis within a net */ __u32 lpni_seq; /* sequence number used to round robin over gateways */ __u32 lpni_gw_seq; - /* health flag */ - bool lpni_healthy; /* returned RC ping features. Protected with lpni_lock */ unsigned int lpni_ping_feats; - /* routes on this peer */ - struct list_head lpni_routes; + /* time last message was received from the peer */ + time64_t lpni_last_alive; /* preferred local nids: if only one, use lpni_pref.nid */ union lpni_pref { lnet_nid_t nid; - lnet_nid_t *nids; + struct list_head nids; } lpni_pref; + /* list of router nids preferred for this peer NI */ + struct list_head lpni_rtr_pref_nids; + /* The relative selection priority of this peer NI */ + __u32 lpni_sel_priority; /* number of preferred NIDs in lnpi_pref_nids */ __u32 lpni_pref_nnids; - /* router checker state */ - struct lnet_rc_data *lpni_rcd; }; /* Preferred path added due to traffic on non-MR peer_ni */ -#define LNET_PEER_NI_NON_MR_PREF (1 << 0) +#define LNET_PEER_NI_NON_MR_PREF BIT(0) /* peer is being recovered. */ -#define LNET_PEER_NI_RECOVERY_PENDING (1 << 1) +#define LNET_PEER_NI_RECOVERY_PENDING BIT(1) +/* recovery ping failed */ +#define LNET_PEER_NI_RECOVERY_FAILED BIT(2) /* peer is being deleted */ -#define LNET_PEER_NI_DELETING (1 << 2) +#define LNET_PEER_NI_DELETING BIT(3) struct lnet_peer { /* chain on pt_peer_list */ @@ -618,19 +644,43 @@ struct lnet_peer { /* list of messages pending discovery*/ struct list_head lp_dc_pendq; + /* chain on router list */ + struct list_head lp_rtr_list; + /* primary NID of the peer */ lnet_nid_t lp_primary_nid; + /* source NID to use during discovery */ + lnet_nid_t lp_disc_src_nid; + + /* net to perform discovery on */ + __u32 lp_disc_net_id; + /* CPT of peer_table */ int lp_cpt; /* number of NIDs on this peer */ int lp_nnis; + /* # refs from lnet_route::lr_gateway */ + int lp_rtr_refcount; + + /* + * peer specific health sensitivity value to decrement peer nis in + * this peer with if set to something other than 0 + */ + __u32 lp_health_sensitivity; + + /* messages blocking for router credits */ + struct list_head lp_rtrq; + + /* routes on this peer */ + struct list_head lp_routes; + /* reference count */ atomic_t lp_refcount; - /* lock protecting peer state flags */ + /* lock protecting peer state flags and lpni_rtrq */ spinlock_t lp_lock; /* peer state flags */ @@ -674,6 +724,9 @@ struct lnet_peer { /* tasks waiting on discovery of this peer */ wait_queue_head_t lp_dc_waitq; + + /* cached peer aliveness */ + bool lp_alive; }; /* @@ -685,9 +738,13 @@ struct lnet_peer { * * A peer is marked NO_DISCOVERY if the LNET_PING_FEAT_DISCOVERY bit was * NOT set when the peer was pinged by discovery. + * + * A peer is marked ROUTER if it indicates so in the feature bit. */ -#define LNET_PEER_MULTI_RAIL (1 << 0) /* Multi-rail aware */ -#define LNET_PEER_NO_DISCOVERY (1 << 1) /* Peer disabled discovery */ +#define LNET_PEER_MULTI_RAIL BIT(0) /* Multi-rail aware */ +#define LNET_PEER_NO_DISCOVERY BIT(1) /* Peer disabled discovery */ +#define LNET_PEER_ROUTER_ENABLED BIT(2) /* router feature enabled */ + /* * A peer is marked CONFIGURED if it was configured by DLC. * @@ -701,28 +758,41 @@ struct lnet_peer { * A peer that was created as the result of inbound traffic will not * be marked at all. */ -#define LNET_PEER_CONFIGURED (1 << 2) /* Configured via DLC */ -#define LNET_PEER_DISCOVERED (1 << 3) /* Peer was discovered */ -#define LNET_PEER_REDISCOVER (1 << 4) /* Discovery was disabled */ +#define LNET_PEER_CONFIGURED BIT(3) /* Configured via DLC */ +#define LNET_PEER_DISCOVERED BIT(4) /* Peer was discovered */ +#define LNET_PEER_REDISCOVER BIT(5) /* Discovery was disabled */ /* * A peer is marked DISCOVERING when discovery is in progress. * The other flags below correspond to stages of discovery. */ -#define LNET_PEER_DISCOVERING (1 << 5) /* Discovering */ -#define LNET_PEER_DATA_PRESENT (1 << 6) /* Remote peer data present */ -#define LNET_PEER_NIDS_UPTODATE (1 << 7) /* Remote peer info uptodate */ -#define LNET_PEER_PING_SENT (1 << 8) /* Waiting for REPLY to Ping */ -#define LNET_PEER_PUSH_SENT (1 << 9) /* Waiting for ACK of Push */ -#define LNET_PEER_PING_FAILED (1 << 10) /* Ping send failure */ -#define LNET_PEER_PUSH_FAILED (1 << 11) /* Push send failure */ +#define LNET_PEER_DISCOVERING BIT(6) /* Discovering */ +#define LNET_PEER_DATA_PRESENT BIT(7) /* Remote peer data present */ +#define LNET_PEER_NIDS_UPTODATE BIT(8) /* Remote peer info uptodate */ +#define LNET_PEER_PING_SENT BIT(9) /* Waiting for REPLY to Ping */ +#define LNET_PEER_PUSH_SENT BIT(10) /* Waiting for ACK of Push */ +#define LNET_PEER_PING_FAILED BIT(11) /* Ping send failure */ +#define LNET_PEER_PUSH_FAILED BIT(12) /* Push send failure */ /* * A ping can be forced as a way to fix up state, or as a manual * intervention by an admin. * A push can be forced in circumstances that would normally not * allow for one to happen. */ -#define LNET_PEER_FORCE_PING (1 << 12) /* Forced Ping */ -#define LNET_PEER_FORCE_PUSH (1 << 13) /* Forced Push */ +#define LNET_PEER_FORCE_PING BIT(13) /* Forced Ping */ +#define LNET_PEER_FORCE_PUSH BIT(14) /* Forced Push */ + +/* force delete even if router */ +#define LNET_PEER_RTR_NI_FORCE_DEL BIT(15) + +/* gw undergoing alive discovery */ +#define LNET_PEER_RTR_DISCOVERY BIT(16) +/* gw has undergone discovery (does not indicate success or failure) */ +#define LNET_PEER_RTR_DISCOVERED BIT(17) + +/* peer is marked for deletion */ +#define LNET_PEER_MARK_DELETION BIT(18) +/* lnet_peer_del()/lnet_peer_del_locked() has been called on the peer */ +#define LNET_PEER_MARK_DELETED BIT(19) struct lnet_peer_net { /* chain on lp_peer_nets */ @@ -737,6 +807,18 @@ struct lnet_peer_net { /* Net ID */ __u32 lpn_net_id; + /* peer net health */ + int lpn_healthv; + + /* time of next router ping on this net */ + time64_t lpn_next_ping; + + /* selection sequence number */ + __u32 lpn_seq; + + /* relative peer net selection priority */ + __u32 lpn_sel_priority; + /* reference count */ atomic_t lpn_refcount; }; @@ -750,7 +832,6 @@ struct lnet_peer_net { * * protected by lnet_net_lock/EX for update * pt_version - * pt_number * pt_hash[...] * pt_peer_list * pt_peers @@ -762,7 +843,6 @@ struct lnet_peer_net { */ struct lnet_peer_table { int pt_version; /* /proc validity stamp */ - int pt_number; /* # peers_ni extant */ struct list_head *pt_hash; /* NID->peer hash */ struct list_head pt_peer_list; /* peers */ int pt_peers; /* # peers */ @@ -781,12 +861,15 @@ struct lnet_peer_table { struct lnet_route { struct list_head lr_list; /* chain on net */ struct list_head lr_gwlist; /* chain on gateway */ - struct lnet_peer_ni *lr_gateway; /* router node */ + struct lnet_peer *lr_gateway; /* router node */ + lnet_nid_t lr_nid; /* NID used to add route */ __u32 lr_net; /* remote network number */ + __u32 lr_lnet; /* local network number */ int lr_seq; /* sequence for round-robin */ - unsigned int lr_downis; /* number of down NIs */ __u32 lr_hops; /* how far I am */ unsigned int lr_priority; /* route priority */ + atomic_t lr_alive; /* cached route aliveness */ + bool lr_single_hop; /* this route is single-hop */ }; #define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) @@ -829,28 +912,28 @@ struct lnet_rtrbufpool { struct lnet_rtrbuf { struct list_head rb_list; /* chain on rbp_bufs */ struct lnet_rtrbufpool *rb_pool; /* owning pool */ - lnet_kiov_t rb_kiov[0]; /* the buffer space */ + struct bio_vec rb_kiov[0]; /* the buffer space */ }; #define LNET_PEER_HASHSIZE 503 /* prime! */ enum lnet_match_flags { /* Didn't match anything */ - LNET_MATCHMD_NONE = (1 << 0), + LNET_MATCHMD_NONE = BIT(0), /* Matched OK */ - LNET_MATCHMD_OK = (1 << 1), + LNET_MATCHMD_OK = BIT(1), /* Must be discarded */ - LNET_MATCHMD_DROP = (1 << 2), + LNET_MATCHMD_DROP = BIT(2), /* match and buffer is exhausted */ - LNET_MATCHMD_EXHAUSTED = (1 << 3), + LNET_MATCHMD_EXHAUSTED = BIT(3), /* match or drop */ LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), }; /* Options for struct lnet_portal::ptl_options */ -#define LNET_PTL_LAZY (1 << 0) -#define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ -#define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, request portal */ +#define LNET_PTL_LAZY BIT(0) +#define LNET_PTL_MATCH_UNIQUE BIT(1) /* unique match, for RDMA */ +#define LNET_PTL_MATCH_WILDCARD BIT(2) /* wildcard match, request portal */ /* parameter for matching operations (GET, PUT) */ struct lnet_match_info { @@ -938,9 +1021,56 @@ struct lnet_msg_container { int msc_nfinalizers; /* msgs waiting to complete finalizing */ struct list_head msc_finalizing; + /* msgs waiting to be resent */ + struct list_head msc_resending; struct list_head msc_active; /* active message list */ /* threads doing finalization */ void **msc_finalizers; + /* threads doing resends */ + void **msc_resenders; +}; + +/* This UDSP structures need to match the user space liblnetconfig structures + * in order for the marshall and unmarshall functions to be common. + */ + +/* Net is described as a + * 1. net type + * 2. num range + */ +struct lnet_ud_net_descr { + __u32 udn_net_type; + struct list_head udn_net_num_range; +}; + +/* each NID range is defined as + * 1. net descriptor + * 2. address range descriptor + */ +struct lnet_ud_nid_descr { + struct lnet_ud_net_descr ud_net_id; + struct list_head ud_addr_range; + __u32 ud_mem_size; +}; + +/* a UDSP rule can have up to three user defined NID descriptors + * - src: defines the local NID range for the rule + * - dst: defines the peer NID range for the rule + * - rte: defines the router NID range for the rule + * + * An action union defines the action to take when the rule + * is matched + */ +struct lnet_udsp { + struct list_head udsp_on_list; + __u32 udsp_idx; + struct lnet_ud_nid_descr udsp_src; + struct lnet_ud_nid_descr udsp_dst; + struct lnet_ud_nid_descr udsp_rte; + enum lnet_udsp_action_type udsp_action_type; + union { + __u32 udsp_priority; + } udsp_action; }; /* Peer Discovery states */ @@ -971,14 +1101,11 @@ struct lnet { int ln_nportals; /* the vector of portals */ struct lnet_portal **ln_portals; - /* percpt ME containers */ - struct lnet_res_container **ln_me_containers; /* percpt MD container */ struct lnet_res_container **ln_md_containers; /* Event Queue container */ struct lnet_res_container ln_eq_container; - wait_queue_head_t ln_eq_waitq; spinlock_t ln_eq_wait_lock; unsigned int ln_remote_nets_hbits; @@ -1025,7 +1152,7 @@ struct lnet { * ln_api_mutex. */ struct lnet_handle_md ln_ping_target_md; - struct lnet_handle_eq ln_ping_target_eq; + lnet_handler_t ln_ping_target_handler; struct lnet_ping_buffer *ln_ping_target; atomic_t ln_ping_target_seqno; @@ -1037,13 +1164,13 @@ struct lnet { * buffer may linger a while after it has been unlinked, in * which case the event handler cleans up. */ - struct lnet_handle_eq ln_push_target_eq; + lnet_handler_t ln_push_target_handler; struct lnet_handle_md ln_push_target_md; struct lnet_ping_buffer *ln_push_target; int ln_push_target_nnis; /* discovery event queue handle */ - struct lnet_handle_eq ln_dc_eqh; + lnet_handler_t ln_dc_handler; /* discovery requests */ struct list_head ln_dc_request; /* discovery working list */ @@ -1057,12 +1184,6 @@ struct lnet { /* monitor thread startup/shutdown state */ int ln_mt_state; - /* router checker's event queue */ - struct lnet_handle_eq ln_rc_eqh; - /* rcd still pending on net */ - struct list_head ln_rcd_deathrow; - /* rcd ready for free */ - struct list_head ln_rcd_zombie; /* serialise startup/shutdown */ struct semaphore ln_mt_signal; @@ -1080,10 +1201,10 @@ struct lnet { /* uniquely identifies this ni in this epoch */ __u64 ln_interface_cookie; /* registered LNDs */ - struct list_head ln_lnds; + const struct lnet_lnd *ln_lnds[NUM_LNDS]; /* test protocol compatibility flags */ - int ln_testprotocompat; + unsigned long ln_testprotocompat; /* 0 - load the NIs from the mod params * 1 - do not load the NIs from the mod params @@ -1093,10 +1214,10 @@ struct lnet { bool ln_nis_from_mod_params; /* - * waitq for the monitor thread. The monitor thread takes care of + * completion for the monitor thread. The monitor thread takes care of * checking routes, timedout messages and resending messages. */ - wait_queue_head_t ln_mt_waitq; + struct completion ln_mt_wait_complete; /* per-cpt resend queues */ struct list_head **ln_mt_resendqs; @@ -1112,9 +1233,23 @@ struct lnet { * based on the mdh cookie. */ struct list_head **ln_mt_rstq; - /* recovery eq handler */ - struct lnet_handle_eq ln_mt_eqh; + /* + * A response tracker becomes a zombie when the associated MD is queued + * for unlink before the response tracker is detached from the MD. An + * entry on a zombie list can be freed when either the remaining + * operations on the MD complete or when LNet has shut down. + */ + struct list_head **ln_mt_zombie_rstqs; + /* recovery handler */ + lnet_handler_t ln_mt_handler; + /* + * Completed when the discovery and monitor threads can enter their + * work loops + */ + struct completion ln_started; + /* UDSP list */ + struct list_head ln_udsp_list; }; #endif