X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Finclude%2Flnet%2Flib-types.h;h=f4a64af6c8a6e6eec0133e8414b14d9ca43ba874;hp=c83d53792b1260aadb34a82697ae03304c4b71e1;hb=82835a1952dcb37e8aac8c9d0666016557ea58e7;hpb=086962e37737b4f61317b8b3ee87ab5527f89e96 diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index c83d537..f4a64af 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lnet/include/lnet/lib-types.h * @@ -46,20 +45,26 @@ #include #include #include +#include +#include +#include #include #include +#include /* Max payload size */ #define LNET_MAX_PAYLOAD LNET_MTU -#define LNET_MAX_IOV (LNET_MAX_PAYLOAD >> PAGE_SHIFT) +/** limit on the number of fragments in discontiguous MDs */ +#define LNET_MAX_IOV 256 /* * This is the maximum health value. * All local and peer NIs created have their health default to this value. */ #define LNET_MAX_HEALTH_VALUE 1000 +#define LNET_MAX_SELECTION_PRIORITY UINT_MAX /* forward refs */ struct lnet_libmd; @@ -85,7 +90,7 @@ struct lnet_rsp_tracker { /* cpt to lock */ int rspt_cpt; /* nid of next hop */ - lnet_nid_t rspt_next_hop_nid; + struct lnet_nid rspt_next_hop_nid; /* deadline of the REPLY/ACK */ ktime_t rspt_deadline; /* parent MD */ @@ -120,6 +125,8 @@ struct lnet_msg { enum lnet_msg_hstatus msg_health_status; /* This is a recovery message */ bool msg_recovery; + /* force an RDMA even if the message size is < 4K */ + bool msg_rdma_force; /* the number of times a transmission has been retried */ int msg_retry_count; /* flag to indicate that we do not want to resend this message */ @@ -166,8 +173,7 @@ struct lnet_msg { unsigned int msg_wanted; unsigned int msg_offset; unsigned int msg_niov; - struct kvec *msg_iov; - lnet_kiov_t *msg_kiov; + struct bio_vec *msg_kiov; struct lnet_event msg_ev; struct lnet_hdr msg_hdr; @@ -181,21 +187,10 @@ struct lnet_libhandle { #define lh_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) -struct lnet_eq { - struct list_head eq_list; - struct lnet_libhandle eq_lh; - unsigned long eq_enq_seq; - unsigned long eq_deq_seq; - unsigned int eq_size; - lnet_eq_handler_t eq_callback; - struct lnet_event *eq_events; - int **eq_refs; /* percpt refcount for EQ */ -}; - struct lnet_me { struct list_head me_list; - struct lnet_libhandle me_lh; - struct lnet_process_id me_match_id; + int me_cpt; + struct lnet_processid me_match_id; unsigned int me_portal; unsigned int me_pos; /* hash offset in mt_hash */ __u64 me_match_bits; @@ -219,22 +214,29 @@ struct lnet_libmd { unsigned int md_niov; /* # frags at end of struct */ void *md_user_ptr; struct lnet_rsp_tracker *md_rspt_ptr; - struct lnet_eq *md_eq; + lnet_handler_t md_handler; struct lnet_handle_md md_bulk_handle; - union { - struct kvec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; - } md_iov; + struct bio_vec md_kiov[LNET_MAX_IOV]; }; -#define LNET_MD_FLAG_ZOMBIE (1 << 0) -#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) -#define LNET_MD_FLAG_ABORTED (1 << 2) +#define LNET_MD_FLAG_ZOMBIE BIT(0) +#define LNET_MD_FLAG_AUTO_UNLINK BIT(1) +#define LNET_MD_FLAG_ABORTED BIT(2) +/* LNET_MD_FLAG_HANDLING is set when a non-unlink event handler + * is being called for an event relating to the md. + * It ensures only one such handler runs at a time. + * The final "unlink" event is only called once the + * md_refcount has reached zero, and this flag has been cleared, + * ensuring that it doesn't race with any other event handler + * call. + */ +#define LNET_MD_FLAG_HANDLING BIT(3) +#define LNET_MD_FLAG_DISCARD BIT(4) struct lnet_test_peer { /* info about peers we are trying to fail */ struct list_head tp_list; /* ln_test_peers */ - lnet_nid_t tp_nid; /* matching nid */ + struct lnet_nid tp_nid; /* matching nid */ unsigned int tp_threshold; /* # failures to simulate */ }; @@ -244,14 +246,29 @@ struct lnet_test_peer { #define LNET_COOKIE_TYPE_BITS 2 #define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) +struct netstrfns { + u32 nf_type; + char *nf_name; + char *nf_modname; + void (*nf_addr2str)(u32 addr, char *str, size_t size); + void (*nf_addr2str_size)(const __be32 *addr, size_t asize, + char *str, size_t size); + int (*nf_str2addr)(const char *str, int nob, u32 *addr); + int (*nf_str2addr_size)(const char *str, int nob, + __be32 *addr, size_t *asize); + int (*nf_parse_addrlist)(char *str, int len, + struct list_head *list); + int (*nf_print_addrlist)(char *buffer, int count, + struct list_head *list); + int (*nf_match_addr)(u32 addr, struct list_head *list); + int (*nf_min_max)(struct list_head *nidlist, u32 *min_nid, + u32 *max_nid); +}; + struct lnet_ni; /* forward ref */ struct socket; struct lnet_lnd { - /* fields managed by portals */ - struct list_head lnd_list; /* stash in the LND table */ - int lnd_refcount; /* # active instances */ - /* fields initialized by the LND */ __u32 lnd_type; @@ -260,11 +277,7 @@ struct lnet_lnd { int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); /* In data movement APIs below, payload buffers are described as a set - * of 'niov' fragments which are... - * EITHER - * in virtual memory (struct kvec *iov != NULL) - * OR - * in pages (kernel only: plt_kiov_t *kiov != NULL). + * of 'niov' fragments which are in pages. * The LND may NOT overwrite these fragment descriptors. * An 'offset' and may specify a byte offset within the set of * fragments to start from @@ -285,7 +298,7 @@ struct lnet_lnd { * credit if the LND does flow control. */ int (*lnd_recv)(struct lnet_ni *ni, void *private, struct lnet_msg *msg, int delayed, unsigned int niov, - struct kvec *iov, lnet_kiov_t *kiov, + struct bio_vec *kiov, unsigned int offset, unsigned int mlen, unsigned int rlen); /* lnet_parse() has had to delay processing of this message @@ -300,11 +313,12 @@ struct lnet_lnd { /* notification of peer down */ void (*lnd_notify_peer_down)(lnet_nid_t peer); - /* query of peer aliveness */ - void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, time64_t *when); - /* accept a new connection */ int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock); + + /* get dma_dev priority */ + unsigned int (*lnd_get_dev_prio)(struct lnet_ni *ni, + unsigned int dev_idx); }; struct lnet_tx_queue { @@ -383,8 +397,8 @@ struct lnet_net { * lnet/include/lnet/nidstr.h */ __u32 net_id; - /* priority of the network */ - __u32 net_prio; + /* round robin selection */ + __u32 net_seq; /* total number of CPTs in the array */ __u32 net_ncpts; @@ -392,6 +406,9 @@ struct lnet_net { /* cumulative CPTs of all NIs in this net */ __u32 *net_cpts; + /* relative net selection priority */ + __u32 net_sel_priority; + /* network tunables */ struct lnet_ioctl_config_lnd_cmn_tunables net_tunables; @@ -402,7 +419,7 @@ struct lnet_net { bool net_tunables_set; /* procedural interface */ - struct lnet_lnd *net_lnd; + const struct lnet_lnd *net_lnd; /* list of NIs on this net */ struct list_head net_ni_list; @@ -413,8 +430,14 @@ struct lnet_net { /* dying LND instances */ struct list_head net_ni_zombie; - /* network state */ - enum lnet_net_state net_state; + /* when I was last alive */ + time64_t net_last_alive; + + /* protects access to net_last_alive */ + spinlock_t net_lock; + + /* list of router nids preferred for this network */ + struct list_head net_rtr_pref_nids; }; struct lnet_ni { @@ -436,7 +459,7 @@ struct lnet_ni { __u32 *ni_cpts; /* interface's NID */ - lnet_nid_t ni_nid; + struct lnet_nid ni_nid; /* instance-specific data */ void *ni_data; @@ -450,9 +473,6 @@ struct lnet_ni { /* percpt reference count */ int **ni_refs; - /* when I was last alive */ - time64_t ni_last_alive; - /* pointer to parent network */ struct lnet_net *ni_net; @@ -465,6 +485,13 @@ struct lnet_ni { /* Recovery state. Protected by lnet_ni_lock() */ __u32 ni_recovery_state; + /* When to send the next recovery ping */ + time64_t ni_next_ping; + /* How many pings sent during current recovery period did not receive + * a reply. NB: reset whenever _any_ message arrives on this NI + */ + unsigned int ni_ping_count; + /* per NI LND tunables */ struct lnet_lnd_tunables ni_lnd_tunables; @@ -497,11 +524,13 @@ struct lnet_ni { */ atomic_t ni_fatal_error_on; + /* the relative selection priority of this NI */ + __u32 ni_sel_priority; + /* - * equivalent interfaces to use - * This is an array because socklnd bonding can still be configured + * equivalent interface to use */ - char *ni_interfaces[LNET_INTERFACES_NUM]; + char *ni_interface; struct net *ni_net_ns; /* original net namespace */ }; @@ -516,6 +545,7 @@ struct lnet_ni { struct lnet_ping_buffer { int pb_nnis; atomic_t pb_refcnt; + bool pb_needs_post; struct lnet_ping_info pb_info; }; @@ -527,6 +557,11 @@ struct lnet_ping_buffer { #define LNET_PING_INFO_TO_BUFFER(PINFO) \ container_of((PINFO), struct lnet_ping_buffer, pb_info) +struct lnet_nid_list { + struct list_head nl_list; + struct lnet_nid nl_nid; +}; + struct lnet_peer_ni { /* chain on lpn_peer_nis */ struct list_head lpni_peer_nis; @@ -562,13 +597,19 @@ struct lnet_peer_ni { /* network peer is on */ struct lnet_net *lpni_net; /* peer's NID */ - lnet_nid_t lpni_nid; + struct lnet_nid lpni_nid; /* # refs */ - atomic_t lpni_refcount; + struct kref lpni_kref; /* health value for the peer */ atomic_t lpni_healthv; /* recovery ping mdh */ struct lnet_handle_md lpni_recovery_ping_mdh; + /* When to send the next recovery ping */ + time64_t lpni_next_ping; + /* How many pings sent during current recovery period did not receive + * a reply. NB: reset whenever _any_ message arrives from this peer NI + */ + unsigned int lpni_ping_count; /* CPT this peer attached on */ int lpni_cpt; /* state flags -- protected by lpni_lock */ @@ -581,23 +622,29 @@ struct lnet_peer_ni { __u32 lpni_gw_seq; /* returned RC ping features. Protected with lpni_lock */ unsigned int lpni_ping_feats; + /* time last message was received from the peer */ + time64_t lpni_last_alive; /* preferred local nids: if only one, use lpni_pref.nid */ union lpni_pref { - lnet_nid_t nid; - lnet_nid_t *nids; + struct lnet_nid nid; + struct list_head nids; } lpni_pref; + /* list of router nids preferred for this peer NI */ + struct list_head lpni_rtr_pref_nids; + /* The relative selection priority of this peer NI */ + __u32 lpni_sel_priority; /* number of preferred NIDs in lnpi_pref_nids */ __u32 lpni_pref_nnids; }; /* Preferred path added due to traffic on non-MR peer_ni */ -#define LNET_PEER_NI_NON_MR_PREF (1 << 0) +#define LNET_PEER_NI_NON_MR_PREF BIT(0) /* peer is being recovered. */ -#define LNET_PEER_NI_RECOVERY_PENDING (1 << 1) +#define LNET_PEER_NI_RECOVERY_PENDING BIT(1) /* recovery ping failed */ -#define LNET_PEER_NI_RECOVERY_FAILED (1 << 2) +#define LNET_PEER_NI_RECOVERY_FAILED BIT(2) /* peer is being deleted */ -#define LNET_PEER_NI_DELETING (1 << 3) +#define LNET_PEER_NI_DELETING BIT(3) struct lnet_peer { /* chain on pt_peer_list */ @@ -613,7 +660,15 @@ struct lnet_peer { struct list_head lp_rtr_list; /* primary NID of the peer */ - lnet_nid_t lp_primary_nid; + struct lnet_nid lp_primary_nid; + + /* source NID to use during discovery */ + struct lnet_nid lp_disc_src_nid; + /* destination NID to use during discovery */ + struct lnet_nid lp_disc_dst_nid; + + /* net to perform discovery on */ + __u32 lp_disc_net_id; /* CPT of peer_table */ int lp_cpt; @@ -621,18 +676,21 @@ struct lnet_peer { /* number of NIDs on this peer */ int lp_nnis; - /* # refs from lnet_route_t::lr_gateway */ + /* # refs from lnet_route::lr_gateway */ int lp_rtr_refcount; + /* + * peer specific health sensitivity value to decrement peer nis in + * this peer with if set to something other than 0 + */ + __u32 lp_health_sensitivity; + /* messages blocking for router credits */ struct list_head lp_rtrq; /* routes on this peer */ struct list_head lp_routes; - /* time of last router check attempt */ - time64_t lp_rtrcheck_timestamp; - /* reference count */ atomic_t lp_refcount; @@ -680,6 +738,9 @@ struct lnet_peer { /* tasks waiting on discovery of this peer */ wait_queue_head_t lp_dc_waitq; + + /* cached peer aliveness */ + bool lp_alive; }; /* @@ -694,9 +755,9 @@ struct lnet_peer { * * A peer is marked ROUTER if it indicates so in the feature bit. */ -#define LNET_PEER_MULTI_RAIL (1 << 0) /* Multi-rail aware */ -#define LNET_PEER_NO_DISCOVERY (1 << 1) /* Peer disabled discovery */ -#define LNET_PEER_ROUTER_ENABLED (1 << 2) /* router feature enabled */ +#define LNET_PEER_MULTI_RAIL BIT(0) /* Multi-rail aware */ +#define LNET_PEER_NO_DISCOVERY BIT(1) /* Peer disabled discovery */ +#define LNET_PEER_ROUTER_ENABLED BIT(2) /* router feature enabled */ /* * A peer is marked CONFIGURED if it was configured by DLC. @@ -711,34 +772,48 @@ struct lnet_peer { * A peer that was created as the result of inbound traffic will not * be marked at all. */ -#define LNET_PEER_CONFIGURED (1 << 3) /* Configured via DLC */ -#define LNET_PEER_DISCOVERED (1 << 4) /* Peer was discovered */ -#define LNET_PEER_REDISCOVER (1 << 5) /* Discovery was disabled */ +#define LNET_PEER_CONFIGURED BIT(3) /* Configured via DLC */ +#define LNET_PEER_DISCOVERED BIT(4) /* Peer was discovered */ +#define LNET_PEER_REDISCOVER BIT(5) /* Discovery was disabled */ /* * A peer is marked DISCOVERING when discovery is in progress. * The other flags below correspond to stages of discovery. */ -#define LNET_PEER_DISCOVERING (1 << 6) /* Discovering */ -#define LNET_PEER_DATA_PRESENT (1 << 7) /* Remote peer data present */ -#define LNET_PEER_NIDS_UPTODATE (1 << 8) /* Remote peer info uptodate */ -#define LNET_PEER_PING_SENT (1 << 9) /* Waiting for REPLY to Ping */ -#define LNET_PEER_PUSH_SENT (1 << 10) /* Waiting for ACK of Push */ -#define LNET_PEER_PING_FAILED (1 << 11) /* Ping send failure */ -#define LNET_PEER_PUSH_FAILED (1 << 12) /* Push send failure */ +#define LNET_PEER_DISCOVERING BIT(6) /* Discovering */ +#define LNET_PEER_DATA_PRESENT BIT(7) /* Remote peer data present */ +#define LNET_PEER_NIDS_UPTODATE BIT(8) /* Remote peer info uptodate */ +#define LNET_PEER_PING_SENT BIT(9) /* Waiting for REPLY to Ping */ +#define LNET_PEER_PUSH_SENT BIT(10) /* Waiting for ACK of Push */ +#define LNET_PEER_PING_FAILED BIT(11) /* Ping send failure */ +#define LNET_PEER_PUSH_FAILED BIT(12) /* Push send failure */ /* * A ping can be forced as a way to fix up state, or as a manual * intervention by an admin. * A push can be forced in circumstances that would normally not * allow for one to happen. */ -#define LNET_PEER_FORCE_PING (1 << 13) /* Forced Ping */ -#define LNET_PEER_FORCE_PUSH (1 << 14) /* Forced Push */ +#define LNET_PEER_FORCE_PING BIT(13) /* Forced Ping */ +#define LNET_PEER_FORCE_PUSH BIT(14) /* Forced Push */ /* force delete even if router */ -#define LNET_PEER_RTR_NI_FORCE_DEL (1 << 15) +#define LNET_PEER_RTR_NI_FORCE_DEL BIT(15) /* gw undergoing alive discovery */ -#define LNET_PEER_RTR_DISCOVERY (1 << 16) +#define LNET_PEER_RTR_DISCOVERY BIT(16) +/* gw has undergone discovery (does not indicate success or failure) */ +#define LNET_PEER_RTR_DISCOVERED BIT(17) + +/* peer is marked for deletion */ +#define LNET_PEER_MARK_DELETION BIT(18) +/* lnet_peer_del()/lnet_peer_del_locked() has been called on the peer */ +#define LNET_PEER_MARK_DELETED BIT(19) +/* lock primary NID to what's requested by ULP */ +#define LNET_PEER_LOCK_PRIMARY BIT(20) +/* this is for informational purposes only. It is set if a peer gets + * configured from Lustre with a primary NID which belongs to another peer + * which is also configured by Lustre as the primary NID. + */ +#define LNET_PEER_BAD_CONFIG BIT(21) struct lnet_peer_net { /* chain on lp_peer_nets */ @@ -753,6 +828,18 @@ struct lnet_peer_net { /* Net ID */ __u32 lpn_net_id; + /* peer net health */ + int lpn_healthv; + + /* time of next router ping on this net */ + time64_t lpn_next_ping; + + /* selection sequence number */ + __u32 lpn_seq; + + /* relative peer net selection priority */ + __u32 lpn_sel_priority; + /* reference count */ atomic_t lpn_refcount; }; @@ -766,7 +853,6 @@ struct lnet_peer_net { * * protected by lnet_net_lock/EX for update * pt_version - * pt_number * pt_hash[...] * pt_peer_list * pt_peers @@ -778,7 +864,6 @@ struct lnet_peer_net { */ struct lnet_peer_table { int pt_version; /* /proc validity stamp */ - int pt_number; /* # peers_ni extant */ struct list_head *pt_hash; /* NID->peer hash */ struct list_head pt_peer_list; /* peers */ int pt_peers; /* # peers */ @@ -798,12 +883,14 @@ struct lnet_route { struct list_head lr_list; /* chain on net */ struct list_head lr_gwlist; /* chain on gateway */ struct lnet_peer *lr_gateway; /* router node */ - lnet_nid_t lr_nid; /* NID used to add route */ + struct lnet_nid lr_nid; /* NID used to add route */ __u32 lr_net; /* remote network number */ __u32 lr_lnet; /* local network number */ int lr_seq; /* sequence for round-robin */ __u32 lr_hops; /* how far I am */ unsigned int lr_priority; /* route priority */ + atomic_t lr_alive; /* cached route aliveness */ + bool lr_single_hop; /* this route is single-hop */ }; #define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) @@ -846,33 +933,33 @@ struct lnet_rtrbufpool { struct lnet_rtrbuf { struct list_head rb_list; /* chain on rbp_bufs */ struct lnet_rtrbufpool *rb_pool; /* owning pool */ - lnet_kiov_t rb_kiov[0]; /* the buffer space */ + struct bio_vec rb_kiov[0]; /* the buffer space */ }; #define LNET_PEER_HASHSIZE 503 /* prime! */ enum lnet_match_flags { /* Didn't match anything */ - LNET_MATCHMD_NONE = (1 << 0), + LNET_MATCHMD_NONE = BIT(0), /* Matched OK */ - LNET_MATCHMD_OK = (1 << 1), + LNET_MATCHMD_OK = BIT(1), /* Must be discarded */ - LNET_MATCHMD_DROP = (1 << 2), + LNET_MATCHMD_DROP = BIT(2), /* match and buffer is exhausted */ - LNET_MATCHMD_EXHAUSTED = (1 << 3), + LNET_MATCHMD_EXHAUSTED = BIT(3), /* match or drop */ LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), }; /* Options for struct lnet_portal::ptl_options */ -#define LNET_PTL_LAZY (1 << 0) -#define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ -#define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, request portal */ +#define LNET_PTL_LAZY BIT(0) +#define LNET_PTL_MATCH_UNIQUE BIT(1) /* unique match, for RDMA */ +#define LNET_PTL_MATCH_WILDCARD BIT(2) /* wildcard match, request portal */ /* parameter for matching operations (GET, PUT) */ struct lnet_match_info { __u64 mi_mbits; - struct lnet_process_id mi_id; + struct lnet_processid mi_id; unsigned int mi_cpt; unsigned int mi_opc; unsigned int mi_portal; @@ -955,9 +1042,56 @@ struct lnet_msg_container { int msc_nfinalizers; /* msgs waiting to complete finalizing */ struct list_head msc_finalizing; + /* msgs waiting to be resent */ + struct list_head msc_resending; struct list_head msc_active; /* active message list */ /* threads doing finalization */ void **msc_finalizers; + /* threads doing resends */ + void **msc_resenders; +}; + +/* This UDSP structures need to match the user space liblnetconfig structures + * in order for the marshall and unmarshall functions to be common. + */ + +/* Net is described as a + * 1. net type + * 2. num range + */ +struct lnet_ud_net_descr { + __u32 udn_net_type; + struct list_head udn_net_num_range; +}; + +/* each NID range is defined as + * 1. net descriptor + * 2. address range descriptor + */ +struct lnet_ud_nid_descr { + struct lnet_ud_net_descr ud_net_id; + struct list_head ud_addr_range; + __u32 ud_mem_size; +}; + +/* a UDSP rule can have up to three user defined NID descriptors + * - src: defines the local NID range for the rule + * - dst: defines the peer NID range for the rule + * - rte: defines the router NID range for the rule + * + * An action union defines the action to take when the rule + * is matched + */ +struct lnet_udsp { + struct list_head udsp_on_list; + __u32 udsp_idx; + struct lnet_ud_nid_descr udsp_src; + struct lnet_ud_nid_descr udsp_dst; + struct lnet_ud_nid_descr udsp_rte; + enum lnet_udsp_action_type udsp_action_type; + union { + __u32 udsp_priority; + } udsp_action; }; /* Peer Discovery states */ @@ -988,14 +1122,11 @@ struct lnet { int ln_nportals; /* the vector of portals */ struct lnet_portal **ln_portals; - /* percpt ME containers */ - struct lnet_res_container **ln_me_containers; /* percpt MD container */ struct lnet_res_container **ln_md_containers; /* Event Queue container */ struct lnet_res_container ln_eq_container; - wait_queue_head_t ln_eq_waitq; spinlock_t ln_eq_wait_lock; unsigned int ln_remote_nets_hbits; @@ -1042,7 +1173,7 @@ struct lnet { * ln_api_mutex. */ struct lnet_handle_md ln_ping_target_md; - struct lnet_handle_eq ln_ping_target_eq; + lnet_handler_t ln_ping_target_handler; struct lnet_ping_buffer *ln_ping_target; atomic_t ln_ping_target_seqno; @@ -1054,13 +1185,13 @@ struct lnet { * buffer may linger a while after it has been unlinked, in * which case the event handler cleans up. */ - struct lnet_handle_eq ln_push_target_eq; + lnet_handler_t ln_push_target_handler; struct lnet_handle_md ln_push_target_md; struct lnet_ping_buffer *ln_push_target; int ln_push_target_nnis; /* discovery event queue handle */ - struct lnet_handle_eq ln_dc_eqh; + lnet_handler_t ln_dc_handler; /* discovery requests */ struct list_head ln_dc_request; /* discovery working list */ @@ -1091,10 +1222,10 @@ struct lnet { /* uniquely identifies this ni in this epoch */ __u64 ln_interface_cookie; /* registered LNDs */ - struct list_head ln_lnds; + const struct lnet_lnd *ln_lnds[NUM_LNDS]; /* test protocol compatibility flags */ - int ln_testprotocompat; + unsigned long ln_testprotocompat; /* 0 - load the NIs from the mod params * 1 - do not load the NIs from the mod params @@ -1104,10 +1235,10 @@ struct lnet { bool ln_nis_from_mod_params; /* - * waitq for the monitor thread. The monitor thread takes care of + * completion for the monitor thread. The monitor thread takes care of * checking routes, timedout messages and resending messages. */ - wait_queue_head_t ln_mt_waitq; + struct completion ln_mt_wait_complete; /* per-cpt resend queues */ struct list_head **ln_mt_resendqs; @@ -1123,9 +1254,69 @@ struct lnet { * based on the mdh cookie. */ struct list_head **ln_mt_rstq; - /* recovery eq handler */ - struct lnet_handle_eq ln_mt_eqh; + /* + * A response tracker becomes a zombie when the associated MD is queued + * for unlink before the response tracker is detached from the MD. An + * entry on a zombie list can be freed when either the remaining + * operations on the MD complete or when LNet has shut down. + */ + struct list_head **ln_mt_zombie_rstqs; + /* recovery handler */ + lnet_handler_t ln_mt_handler; + + /* + * Completed when the discovery and monitor threads can enter their + * work loops + */ + struct completion ln_started; + /* UDSP list */ + struct list_head ln_udsp_list; +}; +struct genl_filter_list { + struct list_head lp_list; + void *lp_cursor; + bool lp_first; }; +static const struct nla_policy scalar_attr_policy[LN_SCALAR_MAX + 1] = { + [LN_SCALAR_ATTR_LIST] = { .type = NLA_NESTED }, + [LN_SCALAR_ATTR_LIST_SIZE] = { .type = NLA_U16 }, + [LN_SCALAR_ATTR_INDEX] = { .type = NLA_U16 }, + [LN_SCALAR_ATTR_NLA_TYPE] = { .type = NLA_U16 }, + [LN_SCALAR_ATTR_VALUE] = { .type = NLA_STRING }, + [LN_SCALAR_ATTR_KEY_FORMAT] = { .type = NLA_U16 }, +}; + +int lnet_genl_send_scalar_list(struct sk_buff *msg, u32 portid, u32 seq, + const struct genl_family *family, int flags, + u8 cmd, const struct ln_key_list *data[]); + +/* Special workaround for pre-4.19 kernels to send error messages + * from dumpit routines. Newer kernels will send message with + * NL_SET_ERR_MSG information by default if NETLINK_EXT_ACK is set. + */ +static inline int lnet_nl_send_error(struct sk_buff *msg, int portid, int seq, + int error) +{ +#ifndef HAVE_NL_DUMP_WITH_EXT_ACK + struct nlmsghdr *nlh; + + if (!error) + return 0; + + nlh = nlmsg_put(msg, portid, seq, NLMSG_ERROR, sizeof(error), 0); + if (!nlh) + return -ENOMEM; +#ifdef HAVE_NL_PARSE_WITH_EXT_ACK + netlink_ack(msg, nlh, error, NULL); +#else + netlink_ack(msg, nlh, error); +#endif + return nlmsg_len(nlh); +#else + return error; +#endif +} + #endif