X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Finclude%2Flnet%2Flib-types.h;h=51afa78ffec36f50c0d5aada6ad34c4cd4e003c7;hp=eee4b141d6f24300c887ab03385d5ed304d32f1a;hb=8fdf2bc62a;hpb=d277f2ae95d0d0580059561706face3accfe3618 diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index eee4b14..51afa78 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lnet/include/lnet/lib-types.h * @@ -46,6 +45,7 @@ #include #include #include +#include #include #include @@ -54,13 +54,15 @@ /* Max payload size */ #define LNET_MAX_PAYLOAD LNET_MTU -#define LNET_MAX_IOV (LNET_MAX_PAYLOAD >> PAGE_SHIFT) +/** limit on the number of fragments in discontiguous MDs */ +#define LNET_MAX_IOV 256 /* * This is the maximum health value. * All local and peer NIs created have their health default to this value. */ #define LNET_MAX_HEALTH_VALUE 1000 +#define LNET_MAX_SELECTION_PRIORITY UINT_MAX /* forward refs */ struct lnet_libmd; @@ -167,8 +169,7 @@ struct lnet_msg { unsigned int msg_wanted; unsigned int msg_offset; unsigned int msg_niov; - struct kvec *msg_iov; - lnet_kiov_t *msg_kiov; + struct bio_vec *msg_kiov; struct lnet_event msg_ev; struct lnet_hdr msg_hdr; @@ -182,11 +183,6 @@ struct lnet_libhandle { #define lh_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) -struct lnet_eq { - lnet_eq_handler_t eq_callback; - int **eq_refs; /* percpt refcount for EQ */ -}; - struct lnet_me { struct list_head me_list; int me_cpt; @@ -214,17 +210,24 @@ struct lnet_libmd { unsigned int md_niov; /* # frags at end of struct */ void *md_user_ptr; struct lnet_rsp_tracker *md_rspt_ptr; - struct lnet_eq *md_eq; + lnet_handler_t md_handler; struct lnet_handle_md md_bulk_handle; - union { - struct kvec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; - } md_iov; + struct bio_vec md_kiov[LNET_MAX_IOV]; }; -#define LNET_MD_FLAG_ZOMBIE (1 << 0) -#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) -#define LNET_MD_FLAG_ABORTED (1 << 2) +#define LNET_MD_FLAG_ZOMBIE BIT(0) +#define LNET_MD_FLAG_AUTO_UNLINK BIT(1) +#define LNET_MD_FLAG_ABORTED BIT(2) +/* LNET_MD_FLAG_HANDLING is set when a non-unlink event handler + * is being called for an event relating to the md. + * It ensures only one such handler runs at a time. + * The final "unlink" event is only called once the + * md_refcount has reached zero, and this flag has been cleared, + * ensuring that it doesn't race with any other event handler + * call. + */ +#define LNET_MD_FLAG_HANDLING BIT(3) +#define LNET_MD_FLAG_DISCARD BIT(4) struct lnet_test_peer { /* info about peers we are trying to fail */ @@ -239,6 +242,21 @@ struct lnet_test_peer { #define LNET_COOKIE_TYPE_BITS 2 #define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) +struct netstrfns { + u32 nf_type; + char *nf_name; + char *nf_modname; + void (*nf_addr2str)(u32 addr, char *str, size_t size); + int (*nf_str2addr)(const char *str, int nob, u32 *addr); + int (*nf_parse_addrlist)(char *str, int len, + struct list_head *list); + int (*nf_print_addrlist)(char *buffer, int count, + struct list_head *list); + int (*nf_match_addr)(u32 addr, struct list_head *list); + int (*nf_min_max)(struct list_head *nidlist, u32 *min_nid, + u32 *max_nid); +}; + struct lnet_ni; /* forward ref */ struct socket; @@ -251,11 +269,7 @@ struct lnet_lnd { int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); /* In data movement APIs below, payload buffers are described as a set - * of 'niov' fragments which are... - * EITHER - * in virtual memory (struct kvec *iov != NULL) - * OR - * in pages (kernel only: plt_kiov_t *kiov != NULL). + * of 'niov' fragments which are in pages. * The LND may NOT overwrite these fragment descriptors. * An 'offset' and may specify a byte offset within the set of * fragments to start from @@ -276,7 +290,7 @@ struct lnet_lnd { * credit if the LND does flow control. */ int (*lnd_recv)(struct lnet_ni *ni, void *private, struct lnet_msg *msg, int delayed, unsigned int niov, - struct kvec *iov, lnet_kiov_t *kiov, + struct bio_vec *kiov, unsigned int offset, unsigned int mlen, unsigned int rlen); /* lnet_parse() has had to delay processing of this message @@ -371,8 +385,8 @@ struct lnet_net { * lnet/include/lnet/nidstr.h */ __u32 net_id; - /* priority of the network */ - __u32 net_prio; + /* round robin selection */ + __u32 net_seq; /* total number of CPTs in the array */ __u32 net_ncpts; @@ -380,6 +394,9 @@ struct lnet_net { /* cumulative CPTs of all NIs in this net */ __u32 *net_cpts; + /* relative net selection priority */ + __u32 net_sel_priority; + /* network tunables */ struct lnet_ioctl_config_lnd_cmn_tunables net_tunables; @@ -406,6 +423,9 @@ struct lnet_net { /* protects access to net_last_alive */ spinlock_t net_lock; + + /* list of router nids preferred for this network */ + struct list_head net_rtr_pref_nids; }; struct lnet_ni { @@ -453,6 +473,13 @@ struct lnet_ni { /* Recovery state. Protected by lnet_ni_lock() */ __u32 ni_recovery_state; + /* When to send the next recovery ping */ + time64_t ni_next_ping; + /* How many pings sent during current recovery period did not receive + * a reply. NB: reset whenever _any_ message arrives on this NI + */ + unsigned int ni_ping_count; + /* per NI LND tunables */ struct lnet_lnd_tunables ni_lnd_tunables; @@ -485,11 +512,13 @@ struct lnet_ni { */ atomic_t ni_fatal_error_on; + /* the relative selection priority of this NI */ + __u32 ni_sel_priority; + /* - * equivalent interfaces to use - * This is an array because socklnd bonding can still be configured + * equivalent interface to use */ - char *ni_interfaces[LNET_INTERFACES_NUM]; + char *ni_interface; struct net *ni_net_ns; /* original net namespace */ }; @@ -504,6 +533,7 @@ struct lnet_ni { struct lnet_ping_buffer { int pb_nnis; atomic_t pb_refcnt; + bool pb_needs_post; struct lnet_ping_info pb_info; }; @@ -515,6 +545,11 @@ struct lnet_ping_buffer { #define LNET_PING_INFO_TO_BUFFER(PINFO) \ container_of((PINFO), struct lnet_ping_buffer, pb_info) +struct lnet_nid_list { + struct list_head nl_list; + lnet_nid_t nl_nid; +}; + struct lnet_peer_ni { /* chain on lpn_peer_nis */ struct list_head lpni_peer_nis; @@ -552,11 +587,17 @@ struct lnet_peer_ni { /* peer's NID */ lnet_nid_t lpni_nid; /* # refs */ - atomic_t lpni_refcount; + struct kref lpni_kref; /* health value for the peer */ atomic_t lpni_healthv; /* recovery ping mdh */ struct lnet_handle_md lpni_recovery_ping_mdh; + /* When to send the next recovery ping */ + time64_t lpni_next_ping; + /* How many pings sent during current recovery period did not receive + * a reply. NB: reset whenever _any_ message arrives from this peer NI + */ + unsigned int lpni_ping_count; /* CPT this peer attached on */ int lpni_cpt; /* state flags -- protected by lpni_lock */ @@ -574,20 +615,24 @@ struct lnet_peer_ni { /* preferred local nids: if only one, use lpni_pref.nid */ union lpni_pref { lnet_nid_t nid; - lnet_nid_t *nids; + struct list_head nids; } lpni_pref; + /* list of router nids preferred for this peer NI */ + struct list_head lpni_rtr_pref_nids; + /* The relative selection priority of this peer NI */ + __u32 lpni_sel_priority; /* number of preferred NIDs in lnpi_pref_nids */ __u32 lpni_pref_nnids; }; /* Preferred path added due to traffic on non-MR peer_ni */ -#define LNET_PEER_NI_NON_MR_PREF (1 << 0) +#define LNET_PEER_NI_NON_MR_PREF BIT(0) /* peer is being recovered. */ -#define LNET_PEER_NI_RECOVERY_PENDING (1 << 1) +#define LNET_PEER_NI_RECOVERY_PENDING BIT(1) /* recovery ping failed */ -#define LNET_PEER_NI_RECOVERY_FAILED (1 << 2) +#define LNET_PEER_NI_RECOVERY_FAILED BIT(2) /* peer is being deleted */ -#define LNET_PEER_NI_DELETING (1 << 3) +#define LNET_PEER_NI_DELETING BIT(3) struct lnet_peer { /* chain on pt_peer_list */ @@ -605,6 +650,9 @@ struct lnet_peer { /* primary NID of the peer */ lnet_nid_t lp_primary_nid; + /* source NID to use during discovery */ + lnet_nid_t lp_disc_src_nid; + /* net to perform discovery on */ __u32 lp_disc_net_id; @@ -693,9 +741,9 @@ struct lnet_peer { * * A peer is marked ROUTER if it indicates so in the feature bit. */ -#define LNET_PEER_MULTI_RAIL (1 << 0) /* Multi-rail aware */ -#define LNET_PEER_NO_DISCOVERY (1 << 1) /* Peer disabled discovery */ -#define LNET_PEER_ROUTER_ENABLED (1 << 2) /* router feature enabled */ +#define LNET_PEER_MULTI_RAIL BIT(0) /* Multi-rail aware */ +#define LNET_PEER_NO_DISCOVERY BIT(1) /* Peer disabled discovery */ +#define LNET_PEER_ROUTER_ENABLED BIT(2) /* router feature enabled */ /* * A peer is marked CONFIGURED if it was configured by DLC. @@ -710,36 +758,41 @@ struct lnet_peer { * A peer that was created as the result of inbound traffic will not * be marked at all. */ -#define LNET_PEER_CONFIGURED (1 << 3) /* Configured via DLC */ -#define LNET_PEER_DISCOVERED (1 << 4) /* Peer was discovered */ -#define LNET_PEER_REDISCOVER (1 << 5) /* Discovery was disabled */ +#define LNET_PEER_CONFIGURED BIT(3) /* Configured via DLC */ +#define LNET_PEER_DISCOVERED BIT(4) /* Peer was discovered */ +#define LNET_PEER_REDISCOVER BIT(5) /* Discovery was disabled */ /* * A peer is marked DISCOVERING when discovery is in progress. * The other flags below correspond to stages of discovery. */ -#define LNET_PEER_DISCOVERING (1 << 6) /* Discovering */ -#define LNET_PEER_DATA_PRESENT (1 << 7) /* Remote peer data present */ -#define LNET_PEER_NIDS_UPTODATE (1 << 8) /* Remote peer info uptodate */ -#define LNET_PEER_PING_SENT (1 << 9) /* Waiting for REPLY to Ping */ -#define LNET_PEER_PUSH_SENT (1 << 10) /* Waiting for ACK of Push */ -#define LNET_PEER_PING_FAILED (1 << 11) /* Ping send failure */ -#define LNET_PEER_PUSH_FAILED (1 << 12) /* Push send failure */ +#define LNET_PEER_DISCOVERING BIT(6) /* Discovering */ +#define LNET_PEER_DATA_PRESENT BIT(7) /* Remote peer data present */ +#define LNET_PEER_NIDS_UPTODATE BIT(8) /* Remote peer info uptodate */ +#define LNET_PEER_PING_SENT BIT(9) /* Waiting for REPLY to Ping */ +#define LNET_PEER_PUSH_SENT BIT(10) /* Waiting for ACK of Push */ +#define LNET_PEER_PING_FAILED BIT(11) /* Ping send failure */ +#define LNET_PEER_PUSH_FAILED BIT(12) /* Push send failure */ /* * A ping can be forced as a way to fix up state, or as a manual * intervention by an admin. * A push can be forced in circumstances that would normally not * allow for one to happen. */ -#define LNET_PEER_FORCE_PING (1 << 13) /* Forced Ping */ -#define LNET_PEER_FORCE_PUSH (1 << 14) /* Forced Push */ +#define LNET_PEER_FORCE_PING BIT(13) /* Forced Ping */ +#define LNET_PEER_FORCE_PUSH BIT(14) /* Forced Push */ /* force delete even if router */ -#define LNET_PEER_RTR_NI_FORCE_DEL (1 << 15) +#define LNET_PEER_RTR_NI_FORCE_DEL BIT(15) /* gw undergoing alive discovery */ -#define LNET_PEER_RTR_DISCOVERY (1 << 16) +#define LNET_PEER_RTR_DISCOVERY BIT(16) /* gw has undergone discovery (does not indicate success or failure) */ -#define LNET_PEER_RTR_DISCOVERED (1 << 17) +#define LNET_PEER_RTR_DISCOVERED BIT(17) + +/* peer is marked for deletion */ +#define LNET_PEER_MARK_DELETION BIT(18) +/* lnet_peer_del()/lnet_peer_del_locked() has been called on the peer */ +#define LNET_PEER_MARK_DELETED BIT(19) struct lnet_peer_net { /* chain on lp_peer_nets */ @@ -763,6 +816,9 @@ struct lnet_peer_net { /* selection sequence number */ __u32 lpn_seq; + /* relative peer net selection priority */ + __u32 lpn_sel_priority; + /* reference count */ atomic_t lpn_refcount; }; @@ -812,7 +868,8 @@ struct lnet_route { int lr_seq; /* sequence for round-robin */ __u32 lr_hops; /* how far I am */ unsigned int lr_priority; /* route priority */ - bool lr_alive; /* cached route aliveness */ + atomic_t lr_alive; /* cached route aliveness */ + bool lr_single_hop; /* this route is single-hop */ }; #define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) @@ -855,28 +912,28 @@ struct lnet_rtrbufpool { struct lnet_rtrbuf { struct list_head rb_list; /* chain on rbp_bufs */ struct lnet_rtrbufpool *rb_pool; /* owning pool */ - lnet_kiov_t rb_kiov[0]; /* the buffer space */ + struct bio_vec rb_kiov[0]; /* the buffer space */ }; #define LNET_PEER_HASHSIZE 503 /* prime! */ enum lnet_match_flags { /* Didn't match anything */ - LNET_MATCHMD_NONE = (1 << 0), + LNET_MATCHMD_NONE = BIT(0), /* Matched OK */ - LNET_MATCHMD_OK = (1 << 1), + LNET_MATCHMD_OK = BIT(1), /* Must be discarded */ - LNET_MATCHMD_DROP = (1 << 2), + LNET_MATCHMD_DROP = BIT(2), /* match and buffer is exhausted */ - LNET_MATCHMD_EXHAUSTED = (1 << 3), + LNET_MATCHMD_EXHAUSTED = BIT(3), /* match or drop */ LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), }; /* Options for struct lnet_portal::ptl_options */ -#define LNET_PTL_LAZY (1 << 0) -#define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ -#define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, request portal */ +#define LNET_PTL_LAZY BIT(0) +#define LNET_PTL_MATCH_UNIQUE BIT(1) /* unique match, for RDMA */ +#define LNET_PTL_MATCH_WILDCARD BIT(2) /* wildcard match, request portal */ /* parameter for matching operations (GET, PUT) */ struct lnet_match_info { @@ -973,6 +1030,49 @@ struct lnet_msg_container { void **msc_resenders; }; +/* This UDSP structures need to match the user space liblnetconfig structures + * in order for the marshall and unmarshall functions to be common. + */ + +/* Net is described as a + * 1. net type + * 2. num range + */ +struct lnet_ud_net_descr { + __u32 udn_net_type; + struct list_head udn_net_num_range; +}; + +/* each NID range is defined as + * 1. net descriptor + * 2. address range descriptor + */ +struct lnet_ud_nid_descr { + struct lnet_ud_net_descr ud_net_id; + struct list_head ud_addr_range; + __u32 ud_mem_size; +}; + +/* a UDSP rule can have up to three user defined NID descriptors + * - src: defines the local NID range for the rule + * - dst: defines the peer NID range for the rule + * - rte: defines the router NID range for the rule + * + * An action union defines the action to take when the rule + * is matched + */ +struct lnet_udsp { + struct list_head udsp_on_list; + __u32 udsp_idx; + struct lnet_ud_nid_descr udsp_src; + struct lnet_ud_nid_descr udsp_dst; + struct lnet_ud_nid_descr udsp_rte; + enum lnet_udsp_action_type udsp_action_type; + union { + __u32 udsp_priority; + } udsp_action; +}; + /* Peer Discovery states */ #define LNET_DC_STATE_SHUTDOWN 0 /* not started */ #define LNET_DC_STATE_RUNNING 1 /* started up OK */ @@ -1052,7 +1152,7 @@ struct lnet { * ln_api_mutex. */ struct lnet_handle_md ln_ping_target_md; - struct lnet_eq *ln_ping_target_eq; + lnet_handler_t ln_ping_target_handler; struct lnet_ping_buffer *ln_ping_target; atomic_t ln_ping_target_seqno; @@ -1064,13 +1164,13 @@ struct lnet { * buffer may linger a while after it has been unlinked, in * which case the event handler cleans up. */ - struct lnet_eq *ln_push_target_eq; + lnet_handler_t ln_push_target_handler; struct lnet_handle_md ln_push_target_md; struct lnet_ping_buffer *ln_push_target; int ln_push_target_nnis; /* discovery event queue handle */ - struct lnet_eq *ln_dc_eq; + lnet_handler_t ln_dc_handler; /* discovery requests */ struct list_head ln_dc_request; /* discovery working list */ @@ -1140,14 +1240,16 @@ struct lnet { * operations on the MD complete or when LNet has shut down. */ struct list_head **ln_mt_zombie_rstqs; - /* recovery eq handler */ - struct lnet_eq *ln_mt_eq; + /* recovery handler */ + lnet_handler_t ln_mt_handler; /* * Completed when the discovery and monitor threads can enter their * work loops */ struct completion ln_started; + /* UDSP list */ + struct list_head ln_udsp_list; }; #endif