X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Finclude%2Flnet%2Flib-types.h;h=c3feaea0621dba2dfdaf96ee57783eb75dcf33c8;hp=3817bcdb0fb2c269b4791e877a77cbd743392abf;hb=9283e2ed6655e89fe693d35313c9dcf1d5a6703a;hpb=b01e6fce1c988139b5fe59484c7568362992f37b diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 3817bcd..c3feaea 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -64,6 +64,34 @@ /* forward refs */ struct lnet_libmd; +enum lnet_msg_hstatus { + LNET_MSG_STATUS_OK = 0, + LNET_MSG_STATUS_LOCAL_INTERRUPT, + LNET_MSG_STATUS_LOCAL_DROPPED, + LNET_MSG_STATUS_LOCAL_ABORTED, + LNET_MSG_STATUS_LOCAL_NO_ROUTE, + LNET_MSG_STATUS_LOCAL_ERROR, + LNET_MSG_STATUS_LOCAL_TIMEOUT, + LNET_MSG_STATUS_REMOTE_ERROR, + LNET_MSG_STATUS_REMOTE_DROPPED, + LNET_MSG_STATUS_REMOTE_TIMEOUT, + LNET_MSG_STATUS_NETWORK_TIMEOUT, + LNET_MSG_STATUS_END, +}; + +struct lnet_rsp_tracker { + /* chain on the waiting list */ + struct list_head rspt_on_list; + /* cpt to lock */ + int rspt_cpt; + /* nid of next hop */ + lnet_nid_t rspt_next_hop_nid; + /* deadline of the REPLY/ACK */ + ktime_t rspt_deadline; + /* parent MD */ + struct lnet_handle_md rspt_mdh; +}; + struct lnet_msg { struct list_head msg_activelist; struct list_head msg_list; /* Q for credits/MD */ @@ -88,6 +116,15 @@ struct lnet_msg { */ ktime_t msg_deadline; + /* The message health status. */ + enum lnet_msg_hstatus msg_health_status; + /* This is a recovery message */ + bool msg_recovery; + /* the number of times a transmission has been retried */ + int msg_retry_count; + /* flag to indicate that we do not want to resend this message */ + bool msg_no_resend; + /* committed for sending */ unsigned int msg_tx_committed:1; /* CPT # this message committed for sending */ @@ -168,24 +205,25 @@ struct lnet_me { }; struct lnet_libmd { - struct list_head md_list; - struct lnet_libhandle md_lh; - struct lnet_me *md_me; - char *md_start; - unsigned int md_offset; - unsigned int md_length; - unsigned int md_max_size; - int md_threshold; - int md_refcount; - unsigned int md_options; - unsigned int md_flags; - unsigned int md_niov; /* # frags at end of struct */ - void *md_user_ptr; - struct lnet_eq *md_eq; - struct lnet_handle_md md_bulk_handle; + struct list_head md_list; + struct lnet_libhandle md_lh; + struct lnet_me *md_me; + char *md_start; + unsigned int md_offset; + unsigned int md_length; + unsigned int md_max_size; + int md_threshold; + int md_refcount; + unsigned int md_options; + unsigned int md_flags; + unsigned int md_niov; /* # frags at end of struct */ + void *md_user_ptr; + struct lnet_rsp_tracker *md_rspt_ptr; + struct lnet_eq *md_eq; + struct lnet_handle_md md_bulk_handle; union { - struct kvec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; + struct kvec iov[LNET_MAX_IOV]; + lnet_kiov_t kiov[LNET_MAX_IOV]; } md_iov; }; @@ -259,8 +297,8 @@ struct lnet_lnd { int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, struct lnet_msg *msg, void **new_privatep); - /* notification of peer health */ - void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); + /* notification of peer down */ + void (*lnd_notify_peer_down)(lnet_nid_t peer); /* query of peer aliveness */ void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, time64_t *when); @@ -288,18 +326,17 @@ enum lnet_net_state { }; enum lnet_ni_state { - /* set when NI block is allocated */ + /* initial state when NI is created */ LNET_NI_STATE_INIT = 0, - /* set when NI is started successfully 
*/ + /* set when NI is brought up */ LNET_NI_STATE_ACTIVE, - /* set when LND notifies NI failed */ - LNET_NI_STATE_FAILED, - /* set when LND notifies NI degraded */ - LNET_NI_STATE_DEGRADED, - /* set when shuttding down NI */ - LNET_NI_STATE_DELETING + /* set when NI is being shutdown */ + LNET_NI_STATE_DELETING, }; +#define LNET_NI_RECOVERY_PENDING BIT(0) +#define LNET_NI_RECOVERY_FAILED BIT(1) + enum lnet_stats_type { LNET_STATS_TYPE_SEND = 0, LNET_STATS_TYPE_RECV, @@ -320,6 +357,22 @@ struct lnet_element_stats { struct lnet_comm_count el_drop_stats; }; +struct lnet_health_local_stats { + atomic_t hlt_local_interrupt; + atomic_t hlt_local_dropped; + atomic_t hlt_local_aborted; + atomic_t hlt_local_no_route; + atomic_t hlt_local_timeout; + atomic_t hlt_local_error; +}; + +struct lnet_health_remote_stats { + atomic_t hlt_remote_dropped; + atomic_t hlt_remote_timeout; + atomic_t hlt_remote_error; + atomic_t hlt_network_timeout; +}; + struct lnet_net { /* chain on the ln_nets */ struct list_head net_list; @@ -362,14 +415,23 @@ struct lnet_net { /* network state */ enum lnet_net_state net_state; + + /* when I was last alive */ + time64_t net_last_alive; + + /* protects access to net_last_alive */ + spinlock_t net_lock; }; struct lnet_ni { /* chain on the lnet_net structure */ struct list_head ni_netlist; - /* chain on net_ni_cpt */ - struct list_head ni_cptlist; + /* chain on the recovery queue */ + struct list_head ni_recovery; + + /* MD handle for recovery ping */ + struct lnet_handle_md ni_ping_mdh; spinlock_t ni_lock; @@ -394,18 +456,18 @@ struct lnet_ni { /* percpt reference count */ int **ni_refs; - /* when I was last alive */ - time64_t ni_last_alive; - /* pointer to parent network */ struct lnet_net *ni_net; /* my health status */ struct lnet_ni_status *ni_status; - /* NI FSM */ + /* NI FSM. Protected by lnet_ni_lock() */ enum lnet_ni_state ni_state; + /* Recovery state. Protected by lnet_ni_lock() */ + __u32 ni_recovery_state; + /* per NI LND tunables */ struct lnet_lnd_tunables ni_lnd_tunables; @@ -414,6 +476,7 @@ struct lnet_ni { /* NI statistics */ struct lnet_element_stats ni_stats; + struct lnet_health_local_stats ni_hstats; /* physical device CPT */ int ni_dev_cpt; @@ -431,6 +494,13 @@ struct lnet_ni { atomic_t ni_healthv; /* + * Set to 1 by the LND when it receives an event telling it the device + * has gone into a fatal state. Set to 0 when the LND receives an + * even telling it the device is back online. 
+ */ + atomic_t ni_fatal_error_on; + + /* * equivalent interfaces to use * This is an array because socklnd bonding can still be configured */ @@ -460,89 +530,62 @@ struct lnet_ping_buffer { #define LNET_PING_INFO_TO_BUFFER(PINFO) \ container_of((PINFO), struct lnet_ping_buffer, pb_info) -/* router checker data, per router */ -struct lnet_rc_data { - /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ - struct list_head rcd_list; - struct lnet_handle_md rcd_mdh; /* ping buffer MD */ - struct lnet_peer_ni *rcd_gateway; /* reference to gateway */ - struct lnet_ping_buffer *rcd_pingbuffer;/* ping buffer */ - int rcd_nnis; /* desired size of buffer */ -}; - struct lnet_peer_ni { /* chain on lpn_peer_nis */ struct list_head lpni_peer_nis; /* chain on remote peer list */ struct list_head lpni_on_remote_peer_ni_list; + /* chain on recovery queue */ + struct list_head lpni_recovery; /* chain on peer hash */ struct list_head lpni_hashlist; /* messages blocking for tx credits */ struct list_head lpni_txq; - /* messages blocking for router credits */ - struct list_head lpni_rtrq; - /* chain on router list */ - struct list_head lpni_rtr_list; /* pointer to peer net I'm part of */ struct lnet_peer_net *lpni_peer_net; /* statistics kept on each peer NI */ struct lnet_element_stats lpni_stats; - /* spin lock protecting credits and lpni_txq / lpni_rtrq */ + struct lnet_health_remote_stats lpni_hstats; + /* spin lock protecting credits and lpni_txq */ spinlock_t lpni_lock; /* # tx credits available */ int lpni_txcredits; /* low water mark */ int lpni_mintxcredits; + /* + * Each peer_ni in a gateway maintains its own credits. This + * allows more traffic to gateways that have multiple interfaces. + */ /* # router credits */ int lpni_rtrcredits; /* low water mark */ int lpni_minrtrcredits; /* bytes queued for sending */ long lpni_txqnob; - /* alive/dead? */ - bool lpni_alive; - /* notification outstanding? */ - bool lpni_notify; - /* outstanding notification for LND? */ - bool lpni_notifylnd; - /* some thread is handling notification */ - bool lpni_notifying; - /* SEND event outstanding from ping */ - bool lpni_ping_notsent; - /* # times router went dead<->alive. Protected with lpni_lock */ - int lpni_alive_count; - /* time of last aliveness news */ - time64_t lpni_timestamp; - /* time of last ping attempt */ - time64_t lpni_ping_timestamp; - /* != 0 if ping reply expected */ - time64_t lpni_ping_deadline; - /* when I was last alive */ - time64_t lpni_last_alive; - /* when lpni_ni was queried last time */ - time64_t lpni_last_query; /* network peer is on */ struct lnet_net *lpni_net; /* peer's NID */ lnet_nid_t lpni_nid; /* # refs */ atomic_t lpni_refcount; + /* health value for the peer */ + atomic_t lpni_healthv; + /* recovery ping mdh */ + struct lnet_handle_md lpni_recovery_ping_mdh; /* CPT this peer attached on */ int lpni_cpt; /* state flags -- protected by lpni_lock */ unsigned lpni_state; - /* # refs from lnet_route_t::lr_gateway */ - int lpni_rtr_refcount; + /* status of the peer NI as reported by the peer */ + __u32 lpni_ns_status; /* sequence number used to round robin over peer nis within a net */ __u32 lpni_seq; /* sequence number used to round robin over gateways */ __u32 lpni_gw_seq; - /* health flag */ - bool lpni_healthy; /* returned RC ping features. 
Protected with lpni_lock */ unsigned int lpni_ping_feats; - /* routes on this peer */ - struct list_head lpni_routes; + /* time last message was received from the peer */ + time64_t lpni_last_alive; /* preferred local nids: if only one, use lpni_pref.nid */ union lpni_pref { lnet_nid_t nid; @@ -550,12 +593,16 @@ struct lnet_peer_ni { } lpni_pref; /* number of preferred NIDs in lnpi_pref_nids */ __u32 lpni_pref_nnids; - /* router checker state */ - struct lnet_rc_data *lpni_rcd; }; /* Preferred path added due to traffic on non-MR peer_ni */ #define LNET_PEER_NI_NON_MR_PREF (1 << 0) +/* peer is being recovered. */ +#define LNET_PEER_NI_RECOVERY_PENDING (1 << 1) +/* recovery ping failed */ +#define LNET_PEER_NI_RECOVERY_FAILED (1 << 2) +/* peer is being deleted */ +#define LNET_PEER_NI_DELETING (1 << 3) struct lnet_peer { /* chain on pt_peer_list */ @@ -567,19 +614,40 @@ struct lnet_peer { /* list of messages pending discovery*/ struct list_head lp_dc_pendq; + /* chain on router list */ + struct list_head lp_rtr_list; + /* primary NID of the peer */ lnet_nid_t lp_primary_nid; + /* net to perform discovery on */ + __u32 lp_disc_net_id; + /* CPT of peer_table */ int lp_cpt; /* number of NIDs on this peer */ int lp_nnis; + /* # refs from lnet_route_t::lr_gateway */ + int lp_rtr_refcount; + + /* + * peer specific health sensitivity value to decrement peer nis in + * this peer with if set to something other than 0 + */ + __u32 lp_health_sensitivity; + + /* messages blocking for router credits */ + struct list_head lp_rtrq; + + /* routes on this peer */ + struct list_head lp_routes; + /* reference count */ atomic_t lp_refcount; - /* lock protecting peer state flags */ + /* lock protecting peer state flags and lpni_rtrq */ spinlock_t lp_lock; /* peer state flags */ @@ -634,9 +702,13 @@ struct lnet_peer { * * A peer is marked NO_DISCOVERY if the LNET_PING_FEAT_DISCOVERY bit was * NOT set when the peer was pinged by discovery. + * + * A peer is marked ROUTER if it indicates so in the feature bit. */ #define LNET_PEER_MULTI_RAIL (1 << 0) /* Multi-rail aware */ #define LNET_PEER_NO_DISCOVERY (1 << 1) /* Peer disabled discovery */ +#define LNET_PEER_ROUTER_ENABLED (1 << 2) /* router feature enabled */ + /* * A peer is marked CONFIGURED if it was configured by DLC. * @@ -650,28 +722,34 @@ struct lnet_peer { * A peer that was created as the result of inbound traffic will not * be marked at all. */ -#define LNET_PEER_CONFIGURED (1 << 2) /* Configured via DLC */ -#define LNET_PEER_DISCOVERED (1 << 3) /* Peer was discovered */ -#define LNET_PEER_REDISCOVER (1 << 4) /* Discovery was disabled */ +#define LNET_PEER_CONFIGURED (1 << 3) /* Configured via DLC */ +#define LNET_PEER_DISCOVERED (1 << 4) /* Peer was discovered */ +#define LNET_PEER_REDISCOVER (1 << 5) /* Discovery was disabled */ /* * A peer is marked DISCOVERING when discovery is in progress. * The other flags below correspond to stages of discovery. 
*/ -#define LNET_PEER_DISCOVERING (1 << 5) /* Discovering */ -#define LNET_PEER_DATA_PRESENT (1 << 6) /* Remote peer data present */ -#define LNET_PEER_NIDS_UPTODATE (1 << 7) /* Remote peer info uptodate */ -#define LNET_PEER_PING_SENT (1 << 8) /* Waiting for REPLY to Ping */ -#define LNET_PEER_PUSH_SENT (1 << 9) /* Waiting for ACK of Push */ -#define LNET_PEER_PING_FAILED (1 << 10) /* Ping send failure */ -#define LNET_PEER_PUSH_FAILED (1 << 11) /* Push send failure */ +#define LNET_PEER_DISCOVERING (1 << 6) /* Discovering */ +#define LNET_PEER_DATA_PRESENT (1 << 7) /* Remote peer data present */ +#define LNET_PEER_NIDS_UPTODATE (1 << 8) /* Remote peer info uptodate */ +#define LNET_PEER_PING_SENT (1 << 9) /* Waiting for REPLY to Ping */ +#define LNET_PEER_PUSH_SENT (1 << 10) /* Waiting for ACK of Push */ +#define LNET_PEER_PING_FAILED (1 << 11) /* Ping send failure */ +#define LNET_PEER_PUSH_FAILED (1 << 12) /* Push send failure */ /* * A ping can be forced as a way to fix up state, or as a manual * intervention by an admin. * A push can be forced in circumstances that would normally not * allow for one to happen. */ -#define LNET_PEER_FORCE_PING (1 << 12) /* Forced Ping */ -#define LNET_PEER_FORCE_PUSH (1 << 13) /* Forced Push */ +#define LNET_PEER_FORCE_PING (1 << 13) /* Forced Ping */ +#define LNET_PEER_FORCE_PUSH (1 << 14) /* Forced Push */ + +/* force delete even if router */ +#define LNET_PEER_RTR_NI_FORCE_DEL (1 << 15) + +/* gw undergoing alive discovery */ +#define LNET_PEER_RTR_DISCOVERY (1 << 16) struct lnet_peer_net { /* chain on lp_peer_nets */ @@ -686,6 +764,12 @@ struct lnet_peer_net { /* Net ID */ __u32 lpn_net_id; + /* time of last router net check attempt */ + time64_t lpn_rtrcheck_timestamp; + + /* selection sequence number */ + __u32 lpn_seq; + /* reference count */ atomic_t lpn_refcount; }; @@ -730,10 +814,11 @@ struct lnet_peer_table { struct lnet_route { struct list_head lr_list; /* chain on net */ struct list_head lr_gwlist; /* chain on gateway */ - struct lnet_peer_ni *lr_gateway; /* router node */ + struct lnet_peer *lr_gateway; /* router node */ + lnet_nid_t lr_nid; /* NID used to add route */ __u32 lr_net; /* remote network number */ + __u32 lr_lnet; /* local network number */ int lr_seq; /* sequence for round-robin */ - unsigned int lr_downis; /* number of down NIs */ __u32 lr_hops; /* how far I am */ unsigned int lr_priority; /* route priority */ }; @@ -1006,12 +1091,6 @@ struct lnet { /* monitor thread startup/shutdown state */ int ln_mt_state; - /* router checker's event queue */ - struct lnet_handle_eq ln_rc_eqh; - /* rcd still pending on net */ - struct list_head ln_rcd_deathrow; - /* rcd ready for free */ - struct list_head ln_rcd_zombie; /* serialise startup/shutdown */ struct semaphore ln_mt_signal; @@ -1046,6 +1125,29 @@ struct lnet { * checking routes, timedout messages and resending messages. */ wait_queue_head_t ln_mt_waitq; + + /* per-cpt resend queues */ + struct list_head **ln_mt_resendqs; + /* local NIs to recover */ + struct list_head ln_mt_localNIRecovq; + /* local NIs to recover */ + struct list_head ln_mt_peerNIRecovq; + /* + * An array of queues for GET/PUT waiting for REPLY/ACK respectively. + * There are CPT number of queues. Since response trackers will be + * added on the fast path we can't afford to grab the exclusive + * net lock to protect these queues. The CPT will be calculated + * based on the mdh cookie. 
+ */ + struct list_head **ln_mt_rstq; + /* recovery eq handler */ + struct lnet_handle_eq ln_mt_eqh; + + /* + * Completed when the discovery and monitor threads can enter their + * work loops + */ + struct completion ln_started; }; #endif