X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Finclude%2Flnet%2Flib-types.h;h=bd1df1e4058cf7622dfffa19f6ff7deda03db394;hp=7c7a9bd7c56f0d5acf51e7407ec0a23da10f0337;hb=2be10428ac22426c5868b699b6c0b80c040465dc;hpb=ed052504713d1db49531454a87055b2ee54399f0;ds=sidebyside diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 7c7a9bd..bd1df1e 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -23,7 +23,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2016, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -44,27 +44,55 @@ #include #include +#include #include #include #include /* Max payload size */ -#ifndef CONFIG_LNET_MAX_PAYLOAD -# error "CONFIG_LNET_MAX_PAYLOAD must be defined in config.h" -#endif +#define LNET_MAX_PAYLOAD LNET_MTU -#define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD -#if (LNET_MAX_PAYLOAD < LNET_MTU) -# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" -#elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) -# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" -#endif +#define LNET_MAX_IOV (LNET_MAX_PAYLOAD >> PAGE_SHIFT) + +/* + * This is the maximum health value. + * All local and peer NIs created have their health default to this value. 
+ */ +#define LNET_MAX_HEALTH_VALUE 1000 /* forward refs */ struct lnet_libmd; -typedef struct lnet_msg { +enum lnet_msg_hstatus { + LNET_MSG_STATUS_OK = 0, + LNET_MSG_STATUS_LOCAL_INTERRUPT, + LNET_MSG_STATUS_LOCAL_DROPPED, + LNET_MSG_STATUS_LOCAL_ABORTED, + LNET_MSG_STATUS_LOCAL_NO_ROUTE, + LNET_MSG_STATUS_LOCAL_ERROR, + LNET_MSG_STATUS_LOCAL_TIMEOUT, + LNET_MSG_STATUS_REMOTE_ERROR, + LNET_MSG_STATUS_REMOTE_DROPPED, + LNET_MSG_STATUS_REMOTE_TIMEOUT, + LNET_MSG_STATUS_NETWORK_TIMEOUT, + LNET_MSG_STATUS_END, +}; + +struct lnet_rsp_tracker { + /* chain on the waiting list */ + struct list_head rspt_on_list; + /* cpt to lock */ + int rspt_cpt; + /* nid of next hop */ + lnet_nid_t rspt_next_hop_nid; + /* deadline of the REPLY/ACK */ + ktime_t rspt_deadline; + /* parent MD */ + struct lnet_handle_md rspt_mdh; +}; + +struct lnet_msg { struct list_head msg_activelist; struct list_head msg_list; /* Q for credits/MD */ @@ -82,6 +110,21 @@ typedef struct lnet_msg { lnet_nid_t msg_src_nid_param; lnet_nid_t msg_rtr_nid_param; + /* + * Deadline for the message after which it will be finalized if it + * has not completed. + */ + ktime_t msg_deadline; + + /* The message health status. 
*/ + enum lnet_msg_hstatus msg_health_status; + /* This is a recovery message */ + bool msg_recovery; + /* the number of times a transmission has been retried */ + int msg_retry_count; + /* flag to indicate that we do not want to resend this message */ + bool msg_no_resend; + /* committed for sending */ unsigned int msg_tx_committed:1; /* CPT # this message committed for sending */ @@ -128,17 +171,17 @@ typedef struct lnet_msg { struct lnet_event msg_ev; struct lnet_hdr msg_hdr; -} lnet_msg_t; +}; -typedef struct lnet_libhandle { +struct lnet_libhandle { struct list_head lh_hash_chain; __u64 lh_cookie; -} lnet_libhandle_t; +}; #define lh_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) -typedef struct lnet_eq { +struct lnet_eq { struct list_head eq_list; struct lnet_libhandle eq_lh; unsigned long eq_enq_seq; @@ -147,9 +190,9 @@ typedef struct lnet_eq { lnet_eq_handler_t eq_callback; struct lnet_event *eq_events; int **eq_refs; /* percpt refcount for EQ */ -} lnet_eq_t; +}; -typedef struct lnet_me { +struct lnet_me { struct list_head me_list; struct lnet_libhandle me_lh; struct lnet_process_id me_match_id; @@ -159,40 +202,41 @@ typedef struct lnet_me { __u64 me_ignore_bits; enum lnet_unlink me_unlink; struct lnet_libmd *me_md; -} lnet_me_t; - -typedef struct lnet_libmd { - struct list_head md_list; - struct lnet_libhandle md_lh; - struct lnet_me *md_me; - char *md_start; - unsigned int md_offset; - unsigned int md_length; - unsigned int md_max_size; - int md_threshold; - int md_refcount; - unsigned int md_options; - unsigned int md_flags; - unsigned int md_niov; /* # frags at end of struct */ - void *md_user_ptr; - struct lnet_eq *md_eq; - struct lnet_handle_md md_bulk_handle; +}; + +struct lnet_libmd { + struct list_head md_list; + struct lnet_libhandle md_lh; + struct lnet_me *md_me; + char *md_start; + unsigned int md_offset; + unsigned int md_length; + unsigned int md_max_size; + int md_threshold; + int md_refcount; + unsigned 
int md_options; + unsigned int md_flags; + unsigned int md_niov; /* # frags at end of struct */ + void *md_user_ptr; + struct lnet_rsp_tracker *md_rspt_ptr; + struct lnet_eq *md_eq; + struct lnet_handle_md md_bulk_handle; union { - struct kvec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; + struct kvec iov[LNET_MAX_IOV]; + lnet_kiov_t kiov[LNET_MAX_IOV]; } md_iov; -} lnet_libmd_t; +}; #define LNET_MD_FLAG_ZOMBIE (1 << 0) #define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) #define LNET_MD_FLAG_ABORTED (1 << 2) -typedef struct lnet_test_peer { +struct lnet_test_peer { /* info about peers we are trying to fail */ struct list_head tp_list; /* ln_test_peers */ lnet_nid_t tp_nid; /* matching nid */ unsigned int tp_threshold; /* # failures to simulate */ -} lnet_test_peer_t; +}; #define LNET_COOKIE_TYPE_MD 1 #define LNET_COOKIE_TYPE_ME 2 @@ -203,7 +247,7 @@ typedef struct lnet_test_peer { struct lnet_ni; /* forward ref */ struct socket; -typedef struct lnet_lnd { +struct lnet_lnd { /* fields managed by portals */ struct list_head lnd_list; /* stash in the LND table */ int lnd_refcount; /* # active instances */ @@ -257,11 +301,11 @@ typedef struct lnet_lnd { void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); /* query of peer aliveness */ - void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, cfs_time_t *when); + void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, time64_t *when); /* accept a new connection */ int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock); -} lnd_t; +}; struct lnet_tx_queue { int tq_credits; /* # tx credits free */ @@ -282,18 +326,17 @@ enum lnet_net_state { }; enum lnet_ni_state { - /* set when NI block is allocated */ + /* initial state when NI is created */ LNET_NI_STATE_INIT = 0, - /* set when NI is started successfully */ + /* set when NI is brought up */ LNET_NI_STATE_ACTIVE, - /* set when LND notifies NI failed */ - LNET_NI_STATE_FAILED, - /* set when LND notifies NI degraded */ - LNET_NI_STATE_DEGRADED, - /* set when 
shuttding down NI */ - LNET_NI_STATE_DELETING + /* set when NI is being shutdown */ + LNET_NI_STATE_DELETING, }; +#define LNET_NI_RECOVERY_PENDING BIT(0) +#define LNET_NI_RECOVERY_FAILED BIT(1) + enum lnet_stats_type { LNET_STATS_TYPE_SEND = 0, LNET_STATS_TYPE_RECV, @@ -314,6 +357,22 @@ struct lnet_element_stats { struct lnet_comm_count el_drop_stats; }; +struct lnet_health_local_stats { + atomic_t hlt_local_interrupt; + atomic_t hlt_local_dropped; + atomic_t hlt_local_aborted; + atomic_t hlt_local_no_route; + atomic_t hlt_local_timeout; + atomic_t hlt_local_error; +}; + +struct lnet_health_remote_stats { + atomic_t hlt_remote_dropped; + atomic_t hlt_remote_timeout; + atomic_t hlt_remote_error; + atomic_t hlt_network_timeout; +}; + struct lnet_net { /* chain on the ln_nets */ struct list_head net_list; @@ -358,12 +417,15 @@ struct lnet_net { enum lnet_net_state net_state; }; -typedef struct lnet_ni { +struct lnet_ni { /* chain on the lnet_net structure */ struct list_head ni_netlist; - /* chain on net_ni_cpt */ - struct list_head ni_cptlist; + /* chain on the recovery queue */ + struct list_head ni_recovery; + + /* MD handle for recovery ping */ + struct lnet_handle_md ni_ping_mdh; spinlock_t ni_lock; @@ -389,7 +451,7 @@ typedef struct lnet_ni { int **ni_refs; /* when I was last alive */ - long ni_last_alive; + time64_t ni_last_alive; /* pointer to parent network */ struct lnet_net *ni_net; @@ -397,9 +459,12 @@ typedef struct lnet_ni { /* my health status */ struct lnet_ni_status *ni_status; - /* NI FSM */ + /* NI FSM. Protected by lnet_ni_lock() */ enum lnet_ni_state ni_state; + /* Recovery state. 
Protected by lnet_ni_lock() */ + __u32 ni_recovery_state; + /* per NI LND tunables */ struct lnet_lnd_tunables ni_lnd_tunables; @@ -408,6 +473,7 @@ typedef struct lnet_ni { /* NI statistics */ struct lnet_element_stats ni_stats; + struct lnet_health_local_stats ni_hstats; /* physical device CPT */ int ni_dev_cpt; @@ -416,12 +482,28 @@ typedef struct lnet_ni { __u32 ni_seq; /* + * health value + * initialized to LNET_MAX_HEALTH_VALUE + * Value is decremented every time we fail to send a message over + * this NI because of a NI specific failure. + * Value is incremented if we successfully send a message. + */ + atomic_t ni_healthv; + + /* + * Set to 1 by the LND when it receives an event telling it the device + * has gone into a fatal state. Set to 0 when the LND receives an + * event telling it the device is back online. + */ + atomic_t ni_fatal_error_on; + + /* * equivalent interfaces to use * This is an array because socklnd bonding can still be configured */ char *ni_interfaces[LNET_INTERFACES_NUM]; struct net *ni_net_ns; /* original net namespace */ -} lnet_ni_t; +}; #define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL @@ -446,20 +528,22 @@ struct lnet_ping_buffer { container_of((PINFO), struct lnet_ping_buffer, pb_info) /* router checker data, per router */ -typedef struct lnet_rc_data { +struct lnet_rc_data { /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ struct list_head rcd_list; struct lnet_handle_md rcd_mdh; /* ping buffer MD */ struct lnet_peer_ni *rcd_gateway; /* reference to gateway */ struct lnet_ping_buffer *rcd_pingbuffer;/* ping buffer */ int rcd_nnis; /* desired size of buffer */ -} lnet_rc_data_t; +}; struct lnet_peer_ni { /* chain on lpn_peer_nis */ struct list_head lpni_peer_nis; /* chain on remote peer list */ struct list_head lpni_on_remote_peer_ni_list; + /* chain on recovery queue */ + struct list_head lpni_recovery; /* chain on peer hash */ struct list_head lpni_hashlist; /* messages blocking for tx credits */ @@ -472,6 +556,7 @@ 
struct lnet_peer_ni { struct lnet_peer_net *lpni_peer_net; /* statistics kept on each peer NI */ struct lnet_element_stats lpni_stats; + struct lnet_health_remote_stats lpni_hstats; /* spin lock protecting credits and lpni_txq / lpni_rtrq */ spinlock_t lpni_lock; /* # tx credits available */ @@ -497,21 +582,25 @@ struct lnet_peer_ni { /* # times router went dead<->alive. Protected with lpni_lock */ int lpni_alive_count; /* time of last aliveness news */ - cfs_time_t lpni_timestamp; + time64_t lpni_timestamp; /* time of last ping attempt */ - cfs_time_t lpni_ping_timestamp; + time64_t lpni_ping_timestamp; /* != 0 if ping reply expected */ - cfs_time_t lpni_ping_deadline; + time64_t lpni_ping_deadline; /* when I was last alive */ - cfs_time_t lpni_last_alive; + time64_t lpni_last_alive; /* when lpni_ni was queried last time */ - cfs_time_t lpni_last_query; + time64_t lpni_last_query; /* network peer is on */ struct lnet_net *lpni_net; /* peer's NID */ lnet_nid_t lpni_nid; /* # refs */ atomic_t lpni_refcount; + /* health value for the peer */ + atomic_t lpni_healthv; + /* recovery ping mdh */ + struct lnet_handle_md lpni_recovery_ping_mdh; /* CPT this peer attached on */ int lpni_cpt; /* state flags -- protected by lpni_lock */ @@ -541,6 +630,12 @@ struct lnet_peer_ni { /* Preferred path added due to traffic on non-MR peer_ni */ #define LNET_PEER_NI_NON_MR_PREF (1 << 0) +/* peer is being recovered. 
*/ +#define LNET_PEER_NI_RECOVERY_PENDING (1 << 1) +/* recovery ping failed */ +#define LNET_PEER_NI_RECOVERY_FAILED (1 << 2) +/* peer is being deleted */ +#define LNET_PEER_NI_DELETING (1 << 3) struct lnet_peer { /* chain on pt_peer_list */ @@ -574,10 +669,10 @@ struct lnet_peer { struct lnet_ping_buffer *lp_data; /* MD handle for ping in progress */ - lnet_handle_md_t lp_ping_mdh; + struct lnet_handle_md lp_ping_mdh; /* MD handle for push in progress */ - lnet_handle_md_t lp_push_mdh; + struct lnet_handle_md lp_push_mdh; /* number of NIDs for sizing push data */ int lp_data_nnis; @@ -712,7 +807,7 @@ struct lnet_peer_table { ((lp)->lpni_net) && \ (lp)->lpni_net->net_tunables.lct_peer_timeout > 0) -typedef struct lnet_route { +struct lnet_route { struct list_head lr_list; /* chain on net */ struct list_head lr_gwlist; /* chain on gateway */ struct lnet_peer_ni *lr_gateway; /* router node */ @@ -721,20 +816,20 @@ typedef struct lnet_route { unsigned int lr_downis; /* number of down NIs */ __u32 lr_hops; /* how far I am */ unsigned int lr_priority; /* route priority */ -} lnet_route_t; +}; #define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) #define LNET_REMOTE_NETS_HASH_MAX (1U << 16) #define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) -typedef struct lnet_remotenet { +struct lnet_remotenet { /* chain on ln_remote_nets_hash */ struct list_head lrn_list; /* routes to me */ struct list_head lrn_routes; /* my net number */ __u32 lrn_net; -} lnet_remotenet_t; +}; /** lnet message has credit and can be submitted to lnd for send/receive */ #define LNET_CREDIT_OK 0 @@ -743,7 +838,7 @@ typedef struct lnet_remotenet { /** lnet message is waiting for discovery */ #define LNET_DC_WAIT 2 -typedef struct lnet_rtrbufpool { +struct lnet_rtrbufpool { /* my free buffer pool */ struct list_head rbp_bufs; /* messages blocking for a buffer */ @@ -758,13 +853,13 @@ typedef struct lnet_rtrbufpool { int rbp_credits; /* low water mark */ int rbp_mincredits; -} 
lnet_rtrbufpool_t; +}; -typedef struct lnet_rtrbuf { +struct lnet_rtrbuf { struct list_head rb_list; /* chain on rbp_bufs */ struct lnet_rtrbufpool *rb_pool; /* owning pool */ lnet_kiov_t rb_kiov[0]; /* the buffer space */ -} lnet_rtrbuf_t; +}; #define LNET_PEER_HASHSIZE 503 /* prime! */ @@ -834,7 +929,7 @@ struct lnet_match_table { /* dispatch routed PUT message by hashing source NID for wildcard portals */ #define LNET_PTL_ROTOR_HASH_RT 3 -typedef struct lnet_portal { +struct lnet_portal { spinlock_t ptl_lock; unsigned int ptl_index; /* portal ID, reserved */ /* flags on this portal: lazy, unique... */ @@ -851,7 +946,7 @@ typedef struct lnet_portal { int ptl_mt_nmaps; /* array of active entries' cpu-partition-id */ int ptl_mt_maps[0]; -} lnet_portal_t; +}; #define LNET_LH_HASH_BITS 12 #define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS) @@ -883,16 +978,16 @@ struct lnet_msg_container { #define LNET_DC_STATE_STOPPING 2 /* telling thread to stop */ /* Router Checker states */ -#define LNET_RC_STATE_SHUTDOWN 0 /* not started */ -#define LNET_RC_STATE_RUNNING 1 /* started up OK */ -#define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */ +#define LNET_MT_STATE_SHUTDOWN 0 /* not started */ +#define LNET_MT_STATE_RUNNING 1 /* started up OK */ +#define LNET_MT_STATE_STOPPING 2 /* telling thread to stop */ /* LNet states */ #define LNET_STATE_SHUTDOWN 0 /* not started */ #define LNET_STATE_RUNNING 1 /* started up OK */ #define LNET_STATE_STOPPING 2 /* telling thread to stop */ -typedef struct lnet { +struct lnet { /* CPU partition table of LNet */ struct cfs_cpt_table *ln_cpt_table; /* number of CPTs in ln_cpt_table */ @@ -935,6 +1030,10 @@ typedef struct lnet { struct lnet_ni *ln_loni; /* network zombie list */ struct list_head ln_net_zombie; + /* resend messages list */ + struct list_head ln_msg_resend; + /* spin lock to protect the msg resend list */ + spinlock_t ln_msg_resend_lock; /* remote networks with routes to them */ struct list_head *ln_remote_nets_hash; 
@@ -967,13 +1066,13 @@ typedef struct lnet { * buffer may linger a while after it has been unlinked, in * which case the event handler cleans up. */ - lnet_handle_eq_t ln_push_target_eq; - lnet_handle_md_t ln_push_target_md; + struct lnet_handle_eq ln_push_target_eq; + struct lnet_handle_md ln_push_target_md; struct lnet_ping_buffer *ln_push_target; int ln_push_target_nnis; /* discovery event queue handle */ - lnet_handle_eq_t ln_dc_eqh; + struct lnet_handle_eq ln_dc_eqh; /* discovery requests */ struct list_head ln_dc_request; /* discovery working list */ @@ -985,8 +1084,8 @@ typedef struct lnet { /* discovery startup/shutdown state */ int ln_dc_state; - /* router checker startup/shutdown state */ - int ln_rc_state; + /* monitor thread startup/shutdown state */ + int ln_mt_state; /* router checker's event queue */ struct lnet_handle_eq ln_rc_eqh; /* rcd still pending on net */ @@ -994,7 +1093,7 @@ typedef struct lnet { /* rcd ready for free */ struct list_head ln_rcd_zombie; /* serialise startup/shutdown */ - struct semaphore ln_rc_signal; + struct semaphore ln_mt_signal; struct mutex ln_api_mutex; struct mutex ln_lnd_mutex; @@ -1022,10 +1121,29 @@ typedef struct lnet { */ bool ln_nis_from_mod_params; - /* waitq for router checker. As long as there are no routes in - * the list, the router checker will sleep on this queue. when - * routes are added the thread will wake up */ - wait_queue_head_t ln_rc_waitq; -} lnet_t; + /* + * waitq for the monitor thread. The monitor thread takes care of + * checking routes, timed-out messages and resending messages. + */ + wait_queue_head_t ln_mt_waitq; + + /* per-cpt resend queues */ + struct list_head **ln_mt_resendqs; + /* local NIs to recover */ + struct list_head ln_mt_localNIRecovq; + /* peer NIs to recover */ + struct list_head ln_mt_peerNIRecovq; + /* + * An array of queues for GET/PUT waiting for REPLY/ACK respectively. + * There are CPT number of queues. 
Since response trackers will be + * added on the fast path we can't afford to grab the exclusive + * net lock to protect these queues. The CPT will be calculated + * based on the mdh cookie. + */ + struct list_head **ln_mt_rstq; + /* recovery eq handler */ + struct lnet_handle_eq ln_mt_eqh; + +}; #endif