#include <linux/kthread.h>
#include <linux/uio.h>
+#include <linux/semaphore.h>
#include <linux/types.h>
#include <uapi/linux/lnet/lnet-dlc.h>
#include <uapi/linux/lnet/lnetctl.h>
/* Max payload size */
-#ifndef CONFIG_LNET_MAX_PAYLOAD
-# error "CONFIG_LNET_MAX_PAYLOAD must be defined in config.h"
-#endif
+#define LNET_MAX_PAYLOAD LNET_MTU
-#define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD
-#if (LNET_MAX_PAYLOAD < LNET_MTU)
-# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
-#elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
-# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
-#endif
+#define LNET_MAX_IOV (LNET_MAX_PAYLOAD >> PAGE_SHIFT)
+
+/*
+ * This is the maximum health value.
+ * All local and peer NIs created have their health default to this value.
+ */
+#define LNET_MAX_HEALTH_VALUE 1000
/* forward refs */
struct lnet_libmd;
+enum lnet_msg_hstatus { /* health status of a message; stored in lnet_msg's msg_health_status */
+ LNET_MSG_STATUS_OK = 0, /* first enumerator, value 0 */
+ LNET_MSG_STATUS_LOCAL_INTERRUPT, /* LOCAL_*: names line up with the hlt_local_* counters in struct lnet_health_local_stats */
+ LNET_MSG_STATUS_LOCAL_DROPPED,
+ LNET_MSG_STATUS_LOCAL_ABORTED,
+ LNET_MSG_STATUS_LOCAL_NO_ROUTE,
+ LNET_MSG_STATUS_LOCAL_ERROR,
+ LNET_MSG_STATUS_LOCAL_TIMEOUT,
+ LNET_MSG_STATUS_REMOTE_ERROR, /* REMOTE_* and NETWORK_TIMEOUT: names line up with the counters in struct lnet_health_remote_stats */
+ LNET_MSG_STATUS_REMOTE_DROPPED,
+ LNET_MSG_STATUS_REMOTE_TIMEOUT,
+ LNET_MSG_STATUS_NETWORK_TIMEOUT,
+ LNET_MSG_STATUS_END, /* last enumerator; NOTE(review): presumably an end marker rather than a real status - confirm */
+};
+
+struct lnet_rsp_tracker { /* tracks a REPLY/ACK that is awaited for an MD, together with its deadline */
+ /* chain on the waiting list; NOTE(review): presumably one of the per-CPT ln_mt_rstq queues - confirm */
+ struct list_head rspt_on_list;
+ /* cpt to lock; NOTE(review): presumably the CPT whose lock protects the list this tracker is on - confirm */
+ int rspt_cpt;
+ /* deadline of the REPLY/ACK, as a ktime_t */
+ ktime_t rspt_deadline;
+ /* handle of the parent MD (struct lnet_libmd refers to its tracker through md_rspt_ptr) */
+ struct lnet_handle_md rspt_mdh;
+};
+
struct lnet_msg {
struct list_head msg_activelist;
struct list_head msg_list; /* Q for credits/MD */
lnet_nid_t msg_src_nid_param;
lnet_nid_t msg_rtr_nid_param;
+ /*
+ * Deadline for the message after which it will be finalized if it
+ * has not completed.
+ */
+ ktime_t msg_deadline;
+
+ /* The message health status. */
+ enum lnet_msg_hstatus msg_health_status;
+ /* This is a recovery message */
+ bool msg_recovery;
+ /* the number of times a transmission has been retried */
+ int msg_retry_count;
+ /* flag to indicate that we do not want to resend this message */
+ bool msg_no_resend;
+
/* committed for sending */
unsigned int msg_tx_committed:1;
/* CPT # this message committed for sending */
};
struct lnet_libmd {
- struct list_head md_list;
- struct lnet_libhandle md_lh;
- struct lnet_me *md_me;
- char *md_start;
- unsigned int md_offset;
- unsigned int md_length;
- unsigned int md_max_size;
- int md_threshold;
- int md_refcount;
- unsigned int md_options;
- unsigned int md_flags;
- unsigned int md_niov; /* # frags at end of struct */
- void *md_user_ptr;
- struct lnet_eq *md_eq;
- struct lnet_handle_md md_bulk_handle;
+ struct list_head md_list;
+ struct lnet_libhandle md_lh;
+ struct lnet_me *md_me;
+ char *md_start;
+ unsigned int md_offset;
+ unsigned int md_length;
+ unsigned int md_max_size;
+ int md_threshold;
+ int md_refcount;
+ unsigned int md_options;
+ unsigned int md_flags;
+ unsigned int md_niov; /* # frags at end of struct */
+ void *md_user_ptr;
+ struct lnet_rsp_tracker *md_rspt_ptr; /* response tracker for this MD; NOTE(review): presumably NULL when no REPLY/ACK is awaited - confirm */
+ struct lnet_eq *md_eq;
+ struct lnet_handle_md md_bulk_handle;
 union {
- struct kvec iov[LNET_MAX_IOV];
- lnet_kiov_t kiov[LNET_MAX_IOV];
+ struct kvec iov[LNET_MAX_IOV];
+ lnet_kiov_t kiov[LNET_MAX_IOV];
 } md_iov;
};
void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
/* query of peer aliveness */
- void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, cfs_time_t *when);
+ void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, time64_t *when);
/* accept a new connection */
int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock);
LNET_NET_STATE_DELETING
};
-enum lnet_ni_state {
- /* set when NI block is allocated */
- LNET_NI_STATE_INIT = 0,
- /* set when NI is started successfully */
- LNET_NI_STATE_ACTIVE,
- /* set when LND notifies NI failed */
- LNET_NI_STATE_FAILED,
- /* set when LND notifies NI degraded */
- LNET_NI_STATE_DEGRADED,
- /* set when shuttding down NI */
- LNET_NI_STATE_DELETING
-};
+#define LNET_NI_STATE_INIT (1 << 0) /* NI state bits, kept in the __u32 ni_state; this one: set when NI block is allocated */
+#define LNET_NI_STATE_ACTIVE (1 << 1) /* set when NI is started successfully */
+#define LNET_NI_STATE_FAILED (1 << 2) /* set when LND notifies NI failed */
+#define LNET_NI_STATE_RECOVERY_PENDING (1 << 3) /* NOTE(review): presumably set while the NI is queued for recovery - confirm */
+#define LNET_NI_STATE_DELETING (1 << 4) /* set when shutting down NI */
enum lnet_stats_type {
LNET_STATS_TYPE_SEND = 0,
struct lnet_comm_count el_drop_stats;
};
+struct lnet_health_local_stats { /* one atomic counter per LNET_MSG_STATUS_LOCAL_* status; embedded in the NI as ni_hstats */
+ atomic_t hlt_local_interrupt;
+ atomic_t hlt_local_dropped;
+ atomic_t hlt_local_aborted;
+ atomic_t hlt_local_no_route;
+ atomic_t hlt_local_timeout;
+ atomic_t hlt_local_error;
+};
+
+struct lnet_health_remote_stats { /* one atomic counter per LNET_MSG_STATUS_REMOTE_* / NETWORK_TIMEOUT status; embedded in the peer NI as lpni_hstats */
+ atomic_t hlt_remote_dropped;
+ atomic_t hlt_remote_timeout;
+ atomic_t hlt_remote_error;
+ atomic_t hlt_network_timeout;
+};
+
struct lnet_net {
/* chain on the ln_nets */
struct list_head net_list;
/* chain on net_ni_cpt */
struct list_head ni_cptlist;
+ /* chain on the recovery queue */
+ struct list_head ni_recovery;
+
+ /* MD handle for recovery ping */
+ struct lnet_handle_md ni_ping_mdh;
+
spinlock_t ni_lock;
/* number of CPTs */
int **ni_refs;
/* when I was last alive */
- long ni_last_alive;
+ time64_t ni_last_alive;
/* pointer to parent network */
struct lnet_net *ni_net;
struct lnet_ni_status *ni_status;
/* NI FSM */
- enum lnet_ni_state ni_state;
+ __u32 ni_state;
/* per NI LND tunables */
struct lnet_lnd_tunables ni_lnd_tunables;
/* NI statistics */
struct lnet_element_stats ni_stats;
+ struct lnet_health_local_stats ni_hstats;
/* physical device CPT */
int ni_dev_cpt;
__u32 ni_seq;
/*
+ * health value
+ * initialized to LNET_MAX_HEALTH_VALUE
+ * Value is decremented every time we fail to send a message over
+ * this NI because of a NI specific failure.
+ * Value is incremented if we successfully send a message.
+ */
+ atomic_t ni_healthv;
+
+ /*
+ * Set to 1 by the LND when it receives an event telling it the device
+ * has gone into a fatal state. Set to 0 when the LND receives an
+ * event telling it the device is back online.
+ */
+ atomic_t ni_fatal_error_on;
+
+ /*
* equivalent interfaces to use
* This is an array because socklnd bonding can still be configured
*/
struct list_head lpni_peer_nis;
/* chain on remote peer list */
struct list_head lpni_on_remote_peer_ni_list;
+ /* chain on recovery queue */
+ struct list_head lpni_recovery;
/* chain on peer hash */
struct list_head lpni_hashlist;
/* messages blocking for tx credits */
struct lnet_peer_net *lpni_peer_net;
/* statistics kept on each peer NI */
struct lnet_element_stats lpni_stats;
+ struct lnet_health_remote_stats lpni_hstats;
/* spin lock protecting credits and lpni_txq / lpni_rtrq */
spinlock_t lpni_lock;
/* # tx credits available */
/* # times router went dead<->alive. Protected with lpni_lock */
int lpni_alive_count;
/* time of last aliveness news */
- cfs_time_t lpni_timestamp;
+ time64_t lpni_timestamp;
/* time of last ping attempt */
- cfs_time_t lpni_ping_timestamp;
+ time64_t lpni_ping_timestamp;
/* != 0 if ping reply expected */
- cfs_time_t lpni_ping_deadline;
+ time64_t lpni_ping_deadline;
/* when I was last alive */
- cfs_time_t lpni_last_alive;
+ time64_t lpni_last_alive;
/* when lpni_ni was queried last time */
- cfs_time_t lpni_last_query;
+ time64_t lpni_last_query;
/* network peer is on */
struct lnet_net *lpni_net;
/* peer's NID */
lnet_nid_t lpni_nid;
/* # refs */
atomic_t lpni_refcount;
+ /* health value for the peer */
+ atomic_t lpni_healthv;
+ /* recovery ping mdh */
+ struct lnet_handle_md lpni_recovery_ping_mdh;
/* CPT this peer attached on */
int lpni_cpt;
/* state flags -- protected by lpni_lock */
/* Preferred path added due to traffic on non-MR peer_ni */
#define LNET_PEER_NI_NON_MR_PREF (1 << 0)
+/* peer is being recovered. */
+#define LNET_PEER_NI_RECOVERY_PENDING (1 << 1)
+/* peer is being deleted */
+#define LNET_PEER_NI_DELETING (1 << 2)
struct lnet_peer {
/* chain on pt_peer_list */
#define LNET_DC_STATE_STOPPING 2 /* telling thread to stop */
/* Router Checker states */
-#define LNET_RC_STATE_SHUTDOWN 0 /* not started */
-#define LNET_RC_STATE_RUNNING 1 /* started up OK */
-#define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */
+#define LNET_MT_STATE_SHUTDOWN 0 /* not started */
+#define LNET_MT_STATE_RUNNING 1 /* started up OK */
+#define LNET_MT_STATE_STOPPING 2 /* telling thread to stop */
/* LNet states */
#define LNET_STATE_SHUTDOWN 0 /* not started */
/* discovery startup/shutdown state */
int ln_dc_state;
- /* router checker startup/shutdown state */
- int ln_rc_state;
+ /* monitor thread startup/shutdown state */
+ int ln_mt_state;
/* router checker's event queue */
struct lnet_handle_eq ln_rc_eqh;
/* rcd still pending on net */
/* rcd ready for free */
struct list_head ln_rcd_zombie;
/* serialise startup/shutdown */
- struct semaphore ln_rc_signal;
+ struct semaphore ln_mt_signal;
struct mutex ln_api_mutex;
struct mutex ln_lnd_mutex;
*/
bool ln_nis_from_mod_params;
- /* waitq for router checker. As long as there are no routes in
- * the list, the router checker will sleep on this queue. when
- * routes are added the thread will wake up */
- wait_queue_head_t ln_rc_waitq;
+ /*
+ * waitq for the monitor thread. The monitor thread takes care of
+ * checking routes, timedout messages and resending messages.
+ */
+ wait_queue_head_t ln_mt_waitq;
+
+ /* per-cpt resend queues */
+ struct list_head **ln_mt_resendqs;
+ /* local NIs to recover */
+ struct list_head ln_mt_localNIRecovq;
+ /* peer NIs to recover */
+ struct list_head ln_mt_peerNIRecovq;
+ /*
+ * An array of queues for GET/PUT waiting for REPLY/ACK respectively.
+ * There are CPT number of queues. Since response trackers will be
+ * added on the fast path we can't afford to grab the exclusive
+ * net lock to protect these queues. The CPT will be calculated
+ * based on the mdh cookie.
+ */
+ struct list_head **ln_mt_rstq;
+ /* recovery eq handler */
+ struct lnet_handle_eq ln_mt_eqh;
+
};
#endif