#ifndef __LNET_LIB_LNET_H__
#define __LNET_LIB_LNET_H__
+/* LNET has 0xeXXX */
+#define CFS_FAIL_PTLRPC_OST_BULK_CB2 0xe000
+
#ifndef __KERNEL__
# error This include is only for kernel use.
#endif
/* default timeout */
#define DEFAULT_PEER_TIMEOUT 180
+#define LNET_LND_DEFAULT_TIMEOUT 5
+
+#ifdef HAVE_KERN_SOCK_GETNAME_2ARGS
+#define lnet_kernel_getpeername(sock, addr, addrlen) \
+ kernel_getpeername(sock, addr)
+#define lnet_kernel_getsockname(sock, addr, addrlen) \
+ kernel_getsockname(sock, addr)
+#else
+#define lnet_kernel_getpeername(sock, addr, addrlen) \
+ kernel_getpeername(sock, addr, addrlen)
+#define lnet_kernel_getsockname(sock, addr, addrlen) \
+ kernel_getsockname(sock, addr, addrlen)
+#endif
-static inline int lnet_is_route_alive(struct lnet_route *route)
-{
- if (!route->lr_gateway->lpni_alive)
- return 0; /* gateway is down */
- if ((route->lr_gateway->lpni_ping_feats &
- LNET_PING_FEAT_NI_STATUS) == 0)
- return 1; /* no NI status, assume it's alive */
- /* has NI status, check # down NIs */
- return route->lr_downis == 0;
-}
+bool lnet_is_route_alive(struct lnet_route *route);
+bool lnet_is_gateway_alive(struct lnet_peer *gw);
static inline int lnet_is_wire_handle_none(struct lnet_handle_wire *wh)
{
}
static inline int
-lnet_isrouter(struct lnet_peer_ni *lp)
+lnet_isrouter(struct lnet_peer_ni *lpni)
{
- return lp->lpni_rtr_refcount != 0;
+ return lpni->lpni_peer_net->lpn_peer->lp_rtr_refcount != 0;
}
static inline void
LIBCFS_FREE(msg, sizeof(*msg));
}
+static inline struct lnet_rsp_tracker *
+lnet_rspt_alloc(int cpt)
+{
+ struct lnet_rsp_tracker *rspt;
+ LIBCFS_ALLOC(rspt, sizeof(*rspt));
+ lnet_net_lock(cpt);
+ the_lnet.ln_counters[cpt]->lct_health.lch_rst_alloc++;
+ lnet_net_unlock(cpt);
+ return rspt;
+}
+
+static inline void
+lnet_rspt_free(struct lnet_rsp_tracker *rspt, int cpt)
+{
+ LIBCFS_FREE(rspt, sizeof(*rspt));
+ lnet_net_lock(cpt);
+ the_lnet.ln_counters[cpt]->lct_health.lch_rst_alloc--;
+ lnet_net_unlock(cpt);
+}
+
void lnet_ni_free(struct lnet_ni *ni);
void lnet_net_free(struct lnet_net *net);
extern struct lnet_ni *lnet_nid2ni_addref(lnet_nid_t nid);
extern struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt);
extern struct lnet_ni *lnet_net2ni_addref(__u32 net);
-bool lnet_is_ni_healthy_locked(struct lnet_ni *ni);
struct lnet_net *lnet_get_net_locked(__u32 net_id);
int lnet_lib_init(void);
void lnet_lib_exit(void);
extern unsigned lnet_transaction_timeout;
+extern unsigned lnet_retry_count;
extern unsigned int lnet_numa_range;
extern unsigned int lnet_health_sensitivity;
+extern unsigned int lnet_recovery_interval;
extern unsigned int lnet_peer_discovery_disabled;
+extern unsigned int lnet_drop_asym_route;
+extern unsigned int router_sensitivity_percentage;
+extern int alive_router_check_interval;
extern int portal_rotor;
-int lnet_notify(struct lnet_ni *ni, lnet_nid_t peer, int alive,
+void lnet_mt_event_handler(struct lnet_event *event);
+
+int lnet_notify(struct lnet_ni *ni, lnet_nid_t peer, bool alive, bool reset,
time64_t when);
void lnet_notify_locked(struct lnet_peer_ni *lp, int notifylnd, int alive,
time64_t when);
int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway_nid,
- unsigned int priority);
-int lnet_check_routes(void);
+ __u32 priority, __u32 sensitivity);
int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
void lnet_destroy_routes(void);
int lnet_get_route(int idx, __u32 *net, __u32 *hops,
- lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
+ lnet_nid_t *gateway, __u32 *alive, __u32 *priority,
+ __u32 *sensitivity);
int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
struct lnet_ni *lnet_get_next_ni_locked(struct lnet_net *mynet,
struct lnet_ni *prev);
struct lnet_ni *lnet_get_ni_idx_locked(int idx);
-struct libcfs_ioctl_handler {
- struct list_head item;
- int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
-};
-
-#define DECLARE_IOCTL_HANDLER(ident, func) \
- static struct libcfs_ioctl_handler ident = { \
- .item = LIST_HEAD_INIT(ident.item), \
- .handle_ioctl = func \
- }
-
-extern int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
-extern int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
extern int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
struct libcfs_ioctl_hdr __user *uparam);
extern int lnet_get_peer_list(__u32 *countp, __u32 *sizep,
struct lnet_process_id __user *ids);
+extern void lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all);
+extern void lnet_peer_ni_add_to_recoveryq_locked(struct lnet_peer_ni *lpni);
void lnet_router_debugfs_init(void);
void lnet_router_debugfs_fini(void);
int lnet_rtrpools_enable(void);
void lnet_rtrpools_disable(void);
void lnet_rtrpools_free(int keep_pools);
+void lnet_rtr_transfer_to_peer(struct lnet_peer *src,
+ struct lnet_peer *target);
struct lnet_remotenet *lnet_find_rnet_locked(__u32 net);
int lnet_dyn_add_net(struct lnet_ioctl_config_data *conf);
int lnet_dyn_del_net(__u32 net);
void lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md,
unsigned int offset, unsigned int mlen);
-void lnet_msg_detach_md(struct lnet_msg *msg, int status);
void lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev);
void lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type);
void lnet_msg_commit(struct lnet_msg *msg, int cpt);
struct lnet_process_id target, unsigned int offset,
unsigned int len);
int lnet_send(lnet_nid_t nid, struct lnet_msg *msg, lnet_nid_t rtr_nid);
+int lnet_send_ping(lnet_nid_t dest_nid, struct lnet_handle_md *mdh, int nnis,
+ void *user_ptr, struct lnet_handle_eq eqh, bool recovery);
void lnet_return_tx_credits_locked(struct lnet_msg *msg);
void lnet_return_rx_credits_locked(struct lnet_msg *msg);
void lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp);
struct lnet_msg *get_msg);
void lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *msg,
unsigned int len);
+void lnet_detach_rsp_tracker(struct lnet_libmd *md, int cpt);
void lnet_finalize(struct lnet_msg *msg, int rc);
+bool lnet_send_error_simulation(struct lnet_msg *msg,
+ enum lnet_msg_hstatus *hstatus);
+void lnet_handle_remote_failure_locked(struct lnet_peer_ni *lpni);
void lnet_drop_message(struct lnet_ni *ni, int cpt, void *private,
unsigned int nob, __u32 msg_type);
void lnet_msg_containers_destroy(void);
int lnet_msg_containers_create(void);
+char *lnet_health_error2str(enum lnet_msg_hstatus hstatus);
char *lnet_msgtyp2str(int type);
void lnet_print_hdr(struct lnet_hdr *hdr);
int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold);
int lnet_fault_init(void);
void lnet_fault_fini(void);
-bool lnet_drop_rule_match(struct lnet_hdr *hdr);
+bool lnet_drop_rule_match(struct lnet_hdr *hdr, lnet_nid_t local_nid,
+ enum lnet_msg_hstatus *hstatus);
int lnet_delay_rule_add(struct lnet_fault_attr *attr);
int lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown);
/** @} lnet_fault_simulation */
+void lnet_counters_get_common(struct lnet_counters_common *common);
void lnet_counters_get(struct lnet_counters *counters);
void lnet_counters_reset(void);
struct page *lnet_kvaddr_to_page(unsigned long vaddr);
int lnet_cpt_of_md(struct lnet_libmd *md, unsigned int offset);
+unsigned int lnet_get_lnd_timeout(void);
void lnet_register_lnd(struct lnet_lnd *lnd);
void lnet_unregister_lnd(struct lnet_lnd *lnd);
int lnet_peers_start_down(void);
int lnet_peer_buffer_credits(struct lnet_net *net);
+void lnet_consolidate_routes_locked(struct lnet_peer *orig_lp,
+ struct lnet_peer *new_lp);
+void lnet_router_discovery_complete(struct lnet_peer *lp);
int lnet_monitor_thr_start(void);
void lnet_monitor_thr_stop(void);
bool lnet_router_checker_active(void);
void lnet_check_routers(void);
-int lnet_router_pre_mt_start(void);
-void lnet_router_post_mt_start(void);
-void lnet_prune_rc_data(int wait_unlink);
-void lnet_router_cleanup(void);
-void lnet_router_ni_update_locked(struct lnet_peer_ni *gw, __u32 net);
+void lnet_wait_router_start(void);
void lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf);
int lnet_ping_info_validate(struct lnet_ping_info *pinfo);
struct lnet_peer_ni *lnet_nid2peerni_locked(lnet_nid_t nid, lnet_nid_t pref,
int cpt);
struct lnet_peer_ni *lnet_nid2peerni_ex(lnet_nid_t nid, int cpt);
+struct lnet_peer_ni *lnet_peer_get_ni_locked(struct lnet_peer *lp,
+ lnet_nid_t nid);
struct lnet_peer_ni *lnet_find_peer_ni_locked(lnet_nid_t nid);
struct lnet_peer *lnet_find_peer(lnet_nid_t nid);
void lnet_peer_net_added(struct lnet_net *net);
__u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
__u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis,
__u32 *peer_tx_qnob);
-
-static inline bool
-lnet_is_peer_ni_healthy_locked(struct lnet_peer_ni *lpni)
-{
- return lpni->lpni_healthy;
-}
-
-static inline void
-lnet_set_peer_ni_health_locked(struct lnet_peer_ni *lpni, bool health)
-{
- lpni->lpni_healthy = health;
-}
-
-static inline bool
-lnet_is_peer_net_healthy_locked(struct lnet_peer_net *peer_net)
-{
- struct lnet_peer_ni *lpni;
-
- list_for_each_entry(lpni, &peer_net->lpn_peer_nis,
- lpni_peer_nis) {
- if (lnet_is_peer_ni_healthy_locked(lpni))
- return true;
- }
-
- return false;
-}
-
-static inline bool
-lnet_is_peer_healthy_locked(struct lnet_peer *peer)
-{
- struct lnet_peer_net *peer_net;
-
- list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
- if (lnet_is_peer_net_healthy_locked(peer_net))
- return true;
- }
-
- return false;
-}
+int lnet_get_peer_ni_hstats(struct lnet_ioctl_peer_ni_hstats *stats);
static inline struct lnet_peer_net *
lnet_find_peer_net_locked(struct lnet_peer *peer, __u32 net_id)
return NULL;
}
-static inline void
-lnet_peer_set_alive(struct lnet_peer_ni *lp)
-{
- lp->lpni_last_alive = ktime_get_seconds();
- lp->lpni_last_query = lp->lpni_last_alive;
- if (!lp->lpni_alive)
- lnet_notify_locked(lp, 0, 1, lp->lpni_last_alive);
-}
-
static inline bool
lnet_peer_is_multi_rail(struct lnet_peer *lp)
{
}
bool lnet_peer_is_uptodate(struct lnet_peer *lp);
+bool lnet_is_discovery_disabled(struct lnet_peer *lp);
+bool lnet_peer_gw_discovery(struct lnet_peer *lp);
static inline bool
lnet_peer_needs_push(struct lnet_peer *lp)
return true;
if (lp->lp_state & LNET_PEER_NO_DISCOVERY)
return false;
+ /* if discovery is not enabled then no need to push */
+ if (lnet_peer_discovery_disabled)
+ return false;
if (lp->lp_node_seqno < atomic_read(&the_lnet.ln_ping_target_seqno))
return true;
return false;
}
+/*
+ * A peer is alive if it satisfies the following two conditions:
+ * 1. peer health >= LNET_MAX_HEALTH_VALUE * router_sensitivity_percentage
+ * 2. the cached NI status received when we discover the peer is UP
+ */
+static inline bool
+lnet_is_peer_ni_alive(struct lnet_peer_ni *lpni)
+{
+ bool halive = false;
+
+ halive = (atomic_read(&lpni->lpni_healthv) >=
+ (LNET_MAX_HEALTH_VALUE * router_sensitivity_percentage / 100));
+
+ return halive && lpni->lpni_ns_status == LNET_NI_STATUS_UP;
+}
+
+static inline void
+lnet_set_healthv(atomic_t *healthv, int value)
+{
+ atomic_set(healthv, value);
+}
+
+static inline void
+lnet_inc_healthv(atomic_t *healthv)
+{
+ atomic_add_unless(healthv, 1, LNET_MAX_HEALTH_VALUE);
+}
+
void lnet_incr_stats(struct lnet_element_stats *stats,
enum lnet_msg_type msg_type,
enum lnet_stats_type stats_type);