void lnet_notify_locked(struct lnet_peer_ni *lp, int notifylnd, int alive,
time64_t when);
int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway_nid,
- unsigned int priority);
+ __u32 priority, __u32 sensitivity);
int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
void lnet_destroy_routes(void);
int lnet_get_route(int idx, __u32 *net, __u32 *hops,
- lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
+ lnet_nid_t *gateway, __u32 *alive, __u32 *priority,
+ __u32 *sensitivity);
int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
struct lnet_ni *lnet_get_next_ni_locked(struct lnet_net *mynet,
struct lnet_ni *prev);
/* # refs from lnet_route_t::lr_gateway */
int lp_rtr_refcount;
+ /*
+ * peer-specific health sensitivity: when set to something other than
+ * 0, the health value of the peer NIs in this peer is decremented by
+ * this value instead of the globally set one
+ */
+ __u32 lp_health_sensitivity;
+
/* messages blocking for router credits */
struct list_head lp_rtrq;
__u32 rtr_hop;
__u32 rtr_priority;
__u32 rtr_flags;
+ __u32 rtr_sensitivity;
} cfg_route;
struct {
char net_intf[LNET_MAX_STR_LEN];
case IOC_LIBCFS_FAIL_NID:
return lnet_fail_nid(data->ioc_nid, data->ioc_count);
- case IOC_LIBCFS_ADD_ROUTE:
+ case IOC_LIBCFS_ADD_ROUTE: {
+ /* default router sensitivity to 1 */
+ unsigned int sensitivity = 1;
config = arg;
if (config->cfg_hdr.ioc_len < sizeof(*config))
return -EINVAL;
+ if (config->cfg_config_u.cfg_route.rtr_sensitivity) {
+ sensitivity =
+ config->cfg_config_u.cfg_route.rtr_sensitivity;
+ }
+
mutex_lock(&the_lnet.ln_api_mutex);
rc = lnet_add_route(config->cfg_net,
config->cfg_config_u.cfg_route.rtr_hop,
config->cfg_nid,
config->cfg_config_u.cfg_route.
- rtr_priority);
+ rtr_priority, sensitivity);
mutex_unlock(&the_lnet.ln_api_mutex);
return rc;
+ }
case IOC_LIBCFS_DEL_ROUTE:
config = arg;
&config->cfg_nid,
&config->cfg_config_u.cfg_route.rtr_flags,
&config->cfg_config_u.cfg_route.
- rtr_priority);
+ rtr_priority,
+ &config->cfg_config_u.cfg_route.
+ rtr_sensitivity);
mutex_unlock(&the_lnet.ln_api_mutex);
return rc;
continue;
}
- rc = lnet_add_route(net, hops, nid, priority);
+ rc = lnet_add_route(net, hops, nid, priority, 1);
if (rc != 0 && rc != -EEXIST && rc != -EHOSTUNREACH) {
CERROR("Can't create route "
"to %s via %s\n",
}
/*
 * Lower the health value held in @healthv by @sensitivity, never letting
 * it drop below 0: when the current value is smaller than @sensitivity
 * the health value is set to 0, otherwise @sensitivity is subtracted
 * from it and the result is stored back.
 *
 * With this change the amount to subtract is supplied by the caller
 * instead of being read from lnet_health_sensitivity inside the function.
 *
 * NOTE(review): the value is read and then written with two separate
 * atomic operations; presumably the lock implied by the _locked suffix
 * serializes concurrent updates - confirm against the callers.
 * NOTE(review): @sensitivity is an int while the remote-failure caller
 * passes a __u32; confirm the value can never exceed INT_MAX, otherwise
 * it would be seen as negative here.
 */
static void
-lnet_dec_healthv_locked(atomic_t *healthv)
+lnet_dec_healthv_locked(atomic_t *healthv, int sensitivity)
{
int h = atomic_read(healthv);
- if (h < lnet_health_sensitivity) {
+ if (h < sensitivity) {
atomic_set(healthv, 0);
} else {
- h -= lnet_health_sensitivity;
+ h -= sensitivity;
atomic_set(healthv, h);
}
}
return;
}
- lnet_dec_healthv_locked(&local_ni->ni_healthv);
+ lnet_dec_healthv_locked(&local_ni->ni_healthv, lnet_health_sensitivity);
/*
* add the NI to the recovery queue if it's not already there
* and it's health value is actually below the maximum. It's
void
lnet_handle_remote_failure_locked(struct lnet_peer_ni *lpni)
{
+ __u32 sensitivity = lnet_health_sensitivity;
+ __u32 lp_sensitivity;
+
/* lpni could be NULL if we're in the LOLND case */
if (!lpni)
return;
- lnet_dec_healthv_locked(&lpni->lpni_healthv);
+ /*
+ * If there is a health sensitivity in the peer then use that
+ * instead of the globally set one.
+ */
+ lp_sensitivity = lpni->lpni_peer_net->lpn_peer->lp_health_sensitivity;
+ if (lp_sensitivity)
+ sensitivity = lp_sensitivity;
+
+ lnet_dec_healthv_locked(&lpni->lpni_healthv, sensitivity);
/*
* add the peer NI to the recovery queue if it's not already there
* and it's health value is actually below the maximum. It's
init_waitqueue_head(&lp->lp_dc_waitq);
spin_lock_init(&lp->lp_lock);
lp->lp_primary_nid = nid;
+
+ /*
+ * all peers created on a router should have health on
+ * if it's not already on.
+ */
+ if (the_lnet.ln_routing && !lnet_health_sensitivity)
+ lp->lp_health_sensitivity = 1;
+
/*
* Turn off discovery for loopback peer. If you're creating a peer
* for the loopback interface then that was initiated when we
int
lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
- unsigned int priority)
+ __u32 priority, __u32 sensitivity)
{
struct list_head *route_entry;
struct lnet_remotenet *rnet;
* to move the routes from the peer that's being deleted to the
* consolidated peer lp_routes list
*/
- if (add_route)
+ if (add_route) {
+ gw->lp_health_sensitivity = sensitivity;
lnet_add_route_to_rnet(rnet2, route);
+ }
/*
* get rid of the reference on the lpni.
int
lnet_get_route(int idx, __u32 *net, __u32 *hops,
- lnet_nid_t *gateway, __u32 *alive, __u32 *priority)
+ lnet_nid_t *gateway, __u32 *alive, __u32 *priority, __u32 *sensitivity)
{
- struct list_head *e1;
- struct list_head *e2;
struct lnet_remotenet *rnet;
- struct lnet_route *route;
- int cpt;
- int i;
struct list_head *rn_list;
+ struct lnet_route *route;
+ struct list_head *e1;
+ struct list_head *e2;
+ int cpt;
+ int i;
cpt = lnet_net_lock_current();
*gateway = route->lr_nid;
*hops = route->lr_hops;
*priority = route->lr_priority;
+ *sensitivity = route->lr_gateway->
+ lp_health_sensitivity;
*alive = lnet_is_route_alive(route);
lnet_net_unlock(cpt);
return 0;