/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
*
* This file is part of Portals
* http://sourceforge.net/projects/sandiaportals/
CFS_MODULE_PARM(forwarding, "s", charp, 0444,
"Explicitly enable/disable forwarding between networks");
-static int tiny_router_buffers = 512;
+static int tiny_router_buffers = 1024;
CFS_MODULE_PARM(tiny_router_buffers, "i", int, 0444,
"# of 0 payload messages to buffer in the router");
-static int small_router_buffers = 256;
+static int small_router_buffers = 8192;
CFS_MODULE_PARM(small_router_buffers, "i", int, 0444,
"# of small (1 page) messages to buffer in the router");
-static int large_router_buffers = 32;
+static int large_router_buffers = 512;
CFS_MODULE_PARM(large_router_buffers, "i", int, 0444,
"# of large messages to buffer in the router");
+static int peer_buffer_credits = 0; /* LNet-wide default; used only when > 0
+ * and the NI has no positive ni_peerrtrcredits of its own
+ * (see lnet_peer_buffer_credits()) */
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+ "# router buffer credits per peer");
static int auto_down = 1;
CFS_MODULE_PARM(auto_down, "i", int, 0444,
CFS_MODULE_PARM(router_ping_timeout, "i", int, 0444,
"Seconds to wait for the reply to a router health query");
-typedef struct
-{
- work_struct_t kpru_tq;
- lnet_nid_t kpru_nid;
- int kpru_alive;
- time_t kpru_when;
-} kpr_upcall_t;
-
-void
-kpr_do_upcall (void *arg)
-{
- kpr_upcall_t *u = (kpr_upcall_t *)arg;
-
-#ifndef __WINNT__
-
- char nidstr[36];
- char whenstr[36];
- char *argv[] = {
- NULL,
- "ROUTER_NOTIFY",
- nidstr,
- u->kpru_alive ? "up" : "down",
- whenstr,
- NULL};
-
- snprintf (nidstr, sizeof(nidstr), "%s", libcfs_nid2str(u->kpru_nid));
- snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when);
-
- libcfs_run_upcall (argv);
-
-#endif /* __WINNT__ */
-
- LIBCFS_FREE(u, sizeof(*u));
-}
-
-void
-kpr_upcall (lnet_nid_t gw_nid, int alive, time_t when)
+int
+lnet_peers_start_down(void)
{
- /* May be in arbitrary context */
- kpr_upcall_t *u;
-
- LIBCFS_ALLOC_ATOMIC(u, sizeof(*u));
- if (u == NULL) {
- CERROR ("Upcall out of memory: nid %s %s\n",
- libcfs_nid2str(gw_nid), alive ? "up" : "down");
- return;
- }
-
- u->kpru_nid = gw_nid;
- u->kpru_alive = alive;
- u->kpru_when = when;
-
- prepare_work (&u->kpru_tq, kpr_do_upcall, u);
- schedule_work (&u->kpru_tq);
+ return check_routers_before_use; /* hand back the setting unchanged */
}
int
-lnet_peers_start_down(void)
+lnet_peer_buffer_credits(lnet_ni_t *ni)
{
- return check_routers_before_use;
+ /* Choose the per-peer router buffer credit count for 'ni'.
+ * Precedence: the NI's own ni_peerrtrcredits, then the
+ * 'peer_buffer_credits' module parameter; each counts only when it
+ * is > 0.  NI option overrides LNet default */
+ if (ni->ni_peerrtrcredits > 0)
+ return ni->ni_peerrtrcredits;
+ if (peer_buffer_credits > 0)
+ return peer_buffer_credits;
+
+ /* Neither is set.  As an approximation, allow this peer the same
+ * number of router buffers as it is allowed outstanding sends */
+ return ni->ni_peertxcredits;
}
void
lp->lp_alive_count++;
lp->lp_alive = !(!alive); /* 1 bit! */
lp->lp_notify = 1;
- lp->lp_notifylnd = notifylnd;
+ lp->lp_notifylnd |= notifylnd;
CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
}
void
-lnet_do_notify (lnet_peer_t *lp)
+lnet_do_notify (lnet_peer_t *lp)
{
lnet_ni_t *ni = lp->lp_ni;
int alive;
- time_t when;
- int lnd;
-
+ int notifylnd;
+
LNET_LOCK();
-
+
/* Notify only in 1 thread at any time to ensure ordered notification.
* NB individual events can be missed; the only guarantee is that you
* always get the most recent news */
}
lp->lp_notifying = 1;
-
+
while (lp->lp_notify) {
- alive = lp->lp_alive;
- when = lp->lp_timestamp;
- lnd = lp->lp_notifylnd;
+ alive = lp->lp_alive;
+ notifylnd = lp->lp_notifylnd;
- lp->lp_notify = 0;
+ lp->lp_notifylnd = 0;
+ lp->lp_notify = 0;
- LNET_UNLOCK();
+ if (notifylnd && ni->ni_lnd->lnd_notify != NULL) {
+ LNET_UNLOCK();
- /* A new notification could happen now; I'll handle it when
- * control returns to me */
-
- if (!lnd) {
- CDEBUG(D_NET, "Upcall: NID %s is %s\n",
- libcfs_nid2str(lp->lp_nid),
- alive ? "alive" : "dead");
- kpr_upcall(lp->lp_nid, alive, when);
- } else {
- if (ni->ni_lnd->lnd_notify != NULL)
- (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive);
- }
+ /* A new notification could happen now; I'll handle it
+ * when control returns to me */
- LNET_LOCK();
+ (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive);
+
+ LNET_LOCK();
+ }
}
lp->lp_notifying = 0;
CDEBUG(D_NET, "Auto-down disabled\n");
return 0;
}
-
+
LNET_LOCK();
lp = lnet_find_peer_locked(nid);
return 0;
}
+ /* We can't fully trust the LND to report the exact last-alive time
+ * of a peer when it notifies us that the peer is dead. For example,
+ * ksocklnd can call us with when == _time_when_the_node_was_booted_
+ * if no connections were ever successfully established */
+ if (ni != NULL && !alive && when < lp->lp_last_alive)
+ when = lp->lp_last_alive;
+
lnet_notify_locked(lp, ni == NULL, alive, when);
LNET_UNLOCK();
-
+
lnet_do_notify(lp);
-
+
LNET_LOCK();
lnet_peer_decref_locked(lp);
return -EOPNOTSUPP;
}
+void
+lnet_notify_locked (lnet_peer_t *lp, int notifylnd, int alive, time_t when)
+{
+ return; /* no-op stub for this #if branch; all arguments are ignored */
+}
+
#endif
static void
lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
{
struct list_head zombies;
- struct list_head *e;
- lnet_remotenet_t *rnet;
- lnet_remotenet_t *rnet2;
- lnet_route_t *route;
- lnet_route_t *route2;
+ struct list_head *e;
+ lnet_remotenet_t *rnet;
+ lnet_remotenet_t *rnet2;
+ lnet_route_t *route;
+ lnet_route_t *route2;
lnet_ni_t *ni;
int add_route;
int rc;
return -ENOMEM;
}
- INIT_LIST_HEAD(&rnet->lrn_routes);
+ CFS_INIT_LIST_HEAD(&rnet->lrn_routes);
rnet->lrn_net = net;
rnet->lrn_hops = hops;
LASSERT (route2->lr_gateway->lp_nid != gateway);
}
}
-
+
if (add_route) {
ni = route->lr_gateway->lp_ni;
lnet_ni_addref_locked(ni);
-
+
LASSERT (rc == 0);
list_add_tail(&route->lr_list, &rnet2->lrn_routes);
the_lnet.ln_remote_nets_version++;
while (!list_empty(&zombies)) {
route = list_entry(zombies.next, lnet_route_t, lr_list);
list_del(&route->lr_list);
-
+
LNET_LOCK();
+ lnet_rtr_decref_locked(route->lr_gateway);
lnet_peer_decref_locked(route->lr_gateway);
LNET_UNLOCK();
LIBCFS_FREE(route, sizeof(*route));
else if (route->lr_gateway->lp_ni !=
route2->lr_gateway->lp_ni) {
LNET_UNLOCK();
-
+
CERROR("Routes to %s via %s and %s not supported\n",
libcfs_net2str(rnet->lrn_net),
libcfs_nid2str(route->lr_gateway->lp_nid),
}
}
}
-
+
LNET_UNLOCK();
return 0;
}
lnet_get_route (int idx, __u32 *net, __u32 *hops,
lnet_nid_t *gateway, __u32 *alive)
{
- struct list_head *e1;
- struct list_head *e2;
+ struct list_head *e1;
+ struct list_head *e2;
lnet_remotenet_t *rnet;
lnet_route_t *route;
/* The router checker thread has unlinked the rc_md
* and exited. */
LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_UNLINKING);
- the_lnet.ln_rc_state = LNET_RC_STATE_UNLINKED;
- mutex_up(&the_lnet.ln_rc_signal);
+ the_lnet.ln_rc_state = LNET_RC_STATE_UNLINKED;
+ mutex_up(&the_lnet.ln_rc_signal);
return;
}
- LASSERT (event->type == LNET_EVENT_SEND ||
+ LASSERT (event->type == LNET_EVENT_SEND ||
event->type == LNET_EVENT_REPLY);
-
+
nid = (event->type == LNET_EVENT_SEND) ?
event->target.nid : event->initiator.nid;
if (lnet_isrouter(lp) && /* ignore if no longer a router */
(event->status != 0 ||
event->type == LNET_EVENT_REPLY)) {
-
+
/* A successful REPLY means the router is up. If _any_ comms
* to the router fail I assume it's down (this will happen if
* we ping alive routers to try to detect router death before
int rc;
lnet_handle_md_t mdh;
lnet_peer_t *rtr;
+ lnet_md_t md = {0};
struct list_head *entry;
time_t now;
lnet_process_id_t rtr_id;
int secs;
- cfs_daemonize("router_checker");
- cfs_block_allsigs();
+ cfs_daemonize("router_checker");
+ cfs_block_allsigs();
rtr_id.pid = LUSTRE_SRV_LNET_PID;
LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
- rc = LNetMDBind((lnet_md_t){.start = &pinginfo,
- .length = sizeof(pinginfo),
- .threshold = LNET_MD_THRESH_INF,
- .options = LNET_MD_TRUNCATE,
- .eq_handle = the_lnet.ln_rc_eqh},
- LNET_UNLINK,
- &mdh);
+ /* Initialize the MD field by field (one statement per field): it
+ * covers 'pinginfo', uses the LNET_MD_THRESH_INF threshold and
+ * reports its events to the_lnet.ln_rc_eqh */
+ md.start = &pinginfo;
+ md.length = sizeof(pinginfo);
+ md.threshold = LNET_MD_THRESH_INF;
+ md.max_size = 0;
+ md.options = LNET_MD_TRUNCATE;
+ md.user_ptr = NULL;
+ md.eq_handle = the_lnet.ln_rc_eqh;
+
+ rc = LNetMDBind(md, LNET_UNLINK, &mdh);
if (rc < 0) {
CERROR("Can't bind MD: %d\n", rc);
the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
mutex_up(&the_lnet.ln_rc_signal); /* let my parent go */
- while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
+ while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
__u64 version;
LNET_LOCK();
}
if (secs <= 0)
secs = 0;
-
+
if (secs != 0 &&
!rtr->lp_ping_notsent &&
now > rtr->lp_ping_timestamp + secs) {
LNET_RESERVED_PORTAL,
LNET_PROTO_PING_MATCHBITS, 0);
}
-
+
LNET_LOCK();
lnet_peer_decref_locked(rtr);
/* Call cfs_pause() here always adds 1 to load average
* because kernel counts # active tasks as nr_running
* + nr_uninterruptible. */
- set_current_state(CFS_TASK_INTERRUPTIBLE);
cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE,
cfs_time_seconds(1));
- }
+ }
LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_STOPTHREAD);
the_lnet.ln_rc_state = LNET_RC_STATE_UNLINKING;
-
+
rc = LNetMDUnlink(mdh);
LASSERT (rc == 0);
/* The unlink event callback will signal final completion */
-
- return 0;
+ return 0;
}
for (;;) {
LNET_LOCK();
-
+
all_known = 1;
list_for_each (entry, &the_lnet.ln_routers) {
rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
-
+
if (rtr->lp_alive_count == 0) {
all_known = 0;
break;
return;
the_lnet.ln_rc_state = LNET_RC_STATE_STOPTHREAD;
- /* block until event callback signals exit */
- mutex_down(&the_lnet.ln_rc_signal);
+ /* block until event callback signals exit */
+ mutex_down(&the_lnet.ln_rc_signal);
LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_UNLINKED);
rc = LNetEQFree(the_lnet.ln_rc_eqh);
LASSERT (rc == 0);
-
+
the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
}
if (check_routers_before_use &&
dead_router_check_interval <= 0) {
- LCONSOLE_ERROR("'dead_router_check_interval' must be set if "
- "'check_routers_before_use' is set\n");
+ LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be"
+ " set if 'check_routers_before_use' is set"
+ "\n");
return -EINVAL;
}
-
+
if (live_router_check_interval <= 0 &&
dead_router_check_interval <= 0)
return 0;
- init_mutex_locked(&the_lnet.ln_rc_signal);
+ init_mutex_locked(&the_lnet.ln_rc_signal);
/* EQ size doesn't matter; the callback is guaranteed to get every
* event */
return -ENOMEM;
}
- rc = (int)cfs_kernel_thread(lnet_router_checker, NULL, 0);
- if (rc < 0) {
- CERROR("Can't start router checker thread: %d\n", rc);
+ rc = (int)cfs_kernel_thread(lnet_router_checker, NULL, 0);
+ if (rc < 0) {
+ CERROR("Can't start router checker thread: %d\n", rc);
goto failed;
- }
+ }
- mutex_down(&the_lnet.ln_rc_signal); /* wait for checker to startup */
+ mutex_down(&the_lnet.ln_rc_signal); /* wait for checker to startup */
rc = the_lnet.ln_rc_state;
if (rc < 0) {
the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
goto failed;
}
-
+
LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
if (check_routers_before_use) {
* may have to a previous instance of me. */
lnet_wait_known_routerstate();
}
-
+
return 0;
-
+
failed:
rc = LNetEQFree(the_lnet.ln_rc_eqh);
LASSERT (rc == 0);
int i;
LIBCFS_ALLOC(rb, sz);
+ if (rb == NULL)
+ return NULL;
rb->rb_pool = rbp;
LASSERT (rbp->rbp_nbuffers == nbufs);
return 0;
}
-
+
for (i = 0; i < nbufs; i++) {
rb = lnet_new_rtrbuf(rbp);
lnet_alloc_rtrpools(int im_a_router)
{
int rc;
-
+
if (!strcmp(forwarding, "")) {
/* not set either way */
if (!im_a_router)
} else if (!strcmp(forwarding, "enabled")) {
/* explicitly enabled */
} else {
- LCONSOLE_ERROR("'forwarding' not set to either "
- "'enabled' or 'disabled'\n");
+ LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either "
+ "'enabled' or 'disabled'\n");
return -EINVAL;
}
-
+
if (tiny_router_buffers <= 0) {
- LCONSOLE_ERROR("tiny_router_buffers=%d invalid when "
- "routing enabled\n", tiny_router_buffers);
+ LCONSOLE_ERROR_MSG(0x10c, "tiny_router_buffers=%d invalid when "
+ "routing enabled\n", tiny_router_buffers);
rc = -EINVAL;
goto failed;
}
goto failed;
if (small_router_buffers <= 0) {
- LCONSOLE_ERROR("small_router_buffers=%d invalid when "
- "routing enabled\n", small_router_buffers);
+ LCONSOLE_ERROR_MSG(0x10d, "small_router_buffers=%d invalid when"
+ " routing enabled\n", small_router_buffers);
rc = -EINVAL;
goto failed;
}
goto failed;
if (large_router_buffers <= 0) {
- LCONSOLE_ERROR("large_router_buffers=%d invalid when "
- "routing enabled\n", large_router_buffers);
+ LCONSOLE_ERROR_MSG(0x10e, "large_router_buffers=%d invalid when"
+ " routing enabled\n", large_router_buffers);
rc = -EINVAL;
goto failed;
}
LNET_LOCK();
the_lnet.ln_routing = 1;
LNET_UNLOCK();
-
+
return 0;
failed:
return 0;
}
+int
+lnet_peer_buffer_credits(lnet_ni_t *ni)
+{
+ return 0; /* stub counterpart of the full definition earlier in this
+ * file: ignores 'ni' and always reports 0 credits */
+}
+
void
lnet_router_checker_stop(void)
{