From efdcf04c9024fd1f8589c1725fc521cde73e893b Mon Sep 17 00:00:00 2001 From: eeb Date: Thu, 27 Oct 2005 02:26:07 +0000 Subject: [PATCH] * iiblnd: drain disconnecting sockets * lnet: explicit configure/teardown for routers ("lctl network up" as well as "lctl network down"). config_on_load=0 is the new default (setting it effectively does "lctl network up" at module load time). Added "net" as an alias for "network" to lctl so you don't have to type "work" when you run "lctl net up" from the shell. Also fixed a couple of bugs which required lnet to be unloaded before it could be brought up again. * lnet routing: restored automatic route disabling when comms to a router fails (currently kernel elan and tcp only). --- lnet/include/libcfs/kp30.h | 1 + lnet/klnds/iiblnd/iiblnd_cb.c | 19 +++++++ lnet/klnds/qswlnd/qswlnd_cb.c | 3 +- lnet/klnds/socklnd/socklnd.c | 46 ++++++++++++----- lnet/klnds/socklnd/socklnd.h | 3 +- lnet/klnds/socklnd/socklnd_cb.c | 1 + lnet/lnet/api-ni.c | 13 ++++- lnet/lnet/module.c | 109 ++++++++++++++++++++++------------------ lnet/lnet/peer.c | 5 +- lnet/lnet/router.c | 37 ++++++++------ lnet/utils/portals.c | 19 ++++++- lnet/utils/ptlctl.c | 3 +- 12 files changed, 171 insertions(+), 88 deletions(-) diff --git a/lnet/include/libcfs/kp30.h b/lnet/include/libcfs/kp30.h index cf08e99..c3c4a8c 100644 --- a/lnet/include/libcfs/kp30.h +++ b/lnet/include/libcfs/kp30.h @@ -437,6 +437,7 @@ extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg); #define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE) #define IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE) #define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE) /* lnd ioctls */ #define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE) #define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE) diff --git a/lnet/klnds/iiblnd/iiblnd_cb.c b/lnet/klnds/iiblnd/iiblnd_cb.c index 
8f0da57..ee6b4e2 100644 --- a/lnet/klnds/iiblnd/iiblnd_cb.c +++ b/lnet/klnds/iiblnd/iiblnd_cb.c @@ -834,6 +834,25 @@ kibnal_check_sends (kib_conn_t *conn) LASSERT (conn->ibc_credits >= 0); LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE); + if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) { + list_del (&tx->tx_list); + tx->tx_queued = 0; + tx->tx_status = -ECONNABORTED; /* conn not established: fail this tx */ + tx->tx_waiting = 0; + done = (tx->tx_sending == 0); + if (!done) + list_add_tail(&tx->tx_list, + &conn->ibc_active_txs); + spin_unlock(&conn->ibc_lock); + + CDEBUG (D_NET, "Abort transmit to %s (closing)\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid)); + + if (done) + kibnal_tx_done(tx); + continue; + } + if (conn->ibc_nsends_posted == IBNAL_MSG_QUEUE_SIZE) { CDEBUG(D_NET, "%s: posted enough\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index 526c6d9..7a49faa 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -32,8 +32,7 @@ kqswnal_notify_peer_down(kqswnal_tx_t *ktx) do_gettimeofday (&now); then = now.tv_sec - (jiffies - ktx->ktx_launchtime)/HZ; - /* no auto-down for now */ - // lnet_notify(kqswnal_data.kqn_ni, ktx->ktx_nid, 0, then); + lnet_notify(kqswnal_data.kqn_ni, ktx->ktx_nid, 0, then); } void diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 4dae1667..4e3345d 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1323,6 +1323,34 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) } void +ksocknal_peer_failed (ksock_peer_t *peer) +{ + time_t last_alive = 0; + int notify = 0; + + /* There has been a connection failure or comms error; but I'll only + * tell LNET I think the peer is dead if there are no connections or + * connection attempts in existence. 
*/ + + read_lock (&ksocknal_data.ksnd_global_lock); + + if (list_empty(&peer->ksnp_conns) && + peer->ksnp_accepting == 0 && + ksocknal_find_connecting_route_locked(peer) == NULL) { + notify = 1; + last_alive = cfs_time_current_sec() - + cfs_duration_sec(cfs_time_current() - + peer->ksnp_last_alive); + } + + read_unlock (&ksocknal_data.ksnd_global_lock); + + if (notify) + lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0, + last_alive); +} + +void ksocknal_terminate_conn (ksock_conn_t *conn) { /* This gets called by the reaper (guaranteed thread context) to @@ -1332,9 +1360,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn) unsigned long flags; ksock_peer_t *peer = conn->ksnc_peer; ksock_sched_t *sched = conn->ksnc_scheduler; - struct timeval now; - time_t then = 0; - int notify = 0; + int failed = 0; LASSERT(conn->ksnc_closing); @@ -1368,12 +1394,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn) if (peer->ksnp_error != 0) { /* peer's last conn closed in error */ LASSERT (list_empty (&peer->ksnp_conns)); - - /* convert peer's last-known-alive timestamp from jiffies */ - do_gettimeofday (&now); - then = now.tv_sec - cfs_duration_sec(cfs_time_sub(cfs_time_current(), - peer->ksnp_last_alive)); - notify = 1; + failed = 1; } write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); @@ -1385,9 +1406,8 @@ ksocknal_terminate_conn (ksock_conn_t *conn) * zero-copy transmits will therefore complete in finite time. 
*/ ksocknal_connsock_decref(conn); - /* no auto-down for now */ - // if (notify) - // lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0, then); + if (failed) + ksocknal_peer_failed(peer); } void @@ -1864,7 +1884,7 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) case IOC_LIBCFS_ADD_PEER: { lnet_process_id_t id = {.nid = data->ioc_nid, - .pid = LUSTRE_SRV_LNET_PID}; + .pid = LUSTRE_SRV_LNET_PID}; return ksocknal_add_peer (ni, id, data->ioc_u32[0], /* IP */ data->ioc_u32[1]); /* port */ diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 8646e9c..dc1547a 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -434,7 +434,6 @@ ksocknal_peer_decref (ksock_peer_t *peer) ksocknal_destroy_peer (peer); } - int ksocknal_startup (lnet_ni_t *ni); void ksocknal_shutdown (lnet_ni_t *ni); int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); @@ -448,6 +447,7 @@ int ksocknal_accept(lnet_ni_t *ni, struct socket *sock); extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port); extern ksock_peer_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id); extern ksock_peer_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id); +extern void ksocknal_peer_failed (ksock_peer_t *peer); extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, struct socket *sock, int type); extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); @@ -463,6 +463,7 @@ extern void ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist); extern void ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive); extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg); extern void ksocknal_thread_fini (void); +extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer); extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); extern int ksocknal_scheduler (void *arg); extern int ksocknal_connd (void *arg); diff --git 
a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 462ed7f..fab6729 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1739,6 +1739,7 @@ ksocknal_connect (ksock_route_t *route) #endif write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); + ksocknal_peer_failed(peer); ksocknal_txlist_done(peer->ksnp_ni, &zombies); } diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 660ed62..bb26d58 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -51,6 +51,7 @@ lnet_get_routes(void) char * lnet_get_networks(void) { + char *nets; int rc; if (*networks != 0 && *ip2nets != 0) { @@ -60,8 +61,8 @@ lnet_get_networks(void) } if (*ip2nets != 0) { - rc = lnet_parse_ip2nets(&networks, ip2nets); - return (rc == 0) ? networks : NULL; + rc = lnet_parse_ip2nets(&nets, ip2nets); + return (rc == 0) ? nets : NULL; } if (*networks != 0) @@ -1099,6 +1100,8 @@ LNetInit(void) lnet_assert_wire_constants (); LASSERT (!the_lnet.ln_init); + memset(&the_lnet, 0, sizeof(the_lnet)); + rc = lnet_get_portals_compatibility(); if (rc < 0) return rc; @@ -1157,6 +1160,12 @@ LNetNIInit(lnet_pid_t requested_pid) goto out; } + if (requested_pid == LNET_PID_ANY) { + /* Don't instantiate LNET just for me */ + rc = -ENETDOWN; + goto failed0; + } + rc = lnet_prepare(requested_pid); if (rc != 0) goto failed0; diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index 69e9003..730608a 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -25,75 +25,87 @@ #define DEBUG_SUBSYSTEM S_LNET #include -static int config_on_load = 1; +static int config_on_load = 0; CFS_MODULE_PARM(config_on_load, "i", int, 0444, "configure network at module load"); -static int lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data) +static struct semaphore lnet_config_mutex; + +int +lnet_configure (void *arg) { - int initrc; - int rc; - - if (cmd == IOC_LIBCFS_UNCONFIGURE) { - /* ghastly hack to prevent repeated net config */ - 
LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - initrc = the_lnet.ln_niinit_self; - the_lnet.ln_niinit_self = 0; - rc = the_lnet.ln_refcount; - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - - if (initrc) { - rc--; - LNetNIFini(); + /* 'arg' only there so I can be passed to cfs_kernel_thread() */ + int rc = 0; + + LNET_MUTEX_DOWN(&lnet_config_mutex); + + if (!the_lnet.ln_niinit_self) { + rc = LNetNIInit(LUSTRE_SRV_LNET_PID); + if (rc >= 0) { + the_lnet.ln_niinit_self = 1; + rc = 0; } - - return rc == 0 ? 0 : -EBUSY; - } - - initrc = LNetNIInit(LUSTRE_SRV_LNET_PID); - if (initrc < 0) - RETURN (-ENETDOWN); - - rc = LNetCtl(cmd, data); - - if (initrc == 0) { - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - /* I instantiated the network */ - the_lnet.ln_niinit_self = 1; - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - } else { - LNetNIFini(); } - + + LNET_MUTEX_UP(&lnet_config_mutex); return rc; } -DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl); - int -lnet_configure (void *arg) +lnet_unconfigure (void) { - int rc; + int refcount; + + LNET_MUTEX_DOWN(&lnet_config_mutex); + + if (the_lnet.ln_niinit_self) { + the_lnet.ln_niinit_self = 0; /* drop my ref once only; lets lnet_configure() run again */ + LNetNIFini(); + } LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - the_lnet.ln_niinit_self = 1; + refcount = the_lnet.ln_refcount; LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - rc = LNetNIInit(LUSTRE_SRV_LNET_PID); - if (rc < 0) { - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - the_lnet.ln_niinit_self = 0; - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - } + LNET_MUTEX_UP(&lnet_config_mutex); + return (refcount == 0) ? 
0 : -EBUSY; +} + +int +lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data) +{ + int rc; + + switch (cmd) { + case IOC_LIBCFS_CONFIGURE: + return lnet_configure(NULL); - return 0; + case IOC_LIBCFS_UNCONFIGURE: + return lnet_unconfigure(); + + default: + /* Passing LNET_PID_ANY only gives me a ref if the net is up + * already; I'll need it to ensure the net can't go down while + * I'm called into it */ + rc = LNetNIInit(LNET_PID_ANY); + if (rc >= 0) { + rc = LNetCtl(cmd, data); + LNetNIFini(); + } + return rc; + } } -static int init_lnet(void) +DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl); + +int +init_lnet(void) { int rc; ENTRY; + init_mutex(&lnet_config_mutex); + rc = LNetInit(); if (rc != 0) { CERROR("LNetInit: error %d\n", rc); @@ -112,7 +122,8 @@ static int init_lnet(void) RETURN(0); } -static void fini_lnet(void) +void +fini_lnet(void) { int rc; @@ -163,4 +174,4 @@ MODULE_AUTHOR("Peter J. Braam "); MODULE_DESCRIPTION("Portals v3.1"); MODULE_LICENSE("GPL"); -cfs_module(portals, "1.0.0", init_lnet, fini_lnet); +cfs_module(lnet, "1.0.0", init_lnet, fini_lnet); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 9fb4f35..acc991e 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -142,7 +142,6 @@ lnet_find_peer_locked (lnet_nid_t nid) int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid) { - struct timeval now; lnet_peer_t *lp; lnet_peer_t *lp2; @@ -160,15 +159,13 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid) return -ENOMEM; } - do_gettimeofday (&now); - memset(lp, 0, sizeof(*lp)); /* zero counters etc */ CFS_INIT_LIST_HEAD(&lp->lp_txq); CFS_INIT_LIST_HEAD(&lp->lp_rtrq); lp->lp_alive = 1; - lp->lp_timestamp = now.tv_sec; + lp->lp_timestamp = cfs_time_current_sec(); lp->lp_nid = nid; lp->lp_refcount = 2; /* 1 for caller; 1 for hash */ diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 0acb9e3..c58aa0b 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -39,6 +39,10 @@ static int large_router_buffers = 32; 
CFS_MODULE_PARM(large_router_buffers, "i", int, 0444, "# of large messages to buffer in the router"); +static int auto_down_routers = 1; +CFS_MODULE_PARM(auto_down_routers, "i", int, 0444, + "Automatically mark routers down on comms error"); + typedef struct { work_struct_t kpru_tq; @@ -94,7 +98,7 @@ int lnet_notify (lnet_ni_t *ni, lnet_nid_t gateway_nid, int alive, time_t when) { lnet_peer_t *lp = NULL; - struct timeval now; + time_t now = cfs_time_current_sec(); CDEBUG (D_NET, "%s notifying %s: %s\n", (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid), @@ -110,13 +114,12 @@ lnet_notify (lnet_ni_t *ni, lnet_nid_t gateway_nid, int alive, time_t when) } /* can't do predictions... */ - do_gettimeofday (&now); - if (when > now.tv_sec) { + if (when > now) { CWARN ("Ignoring prediction from %s of %s %s " "%ld seconds in the future\n", (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid), libcfs_nid2str(gateway_nid), alive ? "up" : "down", - when - now.tv_sec); + when - now); return -EINVAL; } @@ -126,18 +129,26 @@ lnet_notify (lnet_ni_t *ni, lnet_nid_t gateway_nid, int alive, time_t when) if (lp == NULL) { /* gateway not found */ LNET_UNLOCK(); - CDEBUG (D_NET, "Gateway not found\n"); - return (0); + CDEBUG(D_NET, "Gateway not found\n"); + return 0; } if (when < lp->lp_timestamp) { /* out of date information */ lnet_peer_decref_locked(lp); LNET_UNLOCK(); - CDEBUG (D_NET, "Out of date\n"); - return (0); + CDEBUG(D_NET, "Out of date\n"); + return 0; } + if (ni != NULL && !alive && /* LND telling me she's down */ + !auto_down_routers) { /* auto-down disabled */ + lnet_peer_decref_locked(lp); + LNET_UNLOCK(); + CDEBUG(D_NET, "Auto-down disabled\n"); + return 0; + } + /* update timestamp */ lp->lp_timestamp = when; @@ -145,8 +156,8 @@ lnet_notify (lnet_ni_t *ni, lnet_nid_t gateway_nid, int alive, time_t when) /* new date for old news */ lnet_peer_decref_locked(lp); LNET_UNLOCK(); - CDEBUG (D_NET, "Old news\n"); - return (0); + CDEBUG(D_NET, "Old news\n"); + return 0; 
} lp->lp_alive = alive; @@ -156,14 +167,13 @@ lnet_notify (lnet_ni_t *ni, lnet_nid_t gateway_nid, int alive, time_t when) CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(gateway_nid), alive); if (ni == NULL) { - /* userland notified me: notify NAL? */ + /* userland notified me: notify LND? */ ni = lp->lp_ni; if (ni->ni_lnd->lnd_notify != NULL) { ni->ni_lnd->lnd_notify(ni, gateway_nid, alive); } } else { - /* It wasn't userland that notified me... */ - LBUG(); /* LND notification disabled for now */ + /* LND notified me: */ CWARN ("Upcall: NID %s is %s\n", libcfs_nid2str(gateway_nid), alive ? "alive" : "dead"); @@ -602,7 +612,6 @@ lnet_alloc_rtrpools(int im_a_router) if (!strcmp(forwarding, "")) { /* not set either way */ - forwarding = im_a_router ? "enabled(implicit)" : "disabled(default)"; if (!im_a_router) return 0; } else if (!strcmp(forwarding, "disabled")) { diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index b0397ec..ed3aa9f 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -277,7 +277,7 @@ int jt_ptl_network(int argc, char **argv) int rc; if (argc < 2) { - fprintf(stderr, "usage: %s |down\n", argv[0]); + fprintf(stderr, "usage: %s |up|down\n", argv[0]); return 0; } @@ -292,13 +292,28 @@ int jt_ptl_network(int argc, char **argv) } if (errno == EBUSY) - fprintf(stderr, "LNET busy"); + fprintf(stderr, "LNET busy\n"); else fprintf(stderr, "LNET unconfigure error %d: %s\n", errno, strerror(errno)); return -1; } + if (!strcmp(argv[1], "configure") || + !strcmp(argv[1], "up")) { + LIBCFS_IOC_INIT(data); + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_CONFIGURE, &data); + + if (rc == 0) { + printf ("LNET configured\n"); + return 0; + } + + fprintf(stderr, "LNET configure error %d: %s\n", + errno, strerror(errno)); + return -1; + } + net = libcfs_str2net(argv[1]); if (net == LNET_NIDNET(LNET_NID_ANY)) { fprintf(stderr, "Can't parse net %s\n", argv[1]); diff --git a/lnet/utils/ptlctl.c b/lnet/utils/ptlctl.c index ba1ccd6..11dc075 100644 --- 
a/lnet/utils/ptlctl.c +++ b/lnet/utils/ptlctl.c @@ -29,7 +29,8 @@ command_t list[] = { - {"network", jt_ptl_network, 0,"select a network (args: LND name)"}, + {"network", jt_ptl_network, 0,"select/configure network (args: up|down|LND name)"}, + {"net", jt_ptl_network, 0,"select/configure network (args: up|down|LND name)"}, {"list_nids", jt_ptl_list_nids, 0,"list local NIDs"}, {"which_nid", jt_ptl_which_nid, 0,"select the closest NID"}, {"print_interfaces", jt_ptl_print_interfaces, 0, "print interface entries (no args)"}, -- 1.8.3.1