X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Flnet%2Frouter.c;h=072adcc2c3f2120b81a241793f32a5862d81567a;hb=df6cf859bbb29392064e6ddb701f3357e01b3a13;hp=1c0c3af92f885bd0580041ab76c6516d27f69cf6;hpb=bf5db236dd09f4e671f64123a7b54c115b626853;p=fs%2Flustre-release.git diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 1c0c3af..072adcc 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -1,10 +1,9 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2015, Intel Corporation. * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ + * This file is part of Lustre, https://wiki.hpdd.intel.com/ * * Portals is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -24,8 +23,6 @@ #define DEBUG_SUBSYSTEM S_LNET #include -#if defined(__KERNEL__) && defined(LNET_ROUTER) - #define LNET_NRB_TINY_MIN 512 /* min value for each CPT */ #define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4) #define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */ @@ -73,21 +70,12 @@ lnet_peer_buffer_credits(lnet_ni_t *ni) /* forward ref's */ static int lnet_router_checker(void *); -#else - -int -lnet_peer_buffer_credits(lnet_ni_t *ni) -{ - return 0; -} - -#endif static int check_routers_before_use = 0; CFS_MODULE_PARM(check_routers_before_use, "i", int, 0444, "Assume routers are down and ping them before use"); -static int avoid_asym_router_failure = 1; +int avoid_asym_router_failure = 1; CFS_MODULE_PARM(avoid_asym_router_failure, "i", int, 0644, "Avoid asymmetrical router failures (0 to disable)"); @@ -138,7 +126,7 @@ lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when) CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive); } -void +static void lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp) { int alive; @@ -148,7 +136,7 @@ lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp) * NB individual events can be missed; the only guarantee is that you * always get the most recent news */ - if (lp->lp_notifying) + if (lp->lp_notifying || ni == NULL) return; lp->lp_notifying = 1; @@ -248,10 +236,11 @@ lnet_find_net_locked (__u32 net) static void lnet_shuffle_seed(void) { - static int seeded = 0; - int lnd_type, seed[2]; - struct timeval tv; - lnet_ni_t *ni; + static int seeded; + __u32 lnd_type; + __u32 seed[2]; + struct timeval tv; + lnet_ni_t *ni; struct list_head *tmp; if (seeded) @@ -276,8 +265,8 @@ static void lnet_shuffle_seed(void) } /* NB expects LNET_LOCK held */ -void -lnet_add_route_to_rnet (lnet_remotenet_t *rnet, lnet_route_t *route) +static void +lnet_add_route_to_rnet(lnet_remotenet_t *rnet, lnet_route_t *route) { unsigned int len = 0; unsigned int offset = 0; @@ -327,7 +316,7 @@ lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, return -EINVAL; if (lnet_islocalnet(net)) /* it's a local network */ - return 0; /* ignore the route entry */ + return -EEXIST; /* Assume net, route, all new */ LIBCFS_ALLOC(route, sizeof(*route)); @@ -358,7 +347,7 @@ lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, LIBCFS_FREE(rnet, sizeof(*rnet)); if (rc == -EHOSTUNREACH) /* gateway is not on a local net. */ - return 0; /* ignore the route entry */ + return rc; /* ignore the route entry */ CERROR("Error %d creating route %s %d %s\n", rc, libcfs_net2str(net), hops, libcfs_nid2str(gateway)); @@ -406,13 +395,20 @@ lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, lnet_peer_decref_locked(route->lr_gateway); lnet_net_unlock(LNET_LOCK_EX); - if (!add_route) + rc = 0; + + if (!add_route) { + rc = -EEXIST; LIBCFS_FREE(route, sizeof(*route)); + } if (rnet != rnet2) LIBCFS_FREE(rnet, sizeof(*rnet)); - return 0; + /* indicate to startup the router checker if configured */ + wake_up(&the_lnet.ln_rc_waitq); + + return rc; } int @@ -617,9 +613,7 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops, *hops = route->lr_hops; *priority = route->lr_priority; *gateway = route->lr_gateway->lp_nid; - *alive = - route->lr_gateway->lp_alive && - !route->lr_downis; + *alive = lnet_is_route_alive(route); lnet_net_unlock(cpt); return 0; } @@ -731,6 +725,11 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) rte->lr_downis = 0; continue; } + /* if @down is zero and this route is single-hop, it means + * we can't find NI for target network */ + if (down == 0 && rte->lr_hops == 1) + down = 1; + rte->lr_downis = down; } } @@ -788,7 +787,7 @@ lnet_router_checker_event(lnet_event_t *event) lnet_net_unlock(lp->lp_cpt); } -void +static void lnet_wait_known_routerstate(void) { lnet_peer_t *rtr; @@ -815,14 +814,27 @@ lnet_wait_known_routerstate(void) if (all_known) return; -#ifndef __KERNEL__ - lnet_router_checker(); -#endif - cfs_pause(cfs_time_seconds(1)); - } + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1)); + } } void +lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net) +{ + lnet_route_t *rte; + + if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) { + list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { + if (rte->lr_net == net) { + rte->lr_downis = 0; + break; + } + } + } +} + +static void lnet_update_ni_status_locked(void) { lnet_ni_t *ni; @@ -862,7 +874,7 @@ lnet_update_ni_status_locked(void) } } -void +static void lnet_destroy_rc_data(lnet_rc_data_t *rcd) { LASSERT(list_empty(&rcd->rcd_list)); @@ -883,7 +895,7 @@ lnet_destroy_rc_data(lnet_rc_data_t *rcd) LIBCFS_FREE(rcd, sizeof(*rcd)); } -lnet_rc_data_t * +static lnet_rc_data_t * lnet_create_rc_data_locked(lnet_peer_t *gateway) { lnet_rc_data_t *rcd = NULL; @@ -1042,67 +1054,10 @@ int lnet_router_checker_start(void) { int rc; - int eqsz; -#ifdef __KERNEL__ + int eqsz = 0; struct task_struct *task; -#else /* __KERNEL__ */ - lnet_peer_t *rtr; - __u64 version; - int nrtr = 0; - int router_checker_max_eqsize = 10240; - - LASSERT (check_routers_before_use); - LASSERT (dead_router_check_interval > 0); - - lnet_net_lock(0); - - /* As an approximation, allow each router the same number of - * outstanding events as it is allowed outstanding sends */ - eqsz = 0; - version = the_lnet.ln_routers_version; - list_for_each_entry(rtr, &the_lnet.ln_routers, lp_rtr_list) { - lnet_ni_t *ni = rtr->lp_ni; - lnet_process_id_t id; - nrtr++; - eqsz += ni->ni_peertxcredits; - - /* one async ping reply per router */ - id.nid = rtr->lp_nid; - id.pid = LNET_PID_LUSTRE; - - lnet_net_unlock(0); - - rc = LNetSetAsync(id, 1); - if (rc != 0) { - CWARN("LNetSetAsync %s failed: %d\n", - libcfs_id2str(id), rc); - return rc; - } - - lnet_net_lock(0); - /* NB router list doesn't change in userspace */ - LASSERT(version == the_lnet.ln_routers_version); - } - - lnet_net_unlock(0); - - if (nrtr == 0) { - CDEBUG(D_NET, - "No router found, not starting router checker\n"); - return 0; - } - - /* at least allow a SENT and a REPLY per router */ - if (router_checker_max_eqsize < 2 * nrtr) - router_checker_max_eqsize = 2 * nrtr; - - LASSERT (eqsz > 0); - if (eqsz > router_checker_max_eqsize) - eqsz = router_checker_max_eqsize; -#endif - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); + LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); if (check_routers_before_use && dead_router_check_interval <= 0) { @@ -1112,29 +1067,15 @@ lnet_router_checker_start(void) return -EINVAL; } - if (!the_lnet.ln_routing && - live_router_check_interval <= 0 && - dead_router_check_interval <= 0) - return 0; - -#ifdef __KERNEL__ sema_init(&the_lnet.ln_rc_signal, 0); - /* EQ size doesn't matter; the callback is guaranteed to get every - * event */ - eqsz = 0; - rc = LNetEQAlloc(eqsz, lnet_router_checker_event, - &the_lnet.ln_rc_eqh); -#else - rc = LNetEQAlloc(eqsz, LNET_EQ_HANDLER_NONE, - &the_lnet.ln_rc_eqh); -#endif - if (rc != 0) { - CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc); - return -ENOMEM; - } - the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING; -#ifdef __KERNEL__ + rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh); + if (rc != 0) { + CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc); + return -ENOMEM; + } + + the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING; task = kthread_run(lnet_router_checker, NULL, "router_checker"); if (IS_ERR(task)) { rc = PTR_ERR(task); @@ -1146,7 +1087,6 @@ lnet_router_checker_start(void) the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; return -ENOMEM; } -#endif if (check_routers_before_use) { /* Note that a helpful side-effect of pinging all known routers @@ -1161,20 +1101,18 @@ lnet_router_checker_start(void) void lnet_router_checker_stop (void) { - int rc; + int rc; - if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN) - return; + if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN) + return; - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); + LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING; + /* wakeup the RC thread if it's sleeping */ + wake_up(&the_lnet.ln_rc_waitq); -#ifdef __KERNEL__ /* block until event callback signals exit */ down(&the_lnet.ln_rc_signal); -#else - lnet_router_checker(); -#endif LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); rc = LNetEQFree(the_lnet.ln_rc_eqh); @@ -1254,7 +1192,8 @@ lnet_prune_rc_data(int wait_unlink) i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, "Waiting for rc buffers to unlink\n"); - cfs_pause(cfs_time_seconds(1) / 4); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1) / 4); lnet_net_lock(LNET_LOCK_EX); } @@ -1262,20 +1201,40 @@ lnet_prune_rc_data(int wait_unlink) lnet_net_unlock(LNET_LOCK_EX); } +/* + * This function is called to check if the RC should block indefinitely. + * It's called from lnet_router_checker() as well as being passed to + * wait_event_interruptible() to avoid the lost wake_up problem. + * + * When it's called from wait_event_interruptible() it is necessary to + * also not sleep if the rc state is not running to avoid a deadlock + * when the system is shutting down + */ +static inline bool +lnet_router_checker_active(void) +{ + if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) + return true; + + /* Router Checker thread needs to run when routing is enabled in + * order to call lnet_update_ni_status_locked() */ + if (the_lnet.ln_routing) + return true; -#if defined(__KERNEL__) && defined(LNET_ROUTER) + return !list_empty(&the_lnet.ln_routers) && + (live_router_check_interval > 0 || + dead_router_check_interval > 0); +} static int lnet_router_checker(void *arg) { - lnet_peer_t *rtr; + lnet_peer_t *rtr; struct list_head *entry; - cfs_block_allsigs(); - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); + cfs_block_allsigs(); - while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) { + while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) { __u64 version; int cpt; int cpt2; @@ -1297,14 +1256,14 @@ rescan: goto rescan; } - lnet_ping_router_locked(rtr); + lnet_ping_router_locked(rtr); - /* NB dropped lock */ - if (version != the_lnet.ln_routers_version) { - /* the routers list has changed */ - goto rescan; - } - } + /* NB dropped lock */ + if (version != the_lnet.ln_routers_version) { + /* the routers list has changed */ + goto rescan; + } + } if (the_lnet.ln_routing) lnet_update_ni_status_locked(); @@ -1313,15 +1272,21 @@ rescan: lnet_prune_rc_data(0); /* don't wait for UNLINK */ - /* Call cfs_pause() here always adds 1 to load average + /* Call schedule_timeout() here always adds 1 to load average * because kernel counts # active tasks as nr_running * + nr_uninterruptible. */ - schedule_timeout_and_set_state(TASK_INTERRUPTIBLE, - cfs_time_seconds(1)); + /* if there are any routes then wakeup every second. If + * there are no routes then sleep indefinitely until woken + * up by a user adding a route */ + if (!lnet_router_checker_active()) + wait_event_interruptible(the_lnet.ln_rc_waitq, + lnet_router_checker_active()); + else + wait_event_interruptible_timeout(the_lnet.ln_rc_waitq, + false, + cfs_time_seconds(1)); } - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING); - lnet_prune_rc_data(1); /* wait for UNLINK */ the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; @@ -1341,7 +1306,7 @@ lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages) LIBCFS_FREE(rb, sz); } -lnet_rtrbuf_t * +static lnet_rtrbuf_t * lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt) { int npages = rbp->rbp_npages; @@ -1375,7 +1340,7 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt) return rb; } -void +static void lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp, int cpt) { int npages = rbp->rbp_npages; @@ -1390,6 +1355,7 @@ lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp, int cpt) lnet_net_lock(cpt); lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt); list_splice_init(&rbp->rbp_bufs, &tmp); + rbp->rbp_req_nbuffers = 0; rbp->rbp_nbuffers = rbp->rbp_credits = 0; rbp->rbp_mincredits = 0; lnet_net_unlock(cpt); @@ -1409,37 +1375,50 @@ lnet_rtrpool_adjust_bufs(lnet_rtrbufpool_t *rbp, int nbufs, int cpt) lnet_rtrbuf_t *rb; int num_rb; int num_buffers = 0; + int old_req_nbufs; int npages = rbp->rbp_npages; + lnet_net_lock(cpt); /* If we are called for less buffers than already in the pool, we - * just lower the nbuffers number and excess buffers will be + * just lower the req_nbuffers number and excess buffers will be * thrown away as they are returned to the free list. Credits - * then get adjusted as well. */ - if (nbufs <= rbp->rbp_nbuffers) { - lnet_net_lock(cpt); - rbp->rbp_nbuffers = nbufs; + * then get adjusted as well. + * If we already have enough buffers allocated to serve the + * increase requested, then we can treat that the same way as we + * do the decrease. */ + num_rb = nbufs - rbp->rbp_nbuffers; + if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) { + rbp->rbp_req_nbuffers = nbufs; lnet_net_unlock(cpt); return 0; } + /* store the older value of rbp_req_nbuffers and then set it to + * the new request to prevent lnet_return_rx_credits_locked() from + * freeing buffers that we need to keep around */ + old_req_nbufs = rbp->rbp_req_nbuffers; + rbp->rbp_req_nbuffers = nbufs; + lnet_net_unlock(cpt); INIT_LIST_HEAD(&rb_list); /* allocate the buffers on a local list first. If all buffers are * allocated successfully then join this list to the rbp buffer * list. If not then free all allocated buffers. */ - num_rb = rbp->rbp_nbuffers; - - while (num_rb < nbufs) { + while (num_rb-- > 0) { rb = lnet_new_rtrbuf(rbp, cpt); if (rb == NULL) { CERROR("Failed to allocate %d route bufs of %d pages\n", nbufs, npages); + + lnet_net_lock(cpt); + rbp->rbp_req_nbuffers = old_req_nbufs; + lnet_net_unlock(cpt); + goto failed; } list_add(&rb->rb_list, &rb_list); num_buffers++; - num_rb++; } lnet_net_lock(cpt); @@ -1468,15 +1447,15 @@ failed: return -ENOMEM; } -void +static void lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages) { INIT_LIST_HEAD(&rbp->rbp_msgs); INIT_LIST_HEAD(&rbp->rbp_bufs); - rbp->rbp_npages = npages; - rbp->rbp_credits = 0; - rbp->rbp_mincredits = 0; + rbp->rbp_npages = npages; + rbp->rbp_credits = 0; + rbp->rbp_mincredits = 0; } void @@ -1809,133 +1788,3 @@ lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when) return 0; } EXPORT_SYMBOL(lnet_notify); - -void -lnet_get_tunables (void) -{ - return; -} - -#else - -int -lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when) -{ - return -EOPNOTSUPP; -} - -void -lnet_router_checker (void) -{ - static time_t last = 0; - static int running = 0; - - time_t now = cfs_time_current_sec(); - int interval = now - last; - int rc; - __u64 version; - lnet_peer_t *rtr; - - /* It's no use to call me again within a sec - all intervals and - * timeouts are measured in seconds */ - if (last != 0 && interval < 2) - return; - - if (last != 0 && - interval > MAX(live_router_check_interval, - dead_router_check_interval)) - CNETERR("Checker(%d/%d) not called for %d seconds\n", - live_router_check_interval, dead_router_check_interval, - interval); - - LASSERT(LNET_CPT_NUMBER == 1); - - lnet_net_lock(0); - LASSERT(!running); /* recursion check */ - running = 1; - lnet_net_unlock(0); - - last = now; - - if (the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING) - lnet_prune_rc_data(0); /* unlink all rcd and nowait */ - - /* consume all pending events */ - while (1) { - int i; - lnet_event_t ev; - - /* NB ln_rc_eqh must be the 1st in 'eventqs' otherwise the - * recursion breaker in LNetEQPoll would fail */ - rc = LNetEQPoll(&the_lnet.ln_rc_eqh, 1, 0, &ev, &i); - if (rc == 0) /* no event pending */ - break; - - /* NB a lost SENT prevents me from pinging a router again */ - if (rc == -EOVERFLOW) { - CERROR("Dropped an event!!!\n"); - abort(); - } - - LASSERT (rc == 1); - - lnet_router_checker_event(&ev); - } - - if (the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING) { - lnet_prune_rc_data(1); /* release rcd */ - the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; - running = 0; - return; - } - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); - - lnet_net_lock(0); - - version = the_lnet.ln_routers_version; - list_for_each_entry(rtr, &the_lnet.ln_routers, lp_rtr_list) { - lnet_ping_router_locked(rtr); - LASSERT(version == the_lnet.ln_routers_version); - } - - lnet_net_unlock(0); - - running = 0; /* lock only needed for the recursion check */ - return; -} - -/* NB lnet_peers_start_down depends on me, - * so must be called before any peer creation */ -void -lnet_get_tunables (void) -{ - char *s; - - s = getenv("LNET_ROUTER_PING_TIMEOUT"); - if (s != NULL) router_ping_timeout = atoi(s); - - s = getenv("LNET_LIVE_ROUTER_CHECK_INTERVAL"); - if (s != NULL) live_router_check_interval = atoi(s); - - s = getenv("LNET_DEAD_ROUTER_CHECK_INTERVAL"); - if (s != NULL) dead_router_check_interval = atoi(s); - - /* This replaces old lnd_notify mechanism */ - check_routers_before_use = 1; - if (dead_router_check_interval <= 0) - dead_router_check_interval = 30; -} - -void -lnet_rtrpools_free(int keep_pools) -{ -} - -int -lnet_rtrpools_alloc(int im_a_arouter) -{ - return 0; -} - -#endif