X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Flnet%2Fapi-ni.c;h=0d1e7ec1fe0afb9b018aeffcdf7c78f3bea9bbda;hp=5ecf560cf32ee5d604a12d48b584cb6542542cfe;hb=c9501b87d0e06c36b180b80c08ca79b672f20c72;hpb=a3488d2f33e3ba4ae1d9c127132a6b75b5bb0112 diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 5ecf560..0d1e7ec 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -27,7 +27,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -757,6 +757,7 @@ lnet_prepare(lnet_pid_t requested_pid) INIT_LIST_HEAD(&the_lnet.ln_nis_zombie); INIT_LIST_HEAD(&the_lnet.ln_routers); INIT_LIST_HEAD(&the_lnet.ln_drop_rules); + INIT_LIST_HEAD(&the_lnet.ln_delay_rules); rc = lnet_create_remote_nets_table(); if (rc != 0) @@ -1269,6 +1270,20 @@ lnet_ni_tq_credits(lnet_ni_t *ni) } static void +lnet_ni_unlink_locked(lnet_ni_t *ni) +{ + if (!list_empty(&ni->ni_cptlist)) { + list_del_init(&ni->ni_cptlist); + lnet_ni_decref_locked(ni, 0); + } + + /* move it to zombie list and nobody can find it anymore */ + LASSERT(!list_empty(&ni->ni_list)); + list_move(&ni->ni_list, &the_lnet.ln_nis_zombie); + lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */ +} + +static void lnet_clear_zombies_nis_locked(void) { int i; @@ -1347,14 +1362,7 @@ lnet_shutdown_lndnis(void) while (!list_empty(&the_lnet.ln_nis)) { ni = list_entry(the_lnet.ln_nis.next, lnet_ni_t, ni_list); - /* move it to zombie list and nobody can find it anymore */ - list_move(&ni->ni_list, &the_lnet.ln_nis_zombie); - lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */ - - if (!list_empty(&ni->ni_cptlist)) { - list_del_init(&ni->ni_cptlist); - lnet_ni_decref_locked(ni, 0); - } + lnet_ni_unlink_locked(ni); } /* Drop the cached eqwait NI. */ @@ -1387,256 +1395,228 @@ lnet_shutdown_lndnis(void) lnet_net_unlock(LNET_LOCK_EX); } -int -lnet_shutdown_lndni(__u32 net) +/* shutdown down the NI and release refcount */ +static void +lnet_shutdown_lndni(struct lnet_ni *ni) { - lnet_ping_info_t *pinfo; - lnet_handle_md_t md_handle; - lnet_ni_t *found_ni = NULL; - int ni_count; - int rc; - - if (LNET_NETTYP(net) == LOLND) - return -EINVAL; - - ni_count = lnet_get_ni_count(); - - /* create and link a new ping info, before removing the old one */ - rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count - 1, false); - if (rc != 0) - return rc; - - /* proceed with shutting down the NI */ lnet_net_lock(LNET_LOCK_EX); - - found_ni = lnet_net2ni_locked(net, 0); - if (found_ni == NULL) { - lnet_net_unlock(LNET_LOCK_EX); - lnet_ping_md_unlink(pinfo, &md_handle); - lnet_ping_info_free(pinfo); - return -EINVAL; - } - - /* decrement the reference counter on found_ni which was - * incremented when we called lnet_net2ni_locked() */ - lnet_ni_decref_locked(found_ni, 0); - - /* Move ni to zombie list so nobody can find it anymore */ - list_move(&found_ni->ni_list, &the_lnet.ln_nis_zombie); - - /* Drop the lock reference for the ln_nis ref. */ - lnet_ni_decref_locked(found_ni, 0); - - if (!list_empty(&found_ni->ni_cptlist)) { - list_del_init(&found_ni->ni_cptlist); - lnet_ni_decref_locked(found_ni, 0); - } - + lnet_ni_unlink_locked(ni); lnet_net_unlock(LNET_LOCK_EX); /* Do peer table cleanup for this ni */ - lnet_peer_tables_cleanup(found_ni); + lnet_peer_tables_cleanup(ni); lnet_net_lock(LNET_LOCK_EX); lnet_clear_zombies_nis_locked(); lnet_net_unlock(LNET_LOCK_EX); - - lnet_ping_target_update(pinfo, md_handle); - - return 0; } static int -lnet_startup_lndnis(struct list_head *nilist, __s32 peer_timeout, - __s32 peer_cr, __s32 peer_buf_cr, __s32 credits, - int *ni_count) +lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout, + __s32 peer_cr, __s32 peer_buf_cr, __s32 credits) { int rc = 0; - struct lnet_ni *ni; int lnd_type; lnd_t *lnd; struct lnet_tx_queue *tq; int i; - while (!list_empty(nilist)) { - ni = list_entry(nilist->next, lnet_ni_t, ni_list); - lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); - - if (!libcfs_isknown_lnd(lnd_type)) - goto failed; + lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); - if (lnd_type == CIBLND || - lnd_type == OPENIBLND || - lnd_type == IIBLND || - lnd_type == VIBLND) { - CERROR("LND %s obsoleted\n", - libcfs_lnd2str(lnd_type)); - goto failed; - } + LASSERT(libcfs_isknown_lnd(lnd_type)); - /* Make sure this new NI is unique. */ - lnet_net_lock(LNET_LOCK_EX); - if (!lnet_net_unique(LNET_NIDNET(ni->ni_nid), - &the_lnet.ln_nis)) { - if (lnd_type == LOLND) { - lnet_net_unlock(LNET_LOCK_EX); - list_del(&ni->ni_list); - lnet_ni_free(ni); - continue; - } + if (lnd_type == CIBLND || lnd_type == OPENIBLND || + lnd_type == IIBLND || lnd_type == VIBLND) { + CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type)); + goto failed0; + } - CERROR("Net %s is not unique\n", - libcfs_net2str(LNET_NIDNET(ni->ni_nid))); + /* Make sure this new NI is unique. */ + lnet_net_lock(LNET_LOCK_EX); + if (!lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis)) { + if (lnd_type == LOLND) { lnet_net_unlock(LNET_LOCK_EX); - goto failed; + lnet_ni_free(ni); + return 0; } lnet_net_unlock(LNET_LOCK_EX); - LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex); - lnd = lnet_find_lnd_by_type(lnd_type); + CERROR("Net %s is not unique\n", + libcfs_net2str(LNET_NIDNET(ni->ni_nid))); + goto failed0; + } + lnet_net_unlock(LNET_LOCK_EX); + + LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex); + lnd = lnet_find_lnd_by_type(lnd_type); #ifdef __KERNEL__ + if (lnd == NULL) { + LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); + rc = request_module("%s", + libcfs_lnd2modname(lnd_type)); + LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex); + + lnd = lnet_find_lnd_by_type(lnd_type); if (lnd == NULL) { LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); - rc = request_module("%s", - libcfs_lnd2modname(lnd_type)); - LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex); - - lnd = lnet_find_lnd_by_type(lnd_type); - if (lnd == NULL) { - LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); - CERROR("Can't load LND %s, module %s, rc=%d\n", - libcfs_lnd2str(lnd_type), - libcfs_lnd2modname(lnd_type), rc); + CERROR("Can't load LND %s, module %s, rc=%d\n", + libcfs_lnd2str(lnd_type), + libcfs_lnd2modname(lnd_type), rc); #ifndef HAVE_MODULE_LOADING_SUPPORT - LCONSOLE_ERROR_MSG(0x104, "Your kernel must be " - "compiled with kernel module " - "loading support."); + LCONSOLE_ERROR_MSG(0x104, "Your kernel must be " + "compiled with kernel module " + "loading support."); #endif - goto failed; - } + goto failed0; } + } #else - if (lnd == NULL) { - LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); - CERROR("LND %s not supported\n", - libcfs_lnd2str(lnd_type)); - goto failed; - } + if (lnd == NULL) { + LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); + CERROR("LND %s not supported\n", + libcfs_lnd2str(lnd_type)); + goto failed0; + } #endif - lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount++; - lnet_net_unlock(LNET_LOCK_EX); - - ni->ni_lnd = lnd; - - rc = (lnd->lnd_startup)(ni); + lnet_net_lock(LNET_LOCK_EX); + lnd->lnd_refcount++; + lnet_net_unlock(LNET_LOCK_EX); - LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); + ni->ni_lnd = lnd; - if (rc != 0) { - LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s" - "\n", - rc, libcfs_lnd2str(lnd->lnd_type)); - lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount--; - lnet_net_unlock(LNET_LOCK_EX); - goto failed; - } + rc = (lnd->lnd_startup)(ni); - /* If given some LND tunable parameters, parse those now to - * override the values in the NI structure. */ - if (peer_buf_cr >= 0) - ni->ni_peerrtrcredits = peer_buf_cr; - if (peer_timeout >= 0) - ni->ni_peertimeout = peer_timeout; - /* - * TODO - * Note: For now, don't allow the user to change - * peertxcredits as this number is used in the - * IB LND to control queue depth. - * if (peer_cr != -1) - * ni->ni_peertxcredits = peer_cr; - */ - if (credits >= 0) - ni->ni_maxtxcredits = credits; + LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); - LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL); + if (rc != 0) { + LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n", + rc, libcfs_lnd2str(lnd->lnd_type)); + lnet_net_lock(LNET_LOCK_EX); + lnd->lnd_refcount--; + lnet_net_unlock(LNET_LOCK_EX); + goto failed0; + } - list_del(&ni->ni_list); + /* If given some LND tunable parameters, parse those now to + * override the values in the NI structure. */ + if (peer_buf_cr >= 0) + ni->ni_peerrtrcredits = peer_buf_cr; + if (peer_timeout >= 0) + ni->ni_peertimeout = peer_timeout; + /* + * TODO + * Note: For now, don't allow the user to change + * peertxcredits as this number is used in the + * IB LND to control queue depth. + * if (peer_cr != -1) + * ni->ni_peertxcredits = peer_cr; + */ + if (credits >= 0) + ni->ni_maxtxcredits = credits; + + LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL); - lnet_net_lock(LNET_LOCK_EX); - /* refcount for ln_nis */ + lnet_net_lock(LNET_LOCK_EX); + /* refcount for ln_nis */ + lnet_ni_addref_locked(ni, 0); + list_add_tail(&ni->ni_list, &the_lnet.ln_nis); + if (ni->ni_cpts != NULL) { lnet_ni_addref_locked(ni, 0); - list_add_tail(&ni->ni_list, &the_lnet.ln_nis); - if (ni->ni_cpts != NULL) { - list_add_tail(&ni->ni_cptlist, - &the_lnet.ln_nis_cpt); - lnet_ni_addref_locked(ni, 0); - } + list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt); + } - lnet_net_unlock(LNET_LOCK_EX); + lnet_net_unlock(LNET_LOCK_EX); - /* increment the ni_count here to account for the LOLND as - * well. If we increment past this point then the number - * of count will be missing the LOLND, and then ping and - * will not report the LOLND - */ - if (ni_count != NULL) - (*ni_count)++; + if (lnd->lnd_type == LOLND) { + lnet_ni_addref(ni); + LASSERT(the_lnet.ln_loni == NULL); + the_lnet.ln_loni = ni; + return 0; + } - if (lnd->lnd_type == LOLND) { +#ifndef __KERNEL__ + if (lnd->lnd_wait != NULL) { + if (the_lnet.ln_eq_waitni == NULL) { lnet_ni_addref(ni); - LASSERT(the_lnet.ln_loni == NULL); - the_lnet.ln_loni = ni; - continue; + the_lnet.ln_eq_waitni = ni; } - -#ifndef __KERNEL__ - if (lnd->lnd_wait != NULL) { - if (the_lnet.ln_eq_waitni == NULL) { - lnet_ni_addref(ni); - the_lnet.ln_eq_waitni = ni; - } - } else { + } else { # ifndef HAVE_LIBPTHREAD - LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a " - "single-threaded runtime\n", - libcfs_lnd2str(lnd_type)); - goto failed; + LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a " + "single-threaded runtime\n", + libcfs_lnd2str(lnd_type)); + /* shutdown the NI since if we get here then it must've already + * been started + */ + lnet_shutdown_lndni(ni); + return -EINVAL; # endif - } + } #endif - if (ni->ni_peertxcredits == 0 || - ni->ni_maxtxcredits == 0) { - LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n", - libcfs_lnd2str(lnd->lnd_type), - ni->ni_peertxcredits == 0 ? - "" : "per-peer "); - goto failed; - } - - cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { - tq->tq_credits_min = - tq->tq_credits_max = - tq->tq_credits = lnet_ni_tq_credits(ni); - } + if (ni->ni_peertxcredits == 0 || ni->ni_maxtxcredits == 0) { + LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n", + libcfs_lnd2str(lnd->lnd_type), + ni->ni_peertxcredits == 0 ? + "" : "per-peer "); + /* shutdown the NI since if we get here then it must've already + * been started + */ + lnet_shutdown_lndni(ni); + return -EINVAL; + } - CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", - libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, - lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER, - ni->ni_peerrtrcredits, ni->ni_peertimeout); + cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { + tq->tq_credits_min = + tq->tq_credits_max = + tq->tq_credits = lnet_ni_tq_credits(ni); } + CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", + libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, + lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER, + ni->ni_peerrtrcredits, ni->ni_peertimeout); + return 0; -failed: +failed0: + lnet_ni_free(ni); + return -EINVAL; +} + +static int +lnet_startup_lndnis(struct list_head *nilist) +{ + struct lnet_ni *ni; + int rc; + int lnd_type; + int ni_count = 0; + while (!list_empty(nilist)) { ni = list_entry(nilist->next, lnet_ni_t, ni_list); list_del(&ni->ni_list); - lnet_ni_free(ni); + rc = lnet_startup_lndni(ni, -1, -1, -1, -1); + + if (rc < 0) + goto failed; + + ni_count++; } - return -EINVAL; + + if (the_lnet.ln_eq_waitni != NULL && ni_count > 1) { + lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type; + LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network" + "\n", + libcfs_lnd2str(lnd_type)); + rc = -EINVAL; + goto failed; + } + + return ni_count; +failed: + lnet_shutdown_lndnis(); + + return rc; } /** @@ -1760,10 +1740,8 @@ int LNetNIInit(lnet_pid_t requested_pid) { int im_a_router = 0; - int rc; - int ni_count = 0; - int lnd_type; - struct lnet_ni *ni; + int rc, rc2; + int ni_count; lnet_ping_info_t *pinfo; lnet_handle_md_t md_handle; struct list_head net_head; @@ -1782,38 +1760,49 @@ LNetNIInit(lnet_pid_t requested_pid) } rc = lnet_prepare(requested_pid); - if (rc != 0) - goto failed0; + if (rc != 0) { + LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex); + return rc; + } - rc = lnet_parse_networks(&net_head, - !the_lnet.ln_nis_from_mod_params ? - lnet_get_networks() : ""); - if (rc < 0) - goto failed1; + /* Add in the loopback network */ + if (lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head) == NULL) { + rc = -ENOMEM; + goto failed0; + } - rc = lnet_startup_lndnis(&net_head, -1, -1, -1, -1, &ni_count); - if (rc != 0) - goto failed1; + /* If LNet is being initialized via DLC it is possible + * that the user requests not to load module parameters (ones which + * are supported by DLC) on initialization. Therefore, make sure not + * to load networks, routes and forwarding from module parameters + * in this case. On cleanup in case of failure only clean up + * routes if it has been loaded */ + if (!the_lnet.ln_nis_from_mod_params) { + rc = lnet_parse_networks(&net_head, + lnet_get_networks()); + if (rc < 0) + goto failed0; + } - if (the_lnet.ln_eq_waitni != NULL && ni_count > 1) { - lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type; - LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network" - "\n", - libcfs_lnd2str(lnd_type)); - goto failed2; + ni_count = lnet_startup_lndnis(&net_head); + if (ni_count < 0) { + rc = ni_count; + goto failed0; } - rc = lnet_parse_routes(lnet_get_routes(), &im_a_router); - if (rc != 0) - goto failed2; + if (!the_lnet.ln_nis_from_mod_params) { + rc = lnet_parse_routes(lnet_get_routes(), &im_a_router); + if (rc != 0) + goto failed1; - rc = lnet_check_routes(); - if (rc != 0) - goto failed2; + rc = lnet_check_routes(); + if (rc != 0) + goto failed2; - rc = lnet_rtrpools_alloc(im_a_router); - if (rc != 0) - goto failed2; + rc = lnet_rtrpools_alloc(im_a_router); + if (rc != 0) + goto failed2; + } rc = lnet_acceptor_start(); if (rc != 0) @@ -1839,22 +1828,24 @@ LNetNIInit(lnet_pid_t requested_pid) return 0; failed4: - the_lnet.ln_refcount = 0; lnet_ping_md_unlink(pinfo, &md_handle); lnet_ping_info_free(pinfo); + rc2 = LNetEQFree(the_lnet.ln_ping_target_eq); + LASSERT(rc2 == 0); failed3: + the_lnet.ln_refcount = 0; lnet_acceptor_stop(); - rc = LNetEQFree(the_lnet.ln_ping_target_eq); - LASSERT(rc == 0); failed2: - lnet_destroy_routes(); - lnet_shutdown_lndnis(); + if (!the_lnet.ln_nis_from_mod_params) + lnet_destroy_routes(); failed1: - lnet_unprepare(); + lnet_shutdown_lndnis(); failed0: + lnet_unprepare(); LASSERT(rc < 0); LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex); while (!list_empty(&net_head)) { + struct lnet_ni *ni; ni = list_entry(net_head.next, struct lnet_ni, ni_list); list_del_init(&ni->ni_list); lnet_ni_free(ni); @@ -2007,8 +1998,8 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets, /* Create a ni structure for the network string */ rc = lnet_parse_networks(&net_head, nets); - if (rc < 0) - return rc; + if (rc <= 0) + return rc == 0 ? -EINVAL : rc; LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex); @@ -2022,8 +2013,11 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets, if (rc != 0) goto failed0; - rc = lnet_startup_lndnis(&net_head, peer_timeout, peer_cr, - peer_buf_cr, credits, NULL); + ni = list_entry(net_head.next, struct lnet_ni, ni_list); + list_del_init(&ni->ni_list); + + rc = lnet_startup_lndni(ni, peer_timeout, peer_cr, + peer_buf_cr, credits); if (rc != 0) goto failed1; @@ -2048,10 +2042,38 @@ failed0: int lnet_dyn_del_ni(__u32 net) { - int rc; + lnet_ni_t *ni; + lnet_ping_info_t *pinfo; + lnet_handle_md_t md_handle; + int rc; + + /* don't allow userspace to shutdown the LOLND */ + if (LNET_NETTYP(net) == LOLND) + return -EINVAL; LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex); - rc = lnet_shutdown_lndni(net); + /* create and link a new ping info, before removing the old one */ + rc = lnet_ping_info_setup(&pinfo, &md_handle, + lnet_get_ni_count() - 1, false); + if (rc != 0) + goto out; + + ni = lnet_net2ni(net); + if (ni == NULL) { + rc = -EINVAL; + goto failed; + } + + /* decrement the reference counter taken by lnet_net2ni() */ + lnet_ni_decref_locked(ni, 0); + + lnet_shutdown_lndni(ni); + lnet_ping_target_update(pinfo, md_handle); + goto out; +failed: + lnet_ping_md_unlink(pinfo, &md_handle); + lnet_ping_info_free(pinfo); +out: LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex); return rc;