4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2014, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
35 #define DEBUG_SUBSYSTEM S_LNET
37 #include <lnet/lib-lnet.h>
38 #include <lnet/lib-dlc.h>
/*
 * Allocate the per-CPT array of peer tables and give each table its
 * LNET_PEER_HASH_SIZE hash buckets.  On any allocation failure the
 * already-created tables are torn down via lnet_peer_tables_destroy().
 * NOTE(review): this chunk is a sampled excerpt — interior lines
 * (return type, error returns, braces) are not visible here.
 */
41 lnet_peer_tables_create(void)
43 struct lnet_peer_table *ptable;
44 struct list_head *hash;
48 the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
50 if (the_lnet.ln_peer_tables == NULL) {
51 CERROR("Failed to allocate cpu-partition peer tables\n");
55 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
/* one hash array per CPT, allocated on that CPT's NUMA partition */
56 LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
57 LNET_PEER_HASH_SIZE * sizeof(*hash));
59 CERROR("Failed to create peer hash table\n");
60 lnet_peer_tables_destroy();
64 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
65 INIT_LIST_HEAD(&hash[j]);
/* pt_hash doubles as the "this table is initialized" flag */
66 ptable->pt_hash = hash; /* sign of initialization */
/*
 * Free every per-CPT peer table.  Each hash chain is asserted empty
 * before its bucket array is released; finally the percpt container
 * itself is freed and the global pointer cleared.  Safe to call on a
 * partially-constructed state (NULL table / NULL pt_hash are skipped).
 */
73 lnet_peer_tables_destroy(void)
75 struct lnet_peer_table *ptable;
76 struct list_head *hash;
80 if (the_lnet.ln_peer_tables == NULL)
83 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
84 hash = ptable->pt_hash;
85 if (hash == NULL) /* not initialized */
88 ptable->pt_hash = NULL;
/* all peers must already be gone; a non-empty chain is a leak/bug */
89 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
90 LASSERT(list_empty(&hash[j]));
92 LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
95 cfs_percpt_free(the_lnet.ln_peer_tables);
96 the_lnet.ln_peer_tables = NULL;
/*
 * Unhash every peer NI in @ptable (or, when @ni is non-NULL, only those
 * on @ni's net) and drop the hash table's reference on each.  Entries
 * removed here are counted in pt_zombies; they are finally freed when
 * the last ref goes away (see lnet_destroy_peer_ni_locked).
 * Caller must hold the net lock (per the _locked suffix).
 */
100 lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable)
103 struct lnet_peer_ni *lp;
104 struct lnet_peer_ni *tmp;
106 for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
107 list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
/* NULL ni means "all peers"; otherwise filter by net */
109 if (ni != NULL && ni->ni_net != lp->lpni_net)
111 list_del_init(&lp->lpni_hashlist);
112 /* Lose hash table's ref */
113 ptable->pt_zombies++;
114 lnet_peer_ni_decref_locked(lp);
/*
 * Busy-wait (with half-second sleeps) until pt_zombies drains to zero,
 * i.e. until every unhashed peer NI has been destroyed.  The net lock
 * (@cpt_locked) is dropped around each sleep and re-taken afterwards.
 * NOTE(review): the loop counter starting at 3 presumably throttles the
 * CDEBUG message frequency — the masked lines are not visible here.
 */
120 lnet_peer_table_finalize_wait_locked(struct lnet_peer_table *ptable,
125 for (i = 3; ptable->pt_zombies != 0; i++) {
126 lnet_net_unlock(cpt_locked);
130 "Waiting for %d zombies on peer table\n",
133 set_current_state(TASK_UNINTERRUPTIBLE);
134 schedule_timeout(cfs_time_seconds(1) >> 1);
135 lnet_net_lock(cpt_locked);
/*
 * For every peer NI in @ptable that lives on @ni's net and is acting as
 * a router gateway (lpni_rtr_refcount != 0), delete all routes through
 * it.  lnet_del_route() must run unlocked, so the nid is copied out and
 * the net lock is dropped around the call.
 */
140 lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable,
143 struct lnet_peer_ni *lp;
144 struct lnet_peer_ni *tmp;
148 for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
149 list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
151 if (ni->ni_net != lp->lpni_net)
/* only gateways matter; plain peers have no routes to drop */
154 if (lp->lpni_rtr_refcount == 0)
157 lpni_nid = lp->lpni_nid;
159 lnet_net_unlock(cpt_locked);
/* LNET_NID_ANY net: remove routes to this gateway in every net */
160 lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lpni_nid);
161 lnet_net_lock(cpt_locked);
/*
 * Three-phase teardown of peer state, either for a single NI (@ni) or
 * for everything during shutdown (@ni == NULL):
 *   1. delete routes through affected gateway peers,
 *   2. unhash the affected peer NIs (creating zombies),
 *   3. wait for all zombies to be destroyed.
 * Each phase takes the exclusive net lock per table.
 */
167 lnet_peer_tables_cleanup(lnet_ni_t *ni)
170 struct lnet_peer_table *ptable;
172 LASSERT(the_lnet.ln_shutdown || ni != NULL);
173 /* If just deleting the peers for a NI, get rid of any routes these
174 * peers are gateways for. */
175 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
176 lnet_net_lock(LNET_LOCK_EX);
177 lnet_peer_table_del_rtrs_locked(ni, ptable, i);
178 lnet_net_unlock(LNET_LOCK_EX);
181 /* Start the cleanup process */
182 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
183 lnet_net_lock(LNET_LOCK_EX);
184 lnet_peer_table_cleanup_locked(ni, ptable);
185 lnet_net_unlock(LNET_LOCK_EX);
188 /* Wait until all peers have been destroyed. */
189 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
190 lnet_net_lock(LNET_LOCK_EX);
191 lnet_peer_table_finalize_wait_locked(ptable, i);
192 lnet_net_unlock(LNET_LOCK_EX);
/*
 * Hash lookup of a peer NI by nid within one table.  On a hit, takes a
 * reference for the caller before returning the entry.  Must be called
 * with the net lock held and only while LNet is not shutting down.
 * NOTE(review): the miss path (presumably "return NULL") is among the
 * lines elided from this excerpt.
 */
196 static struct lnet_peer_ni *
197 lnet_get_peer_ni_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
199 struct list_head *peers;
200 struct lnet_peer_ni *lp;
202 LASSERT(!the_lnet.ln_shutdown);
204 peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
205 list_for_each_entry(lp, peers, lpni_hashlist) {
206 if (lp->lpni_nid == nid) {
207 lnet_peer_ni_addref_locked(lp);
/*
 * Public wrapper: look up a peer NI by nid in the peer table of the
 * given @cpt.  Returns a referenced entry on success (see
 * lnet_get_peer_ni_locked), NULL-equivalent on miss.
 */
215 struct lnet_peer_ni *
216 lnet_find_peer_ni_locked(lnet_nid_t nid, int cpt)
218 struct lnet_peer_ni *lpni;
219 struct lnet_peer_table *ptable;
221 ptable = the_lnet.ln_peer_tables[cpt];
222 lpni = lnet_get_peer_ni_locked(ptable, nid);
/*
 * Find (or create via lnet_nid2peerni_locked) the peer NI for @dst_nid,
 * then hand back its owning struct lnet_peer through @peer.  The
 * temporary peer-NI reference is dropped before returning, so the
 * caller receives the peer pointer without an extra lpni ref.
 */
228 lnet_find_or_create_peer_locked(lnet_nid_t dst_nid, int cpt, struct lnet_peer **peer)
230 struct lnet_peer_ni *lpni;
232 lpni = lnet_find_peer_ni_locked(dst_nid, cpt);
235 rc = lnet_nid2peerni_locked(&lpni, dst_nid, cpt);
240 *peer = lpni->lpni_peer_net->lpn_peer;
241 lnet_peer_ni_decref_locked(lpni);
/*
 * Iterator over a peer's NIs.  With @prev == NULL it returns the first
 * NI of the first (or the given @peer_net's) net; otherwise it returns
 * the NI after @prev, crossing into the next peer net when @prev was
 * the last NI of its net — unless @peer_net was specified, in which
 * case iteration stops at that net's end.  Returns NULL-equivalent when
 * exhausted (elided lines).  Caller must hold the net lock.
 */
246 struct lnet_peer_ni *
247 lnet_get_next_peer_ni_locked(struct lnet_peer *peer,
248 struct lnet_peer_net *peer_net,
249 struct lnet_peer_ni *prev)
251 struct lnet_peer_ni *lpni;
252 struct lnet_peer_net *net = peer_net;
/* no @prev: start from the head of the peer's net list */
256 net = list_entry(peer->lp_peer_nets.next,
257 struct lnet_peer_net,
259 lpni = list_entry(net->lpn_peer_nis.next, struct lnet_peer_ni,
260 lpni_on_peer_net_list);
/* @prev was the last NI on its net? */
265 if (prev->lpni_on_peer_net_list.next ==
266 &prev->lpni_peer_net->lpn_peer_nis) {
268 * if you reached the end of the peer ni list and the peer
269 * net is specified then there are no more peer nis in that
276 * we reached the end of this net ni list. move to the
279 if (prev->lpni_peer_net->lpn_on_peer_list.next ==
281 /* no more nets and no more NIs. */
284 /* get the next net */
285 net = list_entry(prev->lpni_peer_net->lpn_on_peer_list.next,
286 struct lnet_peer_net,
288 /* get the ni on it */
289 lpni = list_entry(net->lpn_peer_nis.next, struct lnet_peer_ni,
290 lpni_on_peer_net_list);
295 /* there are more nis left */
296 lpni = list_entry(prev->lpni_on_peer_net_list.next,
297 struct lnet_peer_ni, lpni_on_peer_net_list);
/*
 * Report whether local NI @ni appears in the peer NI's preferred-NID
 * list (lpni_pref_nids, lpni_pref_nnids entries).  Truthy on a match.
 */
303 lnet_peer_is_ni_pref_locked(struct lnet_peer_ni *lpni, struct lnet_ni *ni)
307 for (i = 0; i < lpni->lpni_pref_nnids; i++) {
308 if (lpni->lpni_pref_nids[i] == ni->ni_nid)
/*
 * Detach @lpni from its peer_net/peer hierarchy and garbage-collect
 * upward: free the peer_net once it holds no more NIs, then free the
 * peer once it holds no more nets.  Tolerates an lpni that was never
 * (or is no longer) linked into a hierarchy.
 */
315 lnet_try_destroy_peer_hierarchy_locked(struct lnet_peer_ni *lpni)
317 struct lnet_peer_net *peer_net;
318 struct lnet_peer *peer;
320 /* TODO: could the below situation happen? accessing an already
322 if (lpni->lpni_peer_net == NULL ||
323 lpni->lpni_peer_net->lpn_peer == NULL)
326 peer_net = lpni->lpni_peer_net;
327 peer = lpni->lpni_peer_net->lpn_peer;
329 list_del_init(&lpni->lpni_on_peer_net_list);
330 lpni->lpni_peer_net = NULL;
332 /* if peer_net is empty, then remove it from the peer */
333 if (list_empty(&peer_net->lpn_peer_nis)) {
334 list_del_init(&peer_net->lpn_on_peer_list);
335 peer_net->lpn_peer = NULL;
336 LIBCFS_FREE(peer_net, sizeof(*peer_net));
338 /* if the peer is empty then remove it from the
339 * the_lnet.ln_peers */
340 if (list_empty(&peer->lp_peer_nets)) {
341 list_del_init(&peer->lp_on_lnet_peer_list);
342 LIBCFS_FREE(peer, sizeof(*peer));
/*
 * Build a fresh peer -> peer_net -> peer_ni hierarchy around @lpni:
 * allocates a struct lnet_peer and struct lnet_peer_net, links the
 * three levels together, makes @lpni's nid the peer's primary nid, and
 * adds the new peer to the global the_lnet.ln_peers list.  On peer_net
 * allocation failure the peer is freed (error return lines elided).
 */
348 lnet_build_peer_hierarchy(struct lnet_peer_ni *lpni)
350 struct lnet_peer *peer;
351 struct lnet_peer_net *peer_net;
352 __u32 lpni_net = LNET_NIDNET(lpni->lpni_nid);
357 LIBCFS_ALLOC(peer, sizeof(*peer));
361 LIBCFS_ALLOC(peer_net, sizeof(*peer_net));
362 if (peer_net == NULL) {
/* don't leak the peer when the second allocation fails */
363 LIBCFS_FREE(peer, sizeof(*peer));
367 INIT_LIST_HEAD(&peer->lp_on_lnet_peer_list);
368 INIT_LIST_HEAD(&peer->lp_peer_nets);
369 INIT_LIST_HEAD(&peer_net->lpn_on_peer_list);
370 INIT_LIST_HEAD(&peer_net->lpn_peer_nis);
372 /* build the hierarchy */
373 peer_net->lpn_net_id = lpni_net;
374 peer_net->lpn_peer = peer;
375 lpni->lpni_peer_net = peer_net;
376 peer->lp_primary_nid = lpni->lpni_nid;
377 list_add_tail(&peer_net->lpn_on_peer_list, &peer->lp_peer_nets);
378 list_add_tail(&lpni->lpni_on_peer_net_list, &peer_net->lpn_peer_nis);
379 list_add_tail(&peer->lp_on_lnet_peer_list, &the_lnet.ln_peers);
/*
 * Linear scan of @peer's nets for the one matching @net_id.  Returns
 * the matching peer_net (miss-path return elided from this excerpt).
 */
384 struct lnet_peer_net *
385 lnet_peer_get_net_locked(struct lnet_peer *peer, __u32 net_id)
387 struct lnet_peer_net *peer_net;
388 list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_on_peer_list) {
389 if (peer_net->lpn_net_id == net_id)
/*
 * Final destruction of a peer NI whose refcount has hit zero: assert it
 * is fully quiescent (no router refs, empty tx queue, unhashed), unlink
 * its peer hierarchy, free it, and retire one zombie from its table's
 * pt_zombies count.
 */
396 lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni)
398 struct lnet_peer_table *ptable;
400 LASSERT(atomic_read(&lpni->lpni_refcount) == 0);
401 LASSERT(lpni->lpni_rtr_refcount == 0);
402 LASSERT(list_empty(&lpni->lpni_txq));
403 LASSERT(list_empty(&lpni->lpni_hashlist));
404 LASSERT(lpni->lpni_txqnob == 0);
405 LASSERT(lpni->lpni_peer_net != NULL);
406 LASSERT(lpni->lpni_peer_net->lpn_peer != NULL);
408 ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
409 LASSERT(ptable->pt_number > 0);
412 lpni->lpni_net = NULL;
414 lnet_try_destroy_peer_hierarchy_locked(lpni);
416 LIBCFS_FREE(lpni, sizeof(*lpni));
418 LASSERT(ptable->pt_zombies > 0);
419 ptable->pt_zombies--;
/*
 * Find-or-create a peer NI for @nid, returned (referenced) in *lpnip.
 * @cpt is the lock the caller currently holds; the peer itself lives on
 * cpt2 = lnet_nid_cpt_hash(nid).  All table mutation happens under the
 * exclusive lock, so a non-EX caller is upgraded here and restored on
 * exit.  The allocation itself runs unlocked; afterwards shutdown and a
 * concurrent insertion by another thread are re-checked before the new
 * entry is hashed in.
 */
423 lnet_nid2peerni_locked(struct lnet_peer_ni **lpnip, lnet_nid_t nid, int cpt)
425 struct lnet_peer_table *ptable;
426 struct lnet_peer_ni *lpni = NULL;
427 struct lnet_peer_ni *lpni2;
432 if (the_lnet.ln_shutdown) /* it's shutting down */
436 * calculate cpt2 with the standard hash function
437 * This cpt2 becomes the slot where we'll find or create the peer.
439 cpt2 = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
442 * Any changes to the peer tables happen under exclusive write
443 * lock. Any reads to the peer tables can be done via a standard
/* upgrade to the exclusive lock if the caller didn't hold it */
446 if (cpt != LNET_LOCK_EX) {
447 lnet_net_unlock(cpt);
448 lnet_net_lock(LNET_LOCK_EX);
451 ptable = the_lnet.ln_peer_tables[cpt2];
452 lpni = lnet_get_peer_ni_locked(ptable, nid);
/* fast path: already exists — restore caller's lock and return it */
455 if (cpt != LNET_LOCK_EX) {
456 lnet_net_unlock(LNET_LOCK_EX);
463 * take extra refcount in case another thread has shutdown LNet
464 * and destroyed locks and peer-table before I finish the allocation
467 lnet_net_unlock(LNET_LOCK_EX);
469 LIBCFS_CPT_ALLOC(lpni, lnet_cpt_table(), cpt2, sizeof(*lpni));
477 INIT_LIST_HEAD(&lpni->lpni_txq);
478 INIT_LIST_HEAD(&lpni->lpni_rtrq);
479 INIT_LIST_HEAD(&lpni->lpni_routes);
481 lpni->lpni_alive = !lnet_peers_start_down(); /* 1 bit!! */
482 lpni->lpni_last_alive = cfs_time_current(); /* assumes alive */
483 lpni->lpni_ping_feats = LNET_PING_FEAT_INVAL;
484 lpni->lpni_nid = nid;
485 lpni->lpni_cpt = cpt2;
486 atomic_set(&lpni->lpni_refcount, 2); /* 1 for caller; 1 for hash */
488 rc = lnet_build_peer_hierarchy(lpni);
492 lnet_net_lock(LNET_LOCK_EX);
/* re-check under the lock: shutdown may have started meanwhile */
494 if (the_lnet.ln_shutdown) {
/* re-check for a racing insert of the same nid */
499 lpni2 = lnet_get_peer_ni_locked(ptable, nid);
505 lpni->lpni_net = lnet_get_net_locked(LNET_NIDNET(lpni->lpni_nid));
506 if (lpni->lpni_net) {
/* directly-connected peer: seed credits from the net's tunables */
507 lpni->lpni_txcredits =
508 lpni->lpni_mintxcredits =
509 lpni->lpni_net->net_tunables.lct_peer_tx_credits;
510 lpni->lpni_rtrcredits =
511 lpni->lpni_minrtrcredits =
512 lnet_peer_buffer_credits(lpni->lpni_net);
514 CDEBUG(D_NET, "peer_ni %s is not directly connected\n",
515 libcfs_nid2str(nid));
518 lnet_set_peer_ni_health_locked(lpni, true);
520 list_add_tail(&lpni->lpni_hashlist,
521 &ptable->pt_hash[lnet_nid2peerhash(nid)]);
522 ptable->pt_version++;
525 if (cpt != LNET_LOCK_EX) {
526 lnet_net_unlock(LNET_LOCK_EX);
/* error/duplicate path: unwind hierarchy and free the allocation */
533 lnet_try_destroy_peer_hierarchy_locked(lpni);
534 LIBCFS_FREE(lpni, sizeof(*lpni));
537 if (cpt != LNET_LOCK_EX) {
538 lnet_net_unlock(LNET_LOCK_EX);
/*
 * Debug helper: look up (or create) the peer NI for @nid and dump its
 * aliveness, refcount, credit counters and tx queue bytes via CDEBUG.
 * Aliveness reads "NA" unless the peer is a router or peer-aliveness
 * tracking is enabled.  Drops its temporary peer-NI ref before exit.
 */
545 lnet_debug_peer(lnet_nid_t nid)
547 char *aliveness = "NA";
548 struct lnet_peer_ni *lp;
552 cpt = lnet_cpt_of_nid(nid, NULL);
555 rc = lnet_nid2peerni_locked(&lp, nid, cpt);
557 lnet_net_unlock(cpt);
558 CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
562 if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
563 aliveness = lp->lpni_alive ? "up" : "down";
565 CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
566 libcfs_nid2str(lp->lpni_nid), atomic_read(&lp->lpni_refcount),
567 aliveness, lp->lpni_net->net_tunables.lct_peer_tx_credits,
568 lp->lpni_rtrcredits, lp->lpni_minrtrcredits,
569 lp->lpni_txcredits, lp->lpni_mintxcredits, lp->lpni_txqnob);
571 lnet_peer_ni_decref_locked(lp);
573 lnet_net_unlock(cpt);
/*
 * IOCTL-style enumeration: walk the peer table for CPT *cpt_iter and
 * return, through the out-parameters, the stats of the peer at position
 * @peer_index within that table's hash chains.  Returns 0 when found,
 * -ENOENT when the index (or cpt) is exhausted.
 * NOTE(review): *peer_min_rtr_credits is filled from lpni_mintxcredits
 * at orig line 625 — looks like it should be lpni_minrtrcredits; the
 * elided lines may tell — flag for upstream confirmation.
 */
576 int lnet_get_peer_info(__u32 peer_index, __u64 *nid,
577 char aliveness[LNET_MAX_STR_LEN],
578 __u32 *cpt_iter, __u32 *refcount,
579 __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
580 __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
583 struct lnet_peer_table *peer_table;
584 struct lnet_peer_ni *lp;
589 /* get the number of CPTs */
590 lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
592 /* if the cpt number to be examined is >= the number of cpts in
593 * the system then indicate that there are no more cpts to examine
595 if (*cpt_iter >= lncpt)
598 /* get the current table */
599 peer_table = the_lnet.ln_peer_tables[*cpt_iter];
600 /* if the ptable is NULL then there are no more cpts to examine */
601 if (peer_table == NULL)
604 lnet_net_lock(*cpt_iter);
606 for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) {
607 struct list_head *peers = &peer_table->pt_hash[j];
609 list_for_each_entry(lp, peers, lpni_hashlist) {
/* skip until the requested linear index is reached */
610 if (peer_index-- > 0)
613 snprintf(aliveness, LNET_MAX_STR_LEN, "NA");
614 if (lnet_isrouter(lp) ||
615 lnet_peer_aliveness_enabled(lp))
616 snprintf(aliveness, LNET_MAX_STR_LEN,
617 lp->lpni_alive ? "up" : "down");
620 *refcount = atomic_read(&lp->lpni_refcount);
621 *ni_peer_tx_credits =
622 lp->lpni_net->net_tunables.lct_peer_tx_credits;
623 *peer_tx_credits = lp->lpni_txcredits;
624 *peer_rtr_credits = lp->lpni_rtrcredits;
625 *peer_min_rtr_credits = lp->lpni_mintxcredits;
626 *peer_tx_qnob = lp->lpni_txqnob;
632 lnet_net_unlock(*cpt_iter);
636 return found ? 0 : -ENOENT;