4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2014, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
35 #define DEBUG_SUBSYSTEM S_LNET
37 #include <lnet/lib-lnet.h>
38 #include <lnet/lib-dlc.h>
/*
 * Unhook a peer_ni from the global remote-peer-NI list, if it is on it,
 * and drop the reference that list membership held.  list_del_init()
 * makes this idempotent.
 */
41 lnet_peer_remove_from_remote_list(struct lnet_peer_ni *lpni)
43 if (!list_empty(&lpni->lpni_on_remote_peer_ni_list)) {
44 list_del_init(&lpni->lpni_on_remote_peer_ni_list);
45 lnet_peer_ni_decref_locked(lpni);
/*
 * A local net was just added.  Walk the remote peer-NI list; any peer_ni
 * whose NID is now on the new local net gets its tx/rtr credits seeded
 * from the net tunables and is removed from the remote list.
 */
50 lnet_peer_net_added(struct lnet_net *net)
52 struct lnet_peer_ni *lpni, *tmp;
54 list_for_each_entry_safe(lpni, tmp, &the_lnet.ln_remote_peer_ni_list,
55 lpni_on_remote_peer_ni_list) {
57 if (LNET_NIDNET(lpni->lpni_nid) == net->net_id) {
/* credits come from the (now local) net's tunables */
60 spin_lock(&lpni->lpni_lock);
61 lpni->lpni_txcredits =
62 lpni->lpni_net->net_tunables.lct_peer_tx_credits;
63 lpni->lpni_mintxcredits = lpni->lpni_txcredits;
64 lpni->lpni_rtrcredits =
65 lnet_peer_buffer_credits(lpni->lpni_net);
66 lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
67 spin_unlock(&lpni->lpni_lock);
69 lnet_peer_remove_from_remote_list(lpni);
/*
 * Free the per-CPT peer hash tables.  Asserts that every hash chain and
 * the zombie list are empty, i.e. all peer_nis are already gone.
 */
75 lnet_peer_tables_destroy(void)
77 struct lnet_peer_table *ptable;
78 struct list_head *hash;
/* nothing to do if the tables were never allocated */
82 if (!the_lnet.ln_peer_tables)
85 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
86 hash = ptable->pt_hash;
87 if (!hash) /* not intialized */
90 LASSERT(list_empty(&ptable->pt_zombie_list));
92 ptable->pt_hash = NULL;
93 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
94 LASSERT(list_empty(&hash[j]));
96 LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
99 cfs_percpt_free(the_lnet.ln_peer_tables);
100 the_lnet.ln_peer_tables = NULL;
/*
 * Allocate the per-CPT peer tables and their hash chains.  On partial
 * failure, lnet_peer_tables_destroy() unwinds what was allocated.
 */
104 lnet_peer_tables_create(void)
106 struct lnet_peer_table *ptable;
107 struct list_head *hash;
111 the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
113 if (the_lnet.ln_peer_tables == NULL) {
114 CERROR("Failed to allocate cpu-partition peer tables\n");
118 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
119 LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
120 LNET_PEER_HASH_SIZE * sizeof(*hash));
122 CERROR("Failed to create peer hash table\n");
123 lnet_peer_tables_destroy();
127 spin_lock_init(&ptable->pt_zombie_lock);
128 INIT_LIST_HEAD(&ptable->pt_zombie_list);
130 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
131 INIT_LIST_HEAD(&hash[j]);
/* pt_hash doubles as the "this CPT's table is ready" flag */
132 ptable->pt_hash = hash; /* sign of initialization */
/*
 * Allocate and initialize a peer_ni for @nid on the CPT derived from the
 * NID.  If the NID belongs to a local net, tx/rtr credits are seeded from
 * that net's tunables; otherwise the peer_ni is parked on the global
 * remote peer-NI list (with a ref) so lnet_peer_net_added() can fix it
 * up if the net appears later.
 */
138 static struct lnet_peer_ni *
139 lnet_peer_ni_alloc(lnet_nid_t nid)
141 struct lnet_peer_ni *lpni;
142 struct lnet_net *net;
145 cpt = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
147 LIBCFS_CPT_ALLOC(lpni, lnet_cpt_table(), cpt, sizeof(*lpni));
151 INIT_LIST_HEAD(&lpni->lpni_txq);
152 INIT_LIST_HEAD(&lpni->lpni_rtrq);
153 INIT_LIST_HEAD(&lpni->lpni_routes);
154 INIT_LIST_HEAD(&lpni->lpni_hashlist);
155 INIT_LIST_HEAD(&lpni->lpni_on_peer_net_list);
156 INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list);
158 spin_lock_init(&lpni->lpni_lock);
160 lpni->lpni_alive = !lnet_peers_start_down(); /* 1 bit!! */
161 lpni->lpni_last_alive = cfs_time_current(); /* assumes alive */
162 lpni->lpni_ping_feats = LNET_PING_FEAT_INVAL;
163 lpni->lpni_nid = nid;
164 lpni->lpni_cpt = cpt;
165 lnet_set_peer_ni_health_locked(lpni, true);
/* NULL when the NID's net is not configured locally */
167 net = lnet_get_net_locked(LNET_NIDNET(nid));
168 lpni->lpni_net = net;
170 lpni->lpni_txcredits = net->net_tunables.lct_peer_tx_credits;
171 lpni->lpni_mintxcredits = lpni->lpni_txcredits;
172 lpni->lpni_rtrcredits = lnet_peer_buffer_credits(net);
173 lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
176 * This peer_ni is not on a local network, so we
177 * cannot add the credits here. In case the net is
178 * added later, add the peer_ni to the remote peer ni
179 * list so it can be easily found and revisited.
181 /* FIXME: per-net implementation instead? */
182 atomic_inc(&lpni->lpni_refcount);
183 list_add_tail(&lpni->lpni_on_remote_peer_ni_list,
184 &the_lnet.ln_remote_peer_ni_list);
102 /* TODO: update flags */
/*
 * Allocate a peer_net container for @net_id.  It is attached to a peer
 * later, by lnet_peer_attach_peer_ni().
 */
192 static struct lnet_peer_net *
193 lnet_peer_net_alloc(__u32 net_id)
195 struct lnet_peer_net *lpn;
197 LIBCFS_CPT_ALLOC(lpn, lnet_cpt_table(), CFS_CPT_ANY, sizeof(*lpn));
201 INIT_LIST_HEAD(&lpn->lpn_on_peer_list);
202 INIT_LIST_HEAD(&lpn->lpn_peer_nis);
203 lpn->lpn_net_id = net_id;
/*
 * Allocate a peer with @nid as its primary NID.  peer_nets and the
 * global-list linkage are filled in by lnet_peer_attach_peer_ni().
 */
208 static struct lnet_peer *
209 lnet_peer_alloc(lnet_nid_t nid)
211 struct lnet_peer *lp;
213 LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), CFS_CPT_ANY, sizeof(*lp));
217 INIT_LIST_HEAD(&lp->lp_on_lnet_peer_list);
218 INIT_LIST_HEAD(&lp->lp_peer_nets);
219 spin_lock_init(&lp->lp_lock);
220 lp->lp_primary_nid = nid;
222 /* TODO: update flags */
/*
 * Detach a peer_ni from its peer_net/peer hierarchy.  Frees the
 * peer_net when it loses its last NI, and frees the peer when it loses
 * its last net.  No-op if the peer_ni is not attached.
 */
229 lnet_peer_detach_peer_ni(struct lnet_peer_ni *lpni)
231 struct lnet_peer_net *lpn;
232 struct lnet_peer *lp;
234 /* TODO: could the below situation happen? accessing an already
236 if (lpni->lpni_peer_net == NULL ||
237 lpni->lpni_peer_net->lpn_peer == NULL)
240 lpn = lpni->lpni_peer_net;
241 lp = lpni->lpni_peer_net->lpn_peer;
243 CDEBUG(D_NET, "peer %s NID %s\n",
244 libcfs_nid2str(lp->lp_primary_nid),
245 libcfs_nid2str(lpni->lpni_nid));
247 list_del_init(&lpni->lpni_on_peer_net_list);
248 lpni->lpni_peer_net = NULL;
250 /* if lpn is empty, then remove it from the peer */
251 if (list_empty(&lpn->lpn_peer_nis)) {
252 list_del_init(&lpn->lpn_on_peer_list);
253 lpn->lpn_peer = NULL;
254 LIBCFS_FREE(lpn, sizeof(*lpn));
256 /* if the peer is empty then remove it from the
257 * the_lnet.ln_peers */
258 if (list_empty(&lp->lp_peer_nets)) {
259 list_del_init(&lp->lp_on_lnet_peer_list);
260 LIBCFS_FREE(lp, sizeof(*lp));
/* called with lnet_net_lock LNET_LOCK_EX held */
/*
 * Remove a peer_ni from lookup (hash), the remote list and the peer
 * hierarchy.  The peer_ni itself is parked on its CPT's zombie list
 * until its refcount drains to zero.  Refuses to delete a gateway.
 */
267 lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni)
269 struct lnet_peer_table *ptable = NULL;
271 /* don't remove a peer_ni if it's also a gateway */
272 if (lpni->lpni_rtr_refcount > 0) {
273 CERROR("Peer NI %s is a gateway. Can not delete it\n",
274 libcfs_nid2str(lpni->lpni_nid));
278 lnet_peer_remove_from_remote_list(lpni);
280 /* remove peer ni from the hash list. */
281 list_del_init(&lpni->lpni_hashlist);
283 /* decrement the ref count on the peer table */
284 ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
285 LASSERT(atomic_read(&ptable->pt_number) > 0);
286 atomic_dec(&ptable->pt_number);
289 * The peer_ni can no longer be found with a lookup. But there
290 * can be current users, so keep track of it on the zombie
291 * list until the reference count has gone to zero.
293 * The last reference may be lost in a place where the
294 * lnet_net_lock locks only a single cpt, and that cpt may not
295 * be lpni->lpni_cpt. So the zombie list of lnet_peer_table
298 spin_lock(&ptable->pt_zombie_lock);
/* lpni_hashlist is reused as the zombie-list linkage here */
299 list_add(&lpni->lpni_hashlist, &ptable->pt_zombie_list);
300 ptable->pt_zombies++;
301 spin_unlock(&ptable->pt_zombie_lock);
303 /* no need to keep this peer_ni on the hierarchy anymore */
304 lnet_peer_detach_peer_ni(lpni);
306 /* decrement reference on peer_ni */
307 lnet_peer_ni_decref_locked(lpni);
/*
 * Module teardown: delete every remote peer_ni, then destroy the
 * per-CPT peer tables.  Runs under lnet_net_lock(LNET_LOCK_EX).
 */
312 void lnet_peer_uninit(void)
314 struct lnet_peer_ni *lpni, *tmp;
316 lnet_net_lock(LNET_LOCK_EX);
318 /* remove all peer_nis from the remote peer and the hash list */
319 list_for_each_entry_safe(lpni, tmp, &the_lnet.ln_remote_peer_ni_list,
320 lpni_on_remote_peer_ni_list)
321 lnet_peer_ni_del_locked(lpni);
323 lnet_peer_tables_destroy();
325 lnet_net_unlock(LNET_LOCK_EX);
/*
 * Delete every peer_ni of @peer (which frees the peer itself when the
 * last NI detaches).  The iterator is advanced before deletion because
 * lnet_peer_ni_del_locked() unlinks the current entry.
 */
329 lnet_peer_del_locked(struct lnet_peer *peer)
331 struct lnet_peer_ni *lpni = NULL, *lpni2;
334 CDEBUG(D_NET, "peer %s\n", libcfs_nid2str(peer->lp_primary_nid));
336 lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
337 while (lpni != NULL) {
338 lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
339 rc = lnet_peer_ni_del_locked(lpni);
/* Locking wrapper: delete @peer under lnet_net_lock(LNET_LOCK_EX). */
349 lnet_peer_del(struct lnet_peer *peer)
351 lnet_net_lock(LNET_LOCK_EX);
352 lnet_peer_del_locked(peer);
353 lnet_net_unlock(LNET_LOCK_EX);
359 * Delete a NID from a peer. Call with ln_api_mutex held.
362 * -EPERM: Non-DLC deletion from DLC-configured peer.
363 * -ENOENT: No lnet_peer_ni corresponding to the nid.
364 * -ECHILD: The lnet_peer_ni isn't connected to the peer.
365 * -EBUSY: The lnet_peer_ni is the primary, and not the only peer_ni.
368 lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags)
370 struct lnet_peer_ni *lpni;
371 lnet_nid_t primary_nid = lp->lp_primary_nid;
/* a DLC-configured peer may only be modified through DLC */
374 if (!(flags & LNET_PEER_CONFIGURED)) {
375 if (lp->lp_state & LNET_PEER_CONFIGURED) {
380 lpni = lnet_find_peer_ni_locked(nid);
/* drop the lookup ref; ln_api_mutex keeps lpni alive below */
385 lnet_peer_ni_decref_locked(lpni);
386 if (lp != lpni->lpni_peer_net->lpn_peer) {
392 * This function only allows deletion of the primary NID if it
395 if (nid == lp->lp_primary_nid && lnet_get_num_peer_nis(lp) != 1) {
400 lnet_net_lock(LNET_LOCK_EX);
401 lnet_peer_ni_del_locked(lpni);
402 lnet_net_unlock(LNET_LOCK_EX);
405 CDEBUG(D_NET, "peer %s NID %s flags %#x: %d\n",
406 libcfs_nid2str(primary_nid), libcfs_nid2str(nid), flags, rc);
/*
 * Delete all peer_nis in @ptable that belong to @net (all nets when
 * @net is NULL).  Deleting a primary NID deletes the whole peer, so the
 * safe-iteration cursor is first advanced past that peer's other NIs.
 */
412 lnet_peer_table_cleanup_locked(struct lnet_net *net,
413 struct lnet_peer_table *ptable)
416 struct lnet_peer_ni *next;
417 struct lnet_peer_ni *lpni;
418 struct lnet_peer *peer;
420 for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
421 list_for_each_entry_safe(lpni, next, &ptable->pt_hash[i],
423 if (net != NULL && net != lpni->lpni_net)
426 peer = lpni->lpni_peer_net->lpn_peer;
427 if (peer->lp_primary_nid != lpni->lpni_nid) {
428 lnet_peer_ni_del_locked(lpni);
432 * Removing the primary NID implies removing
433 * the entire peer. Advance next beyond any
434 * peer_ni that belongs to the same peer.
436 list_for_each_entry_from(next, &ptable->pt_hash[i],
438 if (next->lpni_peer_net->lpn_peer != peer)
441 lnet_peer_del_locked(peer);
/*
 * Block until every zombie peer_ni on @ptable has been finalized,
 * polling every half second while logging progress.
 */
447 lnet_peer_ni_finalize_wait(struct lnet_peer_table *ptable)
451 spin_lock(&ptable->pt_zombie_lock);
452 while (ptable->pt_zombies) {
/* drop the lock while sleeping so finalizers can make progress */
453 spin_unlock(&ptable->pt_zombie_lock);
457 "Waiting for %d zombies on peer table\n",
460 set_current_state(TASK_UNINTERRUPTIBLE);
461 schedule_timeout(cfs_time_seconds(1) >> 1);
462 spin_lock(&ptable->pt_zombie_lock);
464 spin_unlock(&ptable->pt_zombie_lock);
/*
 * Delete all routes whose gateway is a peer_ni on @net in @ptable.
 * lnet_del_route() takes the net lock itself, so it is dropped and
 * retaken around each call.
 */
468 lnet_peer_table_del_rtrs_locked(struct lnet_net *net,
469 struct lnet_peer_table *ptable)
471 struct lnet_peer_ni *lp;
472 struct lnet_peer_ni *tmp;
476 for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
477 list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
479 if (net != lp->lpni_net)
/* only gateways (rtr refs held) are of interest */
482 if (lp->lpni_rtr_refcount == 0)
485 lpni_nid = lp->lpni_nid;
487 lnet_net_unlock(LNET_LOCK_EX);
488 lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lpni_nid);
489 lnet_net_lock(LNET_LOCK_EX);
/*
 * Clean all peer tables of peers on @net (all nets when NULL, which is
 * only legal at shutdown): first drop routes through affected gateways,
 * then delete the peer_nis, then wait for the zombies to drain.
 */
495 lnet_peer_tables_cleanup(struct lnet_net *net)
498 struct lnet_peer_table *ptable;
500 LASSERT(the_lnet.ln_state != LNET_STATE_SHUTDOWN || net != NULL);
501 /* If just deleting the peers for a NI, get rid of any routes these
502 * peers are gateways for. */
503 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
504 lnet_net_lock(LNET_LOCK_EX);
505 lnet_peer_table_del_rtrs_locked(net, ptable);
506 lnet_net_unlock(LNET_LOCK_EX);
509 /* Start the cleanup process */
510 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
511 lnet_net_lock(LNET_LOCK_EX);
512 lnet_peer_table_cleanup_locked(net, ptable);
513 lnet_net_unlock(LNET_LOCK_EX);
516 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables)
517 lnet_peer_ni_finalize_wait(ptable);
/*
 * Hash lookup of @nid in @ptable.  Takes a reference on the peer_ni
 * before returning it; caller must decref.
 */
520 static struct lnet_peer_ni *
521 lnet_get_peer_ni_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
523 struct list_head *peers;
524 struct lnet_peer_ni *lp;
526 LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
528 peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
529 list_for_each_entry(lp, peers, lpni_hashlist) {
530 if (lp->lpni_nid == nid) {
531 lnet_peer_ni_addref_locked(lp);
/*
 * Look up @nid in the peer table of its CPT.  Returns a referenced
 * peer_ni (caller must decref) or NULL if not found.
 */
539 struct lnet_peer_ni *
540 lnet_find_peer_ni_locked(lnet_nid_t nid)
542 struct lnet_peer_ni *lpni;
543 struct lnet_peer_table *ptable;
546 cpt = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
548 ptable = the_lnet.ln_peer_tables[cpt];
549 lpni = lnet_get_peer_ni_locked(ptable, nid);
/*
 * Return the idx'th peer_ni in a flattened walk of all peers -> nets ->
 * NIs, also reporting the enclosing peer_net and peer via out-params.
 */
554 struct lnet_peer_ni *
555 lnet_get_peer_ni_idx_locked(int idx, struct lnet_peer_net **lpn,
556 struct lnet_peer **lp)
558 struct lnet_peer_ni *lpni;
560 list_for_each_entry((*lp), &the_lnet.ln_peers, lp_on_lnet_peer_list) {
561 list_for_each_entry((*lpn), &((*lp)->lp_peer_nets), lpn_on_peer_list) {
562 list_for_each_entry(lpni, &((*lpn)->lpn_peer_nis),
563 lpni_on_peer_net_list)
/*
 * Iterator over the peer_nis of @peer: return the peer_ni after @prev
 * (the first one when @prev is NULL), crossing into the next peer_net
 * unless @peer_net restricts the walk to one net.  Returns NULL at the
 * end of the walk.
 */
572 struct lnet_peer_ni *
573 lnet_get_next_peer_ni_locked(struct lnet_peer *peer,
574 struct lnet_peer_net *peer_net,
575 struct lnet_peer_ni *prev)
577 struct lnet_peer_ni *lpni;
578 struct lnet_peer_net *net = peer_net;
/* no cursor: start from the peer's first net / first NI */
582 net = list_entry(peer->lp_peer_nets.next,
583 struct lnet_peer_net,
585 lpni = list_entry(net->lpn_peer_nis.next, struct lnet_peer_ni,
586 lpni_on_peer_net_list);
/* cursor is the last NI of its net */
591 if (prev->lpni_on_peer_net_list.next ==
592 &prev->lpni_peer_net->lpn_peer_nis) {
594 * if you reached the end of the peer ni list and the peer
595 * net is specified then there are no more peer nis in that
602 * we reached the end of this net ni list. move to the
605 if (prev->lpni_peer_net->lpn_on_peer_list.next ==
607 /* no more nets and no more NIs. */
610 /* get the next net */
611 net = list_entry(prev->lpni_peer_net->lpn_on_peer_list.next,
612 struct lnet_peer_net,
614 /* get the ni on it */
615 lpni = list_entry(net->lpn_peer_nis.next, struct lnet_peer_ni,
616 lpni_on_peer_net_list);
621 /* there are more nis left */
622 lpni = list_entry(prev->lpni_on_peer_net_list.next,
623 struct lnet_peer_ni, lpni_on_peer_net_list);
629 * Test whether a ni is a preferred ni for this peer_ni, e.g, whether
630 * this is a preferred point-to-point path. Call with lnet_net_lock in
634 lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid)
/* lpni_pref is a union: .nid when nnids == 1, .nids[] otherwise */
638 if (lpni->lpni_pref_nnids == 0)
640 if (lpni->lpni_pref_nnids == 1)
641 return lpni->lpni_pref.nid == nid;
642 for (i = 0; i < lpni->lpni_pref_nnids; i++) {
643 if (lpni->lpni_pref.nids[i] == nid)
650 * Set a single ni as preferred, provided no preferred ni is already
651 * defined. Only to be used for non-multi-rail peer_ni.
654 lnet_peer_ni_set_non_mr_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
658 spin_lock(&lpni->lpni_lock);
659 if (nid == LNET_NID_ANY) {
661 } else if (lpni->lpni_pref_nnids > 0) {
663 } else if (lpni->lpni_pref_nnids == 0) {
/* record the single preferred NID and mark it as the non-MR kind */
664 lpni->lpni_pref.nid = nid;
665 lpni->lpni_pref_nnids = 1;
666 lpni->lpni_state |= LNET_PEER_NI_NON_MR_PREF;
668 spin_unlock(&lpni->lpni_lock);
670 CDEBUG(D_NET, "peer %s nid %s: %d\n",
671 libcfs_nid2str(lpni->lpni_nid), libcfs_nid2str(nid), rc);
676 * Clear the preferred NID from a non-multi-rail peer_ni, provided
677 * this preference was set by lnet_peer_ni_set_non_mr_pref_nid().
680 lnet_peer_ni_clr_non_mr_pref_nid(struct lnet_peer_ni *lpni)
684 spin_lock(&lpni->lpni_lock);
/* only clear what the non-MR setter installed */
685 if (lpni->lpni_state & LNET_PEER_NI_NON_MR_PREF) {
686 lpni->lpni_pref_nnids = 0;
687 lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
688 } else if (lpni->lpni_pref_nnids == 0) {
693 spin_unlock(&lpni->lpni_lock);
695 CDEBUG(D_NET, "peer %s: %d\n",
696 libcfs_nid2str(lpni->lpni_nid), rc);
701 * Clear the preferred NIDs from a non-multi-rail peer.
704 lnet_peer_clr_non_mr_pref_nids(struct lnet_peer *lp)
706 struct lnet_peer_ni *lpni = NULL;
/* walk every peer_ni of the peer and drop its non-MR preference */
708 while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL)
709 lnet_peer_ni_clr_non_mr_pref_nid(lpni);
/*
 * Add @nid to the preferred-NID set of @lpni.  A non-MR peer may hold
 * at most one preferred NID.  The nids[] array is grown copy-on-write:
 * a new array is built outside the lock and swapped in under it.
 */
713 lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
715 lnet_nid_t *nids = NULL;
716 lnet_nid_t *oldnids = NULL;
717 struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer;
722 if (nid == LNET_NID_ANY) {
727 if (lpni->lpni_pref_nnids == 1 && lpni->lpni_pref.nid == nid) {
732 /* A non-MR node may have only one preferred NI per peer_ni */
733 if (lpni->lpni_pref_nnids > 0) {
734 if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
740 if (lpni->lpni_pref_nnids != 0) {
741 size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1);
742 LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size);
/* copy existing entries; bail if nid is already present */
747 for (i = 0; i < lpni->lpni_pref_nnids; i++) {
748 if (lpni->lpni_pref.nids[i] == nid) {
749 LIBCFS_FREE(nids, size);
753 nids[i] = lpni->lpni_pref.nids[i];
758 lnet_net_lock(LNET_LOCK_EX);
759 spin_lock(&lpni->lpni_lock);
760 if (lpni->lpni_pref_nnids == 0) {
761 lpni->lpni_pref.nid = nid;
763 oldnids = lpni->lpni_pref.nids;
764 lpni->lpni_pref.nids = nids;
766 lpni->lpni_pref_nnids++;
767 lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
768 spin_unlock(&lpni->lpni_lock);
769 lnet_net_unlock(LNET_LOCK_EX);
/* NOTE(review): 'size' here is already a byte count, so multiplying by
 * sizeof(*oldnids) again in LIBCFS_FREE over-states the freed size —
 * TODO confirm against LIBCFS_FREE accounting semantics. */
772 size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1);
773 LIBCFS_FREE(oldnids, sizeof(*oldnids) * size);
776 if (rc == -EEXIST && (lpni->lpni_state & LNET_PEER_NI_NON_MR_PREF)) {
777 spin_lock(&lpni->lpni_lock);
778 lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
779 spin_unlock(&lpni->lpni_lock);
781 CDEBUG(D_NET, "peer %s nid %s: %d\n",
782 libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid), rc);
/*
 * Remove @nid from the preferred-NID set of @lpni.  The nids[] array is
 * shrunk copy-on-write: a smaller array is built outside the lock and
 * swapped in under it.  Going from 2 entries to 1 collapses the union
 * back to the single .nid form, keeping the surviving entry.
 */
787 lnet_peer_del_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
789 lnet_nid_t *nids = NULL;
790 lnet_nid_t *oldnids = NULL;
791 struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer;
796 if (lpni->lpni_pref_nnids == 0) {
801 if (lpni->lpni_pref_nnids == 1) {
802 if (lpni->lpni_pref.nid != nid) {
806 } else if (lpni->lpni_pref_nnids == 2) {
807 if (lpni->lpni_pref.nids[0] != nid &&
808 lpni->lpni_pref.nids[1] != nid) {
/* build the shrunken array (not needed for the 1- and 2-entry cases) */
813 size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1);
814 LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size);
819 for (i = 0, j = 0; i < lpni->lpni_pref_nnids; i++) {
820 if (lpni->lpni_pref.nids[i] != nid)
822 nids[j++] = lpni->lpni_pref.nids[i];
824 /* Check if we actually removed a nid. */
825 if (j == lpni->lpni_pref_nnids) {
826 LIBCFS_FREE(nids, size);
832 lnet_net_lock(LNET_LOCK_EX);
833 spin_lock(&lpni->lpni_lock);
834 if (lpni->lpni_pref_nnids == 1) {
835 lpni->lpni_pref.nid = LNET_NID_ANY;
836 } else if (lpni->lpni_pref_nnids == 2) {
837 oldnids = lpni->lpni_pref.nids;
/* keep whichever of the two entries is not being deleted.  The old
 * code read oldnids[2], one past the end of this 2-entry array. */
838 if (oldnids[0] == nid)
839 lpni->lpni_pref.nid = oldnids[1];
841 lpni->lpni_pref.nid = oldnids[0];
843 oldnids = lpni->lpni_pref.nids;
844 lpni->lpni_pref.nids = nids;
846 lpni->lpni_pref_nnids--;
847 lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
848 spin_unlock(&lpni->lpni_lock);
849 lnet_net_unlock(LNET_LOCK_EX);
/* NOTE(review): as in lnet_peer_add_pref_nid, 'size' is already bytes;
 * the extra sizeof(*oldnids) factor looks like over-accounting — TODO
 * confirm against LIBCFS_FREE semantics. */
852 size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1);
853 LIBCFS_FREE(oldnids, sizeof(*oldnids) * size);
856 CDEBUG(D_NET, "peer %s nid %s: %d\n",
857 libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid), rc);
/*
 * Map @nid to its peer's primary NID.  Falls back to @nid itself when
 * no peer_ni exists.  Takes and drops the current-CPT net lock.
 */
862 lnet_peer_primary_nid(lnet_nid_t nid)
864 struct lnet_peer_ni *lpni;
865 lnet_nid_t primary_nid = nid;
868 cpt = lnet_net_lock_current();
869 lpni = lnet_find_peer_ni_locked(nid);
871 primary_nid = lpni->lpni_peer_net->lpn_peer->lp_primary_nid;
872 lnet_peer_ni_decref_locked(lpni);
874 lnet_net_unlock(cpt);
/*
 * Exported API: return the primary NID of the peer owning @nid,
 * creating the peer_ni (traffic-style) if it does not exist yet.
 */
880 LNetPrimaryNID(lnet_nid_t nid)
882 struct lnet_peer_ni *lpni;
883 lnet_nid_t primary_nid = nid;
887 cpt = lnet_net_lock_current();
888 lpni = lnet_nid2peerni_locked(nid, LNET_NID_ANY, cpt);
893 primary_nid = lpni->lpni_peer_net->lpn_peer->lp_primary_nid;
894 lnet_peer_ni_decref_locked(lpni);
896 lnet_net_unlock(cpt);
898 CDEBUG(D_NET, "NID %s primary NID %s rc %d\n", libcfs_nid2str(nid),
899 libcfs_nid2str(primary_nid), rc);
902 EXPORT_SYMBOL(LNetPrimaryNID);
/* Find the peer_net of @peer matching @net_id, if any. */
904 struct lnet_peer_net *
905 lnet_peer_get_net_locked(struct lnet_peer *peer, __u32 net_id)
907 struct lnet_peer_net *peer_net;
908 list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_on_peer_list) {
909 if (peer_net->lpn_net_id == net_id)
916 * Always returns 0, but it the last function called from functions
917 * that do return an int, so returning 0 here allows the compiler to
/*
 * Wire @lpni into @lpn and @lpn into @lp (hash insert, list links,
 * global peer list) and apply @flags to the peer state.  All structural
 * changes happen under lnet_net_lock(LNET_LOCK_EX).
 */
921 lnet_peer_attach_peer_ni(struct lnet_peer *lp,
922 struct lnet_peer_net *lpn,
923 struct lnet_peer_ni *lpni,
926 struct lnet_peer_table *ptable;
928 /* Install the new peer_ni */
929 lnet_net_lock(LNET_LOCK_EX);
930 /* Add peer_ni to global peer table hash, if necessary. */
931 if (list_empty(&lpni->lpni_hashlist)) {
932 int hash = lnet_nid2peerhash(lpni->lpni_nid);
934 ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
935 list_add_tail(&lpni->lpni_hashlist, &ptable->pt_hash[hash]);
936 ptable->pt_version++;
937 atomic_inc(&ptable->pt_number);
/* hash membership holds its own reference */
938 atomic_inc(&lpni->lpni_refcount);
941 /* Detach the peer_ni from an existing peer, if necessary. */
942 if (lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer != lp)
943 lnet_peer_detach_peer_ni(lpni);
945 /* Add peer_ni to peer_net */
946 lpni->lpni_peer_net = lpn;
947 list_add_tail(&lpni->lpni_on_peer_net_list, &lpn->lpn_peer_nis);
949 /* Add peer_net to peer */
950 if (!lpn->lpn_peer) {
952 list_add_tail(&lpn->lpn_on_peer_list, &lp->lp_peer_nets);
955 /* Add peer to global peer list */
956 if (list_empty(&lp->lp_on_lnet_peer_list))
957 list_add_tail(&lp->lp_on_lnet_peer_list, &the_lnet.ln_peers);
959 /* Update peer state */
960 spin_lock(&lp->lp_lock);
961 if (flags & LNET_PEER_CONFIGURED) {
962 if (!(lp->lp_state & LNET_PEER_CONFIGURED))
963 lp->lp_state |= LNET_PEER_CONFIGURED;
965 if (flags & LNET_PEER_MULTI_RAIL) {
966 if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
/* becoming MR invalidates any non-MR NID preferences */
967 lp->lp_state |= LNET_PEER_MULTI_RAIL;
968 lnet_peer_clr_non_mr_pref_nids(lp);
971 spin_unlock(&lp->lp_lock);
973 lnet_net_unlock(LNET_LOCK_EX);
975 CDEBUG(D_NET, "peer %s NID %s flags %#x\n",
976 libcfs_nid2str(lp->lp_primary_nid),
977 libcfs_nid2str(lpni->lpni_nid), flags);
983 * Create a new peer, with nid as its primary nid.
985 * Call with the lnet_api_mutex held.
988 lnet_peer_add(lnet_nid_t nid, unsigned flags)
990 struct lnet_peer *lp;
991 struct lnet_peer_net *lpn;
992 struct lnet_peer_ni *lpni;
995 LASSERT(nid != LNET_NID_ANY);
998 * No need for the lnet_net_lock here, because the
999 * lnet_api_mutex is held.
1001 lpni = lnet_find_peer_ni_locked(nid);
1003 /* A peer with this NID already exists. */
1004 lp = lpni->lpni_peer_net->lpn_peer;
1005 lnet_peer_ni_decref_locked(lpni);
1007 * This is an error if the peer was configured and the
1008 * primary NID differs or an attempt is made to change
1009 * the Multi-Rail flag. Otherwise the assumption is
1010 * that an existing peer is being modified.
1012 if (lp->lp_state & LNET_PEER_CONFIGURED) {
1013 if (lp->lp_primary_nid != nid)
1015 else if ((lp->lp_state ^ flags) & LNET_PEER_MULTI_RAIL)
1019 /* Delete and recreate as a configured peer. */
1023 /* Create peer, peer_net, and peer_ni. */
1025 lp = lnet_peer_alloc(nid);
1028 lpn = lnet_peer_net_alloc(LNET_NIDNET(nid));
1031 lpni = lnet_peer_ni_alloc(nid);
/* attach links the three structures and applies the flags */
1035 return lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
1038 LIBCFS_FREE(lpn, sizeof(*lpn));
1040 LIBCFS_FREE(lp, sizeof(*lp));
1042 CDEBUG(D_NET, "peer %s NID flags %#x: %d\n",
1043 libcfs_nid2str(nid), flags, rc);
1048 * Add a NID to a peer. Call with ln_api_mutex held.
1051 * -EPERM: Non-DLC addition to a DLC-configured peer.
1052 * -EEXIST: The NID was configured by DLC for a different peer.
1053 * -ENOMEM: Out of memory.
1054 * -ENOTUNIQ: Adding a second peer NID on a single network on a
1055 * non-multi-rail peer.
1058 lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags)
1060 struct lnet_peer_net *lpn;
1061 struct lnet_peer_ni *lpni;
1065 LASSERT(nid != LNET_NID_ANY);
1067 /* A configured peer can only be updated through configuration. */
1068 if (!(flags & LNET_PEER_CONFIGURED)) {
1069 if (lp->lp_state & LNET_PEER_CONFIGURED) {
1076 * The MULTI_RAIL flag can be set but not cleared, because
1077 * that would leave the peer struct in an invalid state.
1079 if (flags & LNET_PEER_MULTI_RAIL) {
1080 spin_lock(&lp->lp_lock);
1081 if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
1082 lp->lp_state |= LNET_PEER_MULTI_RAIL;
1083 lnet_peer_clr_non_mr_pref_nids(lp);
1085 spin_unlock(&lp->lp_lock);
1086 } else if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
1091 lpni = lnet_find_peer_ni_locked(nid);
1094 * A peer_ni already exists. This is only a problem if
1095 * it is not connected to this peer and was configured
/* lookup ref dropped; ln_api_mutex keeps lpni stable below */
1098 lnet_peer_ni_decref_locked(lpni);
1099 if (lpni->lpni_peer_net->lpn_peer == lp)
1101 if (lnet_peer_ni_is_configured(lpni)) {
1105 /* If this is the primary NID, destroy the peer. */
1106 if (lnet_peer_ni_is_primary(lpni)) {
1107 lnet_peer_del(lpni->lpni_peer_net->lpn_peer);
1108 lpni = lnet_peer_ni_alloc(nid);
1115 lpni = lnet_peer_ni_alloc(nid);
1123 * Get the peer_net. Check that we're not adding a second
1124 * peer_ni on a peer_net of a non-multi-rail peer.
1126 lpn = lnet_peer_get_net_locked(lp, LNET_NIDNET(nid));
1128 lpn = lnet_peer_net_alloc(LNET_NIDNET(nid));
1133 } else if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
1138 return lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
1141 /* If the peer_ni was allocated above its peer_net pointer is NULL */
1142 if (!lpni->lpni_peer_net)
1143 LIBCFS_FREE(lpni, sizeof(*lpni));
1145 CDEBUG(D_NET, "peer %s NID %s flags %#x: %d\n",
1146 libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid),
1152 * lpni creation initiated due to traffic either sending or receiving.
1155 lnet_peer_ni_traffic_add(lnet_nid_t nid, lnet_nid_t pref)
1157 struct lnet_peer *lp;
1158 struct lnet_peer_net *lpn;
1159 struct lnet_peer_ni *lpni;
1163 if (nid == LNET_NID_ANY) {
1168 /* lnet_net_lock is not needed here because ln_api_lock is held */
1169 lpni = lnet_find_peer_ni_locked(nid);
1172 * We must have raced with another thread. Since we
1173 * know next to nothing about a peer_ni created by
1174 * traffic, we just assume everything is ok and
1177 lnet_peer_ni_decref_locked(lpni);
1181 /* Create peer, peer_net, and peer_ni. */
1183 lp = lnet_peer_alloc(nid);
1186 lpn = lnet_peer_net_alloc(LNET_NIDNET(nid));
1189 lpni = lnet_peer_ni_alloc(nid);
/* remember the local NID traffic arrived on as the preferred path */
1192 if (pref != LNET_NID_ANY)
1193 lnet_peer_ni_set_non_mr_pref_nid(lpni, pref);
1195 return lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
1198 LIBCFS_FREE(lpn, sizeof(*lpn));
1200 LIBCFS_FREE(lp, sizeof(*lp));
1202 CDEBUG(D_NET, "peer %s: %d\n", libcfs_nid2str(nid), rc);
1207 * Implementation of IOC_LIBCFS_ADD_PEER_NI.
1209 * This API handles the following combinations:
1210 * Create a peer with its primary NI if only the prim_nid is provided
1211 * Add a NID to a peer identified by the prim_nid. The peer identified
1212 * by the prim_nid must already exist.
1213 * The peer being created may be non-MR.
1215 * The caller must hold ln_api_mutex. This prevents the peer from
1216 * being created/modified/deleted by a different thread.
1219 lnet_add_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, bool mr)
1221 struct lnet_peer *lp = NULL;
1222 struct lnet_peer_ni *lpni;
1225 /* The prim_nid must always be specified */
1226 if (prim_nid == LNET_NID_ANY)
1229 flags = LNET_PEER_CONFIGURED;
1231 flags |= LNET_PEER_MULTI_RAIL;
1234 * If nid isn't specified, we must create a new peer with
1235 * prim_nid as its primary nid.
1237 if (nid == LNET_NID_ANY)
1238 return lnet_peer_add(prim_nid, flags);
1240 /* Look up the prim_nid, which must exist. */
1241 lpni = lnet_find_peer_ni_locked(prim_nid);
1244 lnet_peer_ni_decref_locked(lpni);
1245 lp = lpni->lpni_peer_net->lpn_peer;
1247 /* Peer must have been configured. */
1248 if (!(lp->lp_state & LNET_PEER_CONFIGURED)) {
1249 CDEBUG(D_NET, "peer %s was not configured\n",
1250 libcfs_nid2str(prim_nid));
1254 /* Primary NID must match */
1255 if (lp->lp_primary_nid != prim_nid) {
1256 CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
1257 libcfs_nid2str(prim_nid),
1258 libcfs_nid2str(lp->lp_primary_nid));
1262 /* Multi-Rail flag must match. */
1263 if ((lp->lp_state ^ flags) & LNET_PEER_MULTI_RAIL) {
1264 CDEBUG(D_NET, "multi-rail state mismatch for peer %s\n",
1265 libcfs_nid2str(prim_nid));
1269 return lnet_peer_add_nid(lp, nid, flags);
1273 * Implementation of IOC_LIBCFS_DEL_PEER_NI.
1275 * This API handles the following combinations:
1276 * Delete a NI from a peer if both prim_nid and nid are provided.
1277 * Delete a peer if only prim_nid is provided.
1278 * Delete a peer if its primary nid is provided.
1280 * The caller must hold ln_api_mutex. This prevents the peer from
1281 * being modified/deleted by a different thread.
1284 lnet_del_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid)
1286 struct lnet_peer *lp;
1287 struct lnet_peer_ni *lpni;
1290 if (prim_nid == LNET_NID_ANY)
1293 lpni = lnet_find_peer_ni_locked(prim_nid);
1296 lnet_peer_ni_decref_locked(lpni);
1297 lp = lpni->lpni_peer_net->lpn_peer;
1299 if (prim_nid != lp->lp_primary_nid) {
1300 CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
1301 libcfs_nid2str(prim_nid),
1302 libcfs_nid2str(lp->lp_primary_nid));
/* no specific NID, or the primary itself: drop the whole peer */
1306 if (nid == LNET_NID_ANY || nid == lp->lp_primary_nid)
1307 return lnet_peer_del(lp);
1309 flags = LNET_PEER_CONFIGURED;
1310 if (lp->lp_state & LNET_PEER_MULTI_RAIL)
1311 flags |= LNET_PEER_MULTI_RAIL;
1313 return lnet_peer_del_nid(lp, nid, flags);
/*
 * Final destructor, invoked when the last reference drops: take the
 * peer_ni off its CPT's zombie list and free it, including any
 * multi-entry preferred-NID array.
 */
1317 lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni)
1319 struct lnet_peer_table *ptable;
1321 LASSERT(atomic_read(&lpni->lpni_refcount) == 0);
1322 LASSERT(lpni->lpni_rtr_refcount == 0);
1323 LASSERT(list_empty(&lpni->lpni_txq));
1324 LASSERT(lpni->lpni_txqnob == 0);
1326 lpni->lpni_net = NULL;
1328 /* remove the peer ni from the zombie list */
1329 ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
1330 spin_lock(&ptable->pt_zombie_lock);
1331 list_del_init(&lpni->lpni_hashlist);
1332 ptable->pt_zombies--;
1333 spin_unlock(&ptable->pt_zombie_lock);
/* nnids > 1 means lpni_pref is the heap-allocated nids[] array */
1335 if (lpni->lpni_pref_nnids > 1) {
1336 LIBCFS_FREE(lpni->lpni_pref.nids,
1337 sizeof(*lpni->lpni_pref.nids) * lpni->lpni_pref_nnids);
1339 LIBCFS_FREE(lpni, sizeof(*lpni));
/*
 * Find-or-create a peer_ni for @nid with the net lock for @cpt held on
 * entry (dropped around the creation, retaken before the second
 * lookup).  Returns the referenced peer_ni or an ERR_PTR.
 */
1342 struct lnet_peer_ni *
1343 lnet_nid2peerni_ex(lnet_nid_t nid, int cpt)
1345 struct lnet_peer_ni *lpni = NULL;
1348 if (the_lnet.ln_state != LNET_STATE_RUNNING)
1349 return ERR_PTR(-ESHUTDOWN);
1352 * find if a peer_ni already exists.
1353 * If so then just return that.
1355 lpni = lnet_find_peer_ni_locked(nid);
1359 lnet_net_unlock(cpt);
1361 rc = lnet_peer_ni_traffic_add(nid, LNET_NID_ANY);
1364 goto out_net_relock;
/* re-lookup to pick up the peer_ni traffic_add just made */
1367 lpni = lnet_find_peer_ni_locked(nid);
/*
 * Find-or-create a peer_ni for @nid, preferring local NID @pref for a
 * newly created one.  Unlike lnet_nid2peerni_ex(), creation is
 * serialized on ln_api_mutex against DLC configuration and local
 * NI/net add/remove.  Returns a referenced peer_ni or an ERR_PTR.
 */
1376 struct lnet_peer_ni *
1377 lnet_nid2peerni_locked(lnet_nid_t nid, lnet_nid_t pref, int cpt)
1379 struct lnet_peer_ni *lpni = NULL;
1382 if (the_lnet.ln_state != LNET_STATE_RUNNING)
1383 return ERR_PTR(-ESHUTDOWN);
1386 * find if a peer_ni already exists.
1387 * If so then just return that.
1389 lpni = lnet_find_peer_ni_locked(nid);
1395 * use the lnet_api_mutex to serialize the creation of the peer_ni
1396 * and the creation/deletion of the local ni/net. When a local ni is
1397 * created, if there exists a set of peer_nis on that network,
1398 * they need to be traversed and updated. When a local NI is
1399 * deleted, which could result in a network being deleted, then
1400 * all peer nis on that network need to be removed as well.
1402 * Creation through traffic should also be serialized with
1403 * creation through DLC.
1405 lnet_net_unlock(cpt);
1406 mutex_lock(&the_lnet.ln_api_mutex);
1408 * Shutdown is only set under the ln_api_lock, so a single
1409 * check here is sufficent.
1411 if (the_lnet.ln_state != LNET_STATE_RUNNING) {
1412 lpni = ERR_PTR(-ESHUTDOWN);
1413 goto out_mutex_unlock;
1416 rc = lnet_peer_ni_traffic_add(nid, pref);
1419 goto out_mutex_unlock;
1422 lpni = lnet_find_peer_ni_locked(nid);
1426 mutex_unlock(&the_lnet.ln_api_mutex);
/*
 * Debug helper: look up (or create) the peer_ni for @nid and dump its
 * liveness, credits and queue depth via CDEBUG(D_WARNING).
 */
1433 lnet_debug_peer(lnet_nid_t nid)
1435 char *aliveness = "NA";
1436 struct lnet_peer_ni *lp;
1439 cpt = lnet_cpt_of_nid(nid, NULL);
1442 lp = lnet_nid2peerni_locked(nid, LNET_NID_ANY, cpt);
1444 lnet_net_unlock(cpt);
1445 CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
/* aliveness only meaningful for routers or when tracking is enabled */
1449 if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
1450 aliveness = lp->lpni_alive ? "up" : "down";
1452 CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
1453 libcfs_nid2str(lp->lpni_nid), atomic_read(&lp->lpni_refcount),
1454 aliveness, lp->lpni_net->net_tunables.lct_peer_tx_credits,
1455 lp->lpni_rtrcredits, lp->lpni_minrtrcredits,
1456 lp->lpni_txcredits, lp->lpni_mintxcredits, lp->lpni_txqnob);
1458 lnet_peer_ni_decref_locked(lp);
1460 lnet_net_unlock(cpt);
/*
 * Ioctl helper: report credit/liveness info for the peer_index'th
 * peer_ni in the table of CPT *cpt_iter.  Returns 0 when found,
 * -ENOENT when the index walks off the end of this CPT's table.
 */
1463 int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
1464 char aliveness[LNET_MAX_STR_LEN],
1465 __u32 *cpt_iter, __u32 *refcount,
1466 __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
1467 __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
1468 __u32 *peer_tx_qnob)
1470 struct lnet_peer_table *peer_table;
1471 struct lnet_peer_ni *lp;
1476 /* get the number of CPTs */
1477 lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
1479 /* if the cpt number to be examined is >= the number of cpts in
1480 * the system then indicate that there are no more cpts to examin
1482 if (*cpt_iter >= lncpt)
1485 /* get the current table */
1486 peer_table = the_lnet.ln_peer_tables[*cpt_iter];
1487 /* if the ptable is NULL then there are no more cpts to examine */
1488 if (peer_table == NULL)
1491 lnet_net_lock(*cpt_iter);
1493 for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) {
1494 struct list_head *peers = &peer_table->pt_hash[j];
1496 list_for_each_entry(lp, peers, lpni_hashlist) {
/* skip entries until peer_index reaches the requested one */
1497 if (peer_index-- > 0)
1500 snprintf(aliveness, LNET_MAX_STR_LEN, "NA");
1501 if (lnet_isrouter(lp) ||
1502 lnet_peer_aliveness_enabled(lp))
1503 snprintf(aliveness, LNET_MAX_STR_LEN,
1504 lp->lpni_alive ? "up" : "down");
1506 *nid = lp->lpni_nid;
1507 *refcount = atomic_read(&lp->lpni_refcount);
1508 *ni_peer_tx_credits =
1509 lp->lpni_net->net_tunables.lct_peer_tx_credits;
1510 *peer_tx_credits = lp->lpni_txcredits;
1511 *peer_rtr_credits = lp->lpni_rtrcredits;
/* was lpni_mintxcredits: copy-paste defect — the min *router*
 * credits belong here, matching lnet_get_peer_info() below */
1512 *peer_min_rtr_credits = lp->lpni_minrtrcredits;
1513 *peer_tx_qnob = lp->lpni_txqnob;
1519 lnet_net_unlock(*cpt_iter);
1523 return found ? 0 : -ENOENT;
1526 int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
1527 bool *mr, struct lnet_peer_ni_credit_info *peer_ni_info,
1528 struct lnet_ioctl_element_stats *peer_ni_stats)
1530 struct lnet_peer_ni *lpni = NULL;
1531 struct lnet_peer_net *lpn = NULL;
1532 struct lnet_peer *lp = NULL;
1534 lpni = lnet_get_peer_ni_idx_locked(idx, &lpn, &lp);
1539 *primary_nid = lp->lp_primary_nid;
1540 *mr = lnet_peer_is_multi_rail(lp);
1541 *nid = lpni->lpni_nid;
1542 snprintf(peer_ni_info->cr_aliveness, LNET_MAX_STR_LEN, "NA");
1543 if (lnet_isrouter(lpni) ||
1544 lnet_peer_aliveness_enabled(lpni))
1545 snprintf(peer_ni_info->cr_aliveness, LNET_MAX_STR_LEN,
1546 lpni->lpni_alive ? "up" : "down");
1548 peer_ni_info->cr_refcount = atomic_read(&lpni->lpni_refcount);
1549 peer_ni_info->cr_ni_peer_tx_credits = (lpni->lpni_net != NULL) ?
1550 lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0;
1551 peer_ni_info->cr_peer_tx_credits = lpni->lpni_txcredits;
1552 peer_ni_info->cr_peer_rtr_credits = lpni->lpni_rtrcredits;
1553 peer_ni_info->cr_peer_min_rtr_credits = lpni->lpni_minrtrcredits;
1554 peer_ni_info->cr_peer_min_tx_credits = lpni->lpni_mintxcredits;
1555 peer_ni_info->cr_peer_tx_qnob = lpni->lpni_txqnob;
1557 peer_ni_stats->send_count = atomic_read(&lpni->lpni_stats.send_count);
1558 peer_ni_stats->recv_count = atomic_read(&lpni->lpni_stats.recv_count);
1559 peer_ni_stats->drop_count = atomic_read(&lpni->lpni_stats.drop_count);