4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2014, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
35 #define DEBUG_SUBSYSTEM S_LNET
37 #include <lnet/lib-lnet.h>
38 #include <uapi/linux/lnet/lnet-dlc.h>
/*
 * If lpni is currently linked through lpni_on_remote_peer_ni_list, unlink
 * it and drop one reference on lpni with lnet_peer_ni_decref_locked().
 * A peer_ni that is not on the list is left untouched.
 */
41 lnet_peer_remove_from_remote_list(struct lnet_peer_ni *lpni)
43 if (!list_empty(&lpni->lpni_on_remote_peer_ni_list)) {
44 list_del_init(&lpni->lpni_on_remote_peer_ni_list);
45 lnet_peer_ni_decref_locked(lpni);
/*
 * Revisit the peer_nis parked on the_lnet.ln_remote_peer_ni_list after a
 * network has been added.
 *
 * For every entry whose NID belongs to net->net_id, the tx credits and
 * router credits (and their recorded minimums) are reset under lpni_lock
 * from lpni->lpni_net, and the entry is then taken off the remote list.
 * The _safe list iterator is used because entries are unlinked while
 * walking.
 * NOTE(review): one statement between the net test and spin_lock() is
 * not visible in this listing - confirm how lpni_net is set.
 */
50 lnet_peer_net_added(struct lnet_net *net)
52 struct lnet_peer_ni *lpni, *tmp;
54 list_for_each_entry_safe(lpni, tmp, &the_lnet.ln_remote_peer_ni_list,
55 lpni_on_remote_peer_ni_list) {
57 if (LNET_NIDNET(lpni->lpni_nid) == net->net_id) {
60 spin_lock(&lpni->lpni_lock);
61 lpni->lpni_txcredits =
62 lpni->lpni_net->net_tunables.lct_peer_tx_credits;
63 lpni->lpni_mintxcredits = lpni->lpni_txcredits;
64 lpni->lpni_rtrcredits =
65 lnet_peer_buffer_credits(lpni->lpni_net);
66 lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
67 spin_unlock(&lpni->lpni_lock);
69 lnet_peer_remove_from_remote_list(lpni);
/*
 * Release the per-CPT peer tables.
 *
 * the_lnet.ln_peer_tables is tested for NULL first. For each table whose
 * pt_hash is set, the zombie list and every hash chain are asserted to be
 * empty, pt_hash is cleared, and the hash array of LNET_PEER_HASH_SIZE
 * list heads is freed. Finally the per-CPT array itself is freed and
 * the_lnet.ln_peer_tables is reset to NULL.
 */
75 lnet_peer_tables_destroy(void)
77 struct lnet_peer_table *ptable;
78 struct list_head *hash;
82 if (!the_lnet.ln_peer_tables)
85 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
86 hash = ptable->pt_hash;
87 if (!hash) /* not initialized */
90 LASSERT(list_empty(&ptable->pt_zombie_list));
/* Clear the pointer before freeing so the table reads as uninitialized. */
92 ptable->pt_hash = NULL;
93 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
94 LASSERT(list_empty(&hash[j]));
96 LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
99 cfs_percpt_free(the_lnet.ln_peer_tables);
100 the_lnet.ln_peer_tables = NULL;
/*
 * Allocate and initialize the per-CPT peer tables.
 *
 * The table array is allocated with cfs_percpt_alloc(); failure is logged
 * with CERROR. For each table a hash array of LNET_PEER_HASH_SIZE list
 * heads is allocated on that CPT. On the hash allocation error path an
 * error is logged and lnet_peer_tables_destroy() undoes the partial
 * setup. Otherwise the zombie lock and list, the peer list and every hash
 * chain are initialized, and pt_hash is assigned last because a non-NULL
 * pt_hash marks the table as initialized.
 * NOTE(review): return statements are not visible in this listing.
 */
104 lnet_peer_tables_create(void)
106 struct lnet_peer_table *ptable;
107 struct list_head *hash;
111 the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
113 if (the_lnet.ln_peer_tables == NULL) {
114 CERROR("Failed to allocate cpu-partition peer tables\n");
118 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
119 LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
120 LNET_PEER_HASH_SIZE * sizeof(*hash));
122 CERROR("Failed to create peer hash table\n");
123 lnet_peer_tables_destroy();
127 spin_lock_init(&ptable->pt_zombie_lock);
128 INIT_LIST_HEAD(&ptable->pt_zombie_list);
130 INIT_LIST_HEAD(&ptable->pt_peer_list);
132 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
133 INIT_LIST_HEAD(&hash[j]);
134 ptable->pt_hash = hash; /* sign of initialization */
/*
 * Allocate and initialize a peer_ni for nid.
 *
 * The structure is allocated on the CPT that lnet_nid_cpt_hash() selects
 * for nid. All list heads and the spinlock are initialized, the alive
 * flag is set to the inverse of lnet_peers_start_down(), the last-alive
 * time is set to now, and the health state is set to true. The local net
 * for the NID is looked up with lnet_get_net_locked() and stored in
 * lpni_net.
 *
 * Two outcomes are visible below: either the credits are copied from the
 * net, or a reference is taken and the peer_ni is queued on
 * the_lnet.ln_remote_peer_ni_list so it can be revisited when the net is
 * added later.
 * NOTE(review): the lines selecting between those two paths, the
 * allocation failure check and the return are not visible in this
 * listing - confirm against the full file.
 */
140 static struct lnet_peer_ni *
141 lnet_peer_ni_alloc(lnet_nid_t nid)
143 struct lnet_peer_ni *lpni;
144 struct lnet_net *net;
147 cpt = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
149 LIBCFS_CPT_ALLOC(lpni, lnet_cpt_table(), cpt, sizeof(*lpni));
153 INIT_LIST_HEAD(&lpni->lpni_txq);
154 INIT_LIST_HEAD(&lpni->lpni_rtrq);
155 INIT_LIST_HEAD(&lpni->lpni_routes);
156 INIT_LIST_HEAD(&lpni->lpni_hashlist);
157 INIT_LIST_HEAD(&lpni->lpni_peer_nis);
158 INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list);
160 spin_lock_init(&lpni->lpni_lock);
162 lpni->lpni_alive = !lnet_peers_start_down(); /* 1 bit!! */
163 lpni->lpni_last_alive = cfs_time_current(); /* assumes alive */
164 lpni->lpni_ping_feats = LNET_PING_FEAT_INVAL;
165 lpni->lpni_nid = nid;
166 lpni->lpni_cpt = cpt;
167 lnet_set_peer_ni_health_locked(lpni, true);
169 net = lnet_get_net_locked(LNET_NIDNET(nid));
170 lpni->lpni_net = net;
172 lpni->lpni_txcredits = net->net_tunables.lct_peer_tx_credits;
173 lpni->lpni_mintxcredits = lpni->lpni_txcredits;
174 lpni->lpni_rtrcredits = lnet_peer_buffer_credits(net);
175 lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
178 * This peer_ni is not on a local network, so we
179 * cannot add the credits here. In case the net is
180 * added later, add the peer_ni to the remote peer ni
181 * list so it can be easily found and revisited.
183 /* FIXME: per-net implementation instead? */
184 atomic_inc(&lpni->lpni_refcount);
185 list_add_tail(&lpni->lpni_on_remote_peer_ni_list,
186 &the_lnet.ln_remote_peer_ni_list);
189 CDEBUG(D_NET, "%p nid %s\n", lpni, libcfs_nid2str(lpni->lpni_nid));
/*
 * Allocate a peer_net for net_id on any CPT (CFS_CPT_ANY), initialize its
 * two list heads, record net_id and log the new object at D_NET level.
 * NOTE(review): the allocation failure check and the return are not
 * visible in this listing.
 */
194 static struct lnet_peer_net *
195 lnet_peer_net_alloc(__u32 net_id)
197 struct lnet_peer_net *lpn;
199 LIBCFS_CPT_ALLOC(lpn, lnet_cpt_table(), CFS_CPT_ANY, sizeof(*lpn));
203 INIT_LIST_HEAD(&lpn->lpn_peer_nets);
204 INIT_LIST_HEAD(&lpn->lpn_peer_nis);
205 lpn->lpn_net_id = net_id;
207 CDEBUG(D_NET, "%p net %s\n", lpn, libcfs_net2str(lpn->lpn_net_id));
/*
 * Free a peer_net that is no longer in use.
 *
 * Asserts that the refcount is zero and that the peer_net is linked to
 * neither peer_nis nor a peer, clears lpn_peer, frees the structure and
 * then drops a reference on lp with lnet_peer_decref_locked().
 * NOTE(review): the assignment of lp is not visible in this listing;
 * presumably it is read from lpn->lpn_peer before that field is cleared
 * - confirm.
 */
213 lnet_destroy_peer_net_locked(struct lnet_peer_net *lpn)
215 struct lnet_peer *lp;
217 CDEBUG(D_NET, "%p net %s\n", lpn, libcfs_net2str(lpn->lpn_net_id));
219 LASSERT(atomic_read(&lpn->lpn_refcount) == 0);
220 LASSERT(list_empty(&lpn->lpn_peer_nis));
221 LASSERT(list_empty(&lpn->lpn_peer_nets));
223 lpn->lpn_peer = NULL;
224 LIBCFS_FREE(lpn, sizeof(*lpn));
226 lnet_peer_decref_locked(lp);
/*
 * Allocate a peer on any CPT (CFS_CPT_ANY) with nid as its primary NID.
 *
 * Initializes the list heads, the lp_dc_waitq wait queue and the
 * spinlock, stores nid in lp_primary_nid, and sets lp_cpt to the CPT
 * that lnet_nid_cpt_hash() selects for nid.
 * NOTE(review): the allocation failure check and the return are not
 * visible in this listing.
 */
229 static struct lnet_peer *
230 lnet_peer_alloc(lnet_nid_t nid)
232 struct lnet_peer *lp;
234 LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), CFS_CPT_ANY, sizeof(*lp));
238 INIT_LIST_HEAD(&lp->lp_peer_list);
239 INIT_LIST_HEAD(&lp->lp_peer_nets);
240 INIT_LIST_HEAD(&lp->lp_dc_list);
241 init_waitqueue_head(&lp->lp_dc_waitq);
242 spin_lock_init(&lp->lp_lock);
243 lp->lp_primary_nid = nid;
244 lp->lp_cpt = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
246 CDEBUG(D_NET, "%p nid %s\n", lp, libcfs_nid2str(lp->lp_primary_nid));
/*
 * Free a peer. The caller must pass a peer whose refcount is zero and
 * which is linked to no peer_net and to no peer table list; all three
 * conditions are asserted before the memory is released.
 */
252 lnet_destroy_peer_locked(struct lnet_peer *lp)
254 CDEBUG(D_NET, "%p nid %s\n", lp, libcfs_nid2str(lp->lp_primary_nid));
256 LASSERT(atomic_read(&lp->lp_refcount) == 0);
257 LASSERT(list_empty(&lp->lp_peer_nets));
258 LASSERT(list_empty(&lp->lp_peer_list));
260 LIBCFS_FREE(lp, sizeof(*lp));
264 * Detach a peer_ni from its peer_net. If this was the last peer_ni on
265 * that peer_net, detach the peer_net from the peer.
267 * Call with lnet_net_lock/EX held
/*
 * Unlink lpni from the peer hierarchy, bottom up:
 *  - remove lpni from the peer_ni list of its peer_net;
 *  - if that peer_net has no peer_nis left, unlink it from the peer;
 *  - decrement pt_peer_nnids on the peer table selected by lp->lp_cpt;
 *  - if the peer has no peer_nets left, unlink it from the peer table
 *    list so it can no longer be found.
 * No memory is freed here; only list membership and counters change.
 * NOTE(review): the assignment of lp and any NULL guard on lpn are not
 * visible in this listing.
 */
270 lnet_peer_detach_peer_ni_locked(struct lnet_peer_ni *lpni)
272 struct lnet_peer_table *ptable;
273 struct lnet_peer_net *lpn;
274 struct lnet_peer *lp;
277 * Belts and suspenders: gracefully handle teardown of a
278 * partially connected peer_ni.
280 lpn = lpni->lpni_peer_net;
282 list_del_init(&lpni->lpni_peer_nis);
284 * If there are no lpni's left, we detach lpn from
285 * lp_peer_nets, so it cannot be found anymore.
287 if (list_empty(&lpn->lpn_peer_nis))
288 list_del_init(&lpn->lpn_peer_nets);
290 /* Update peer NID count. */
292 ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
294 ptable->pt_peer_nnids--;
297 * If there are no more peer nets, make the peer unfindable
298 * via the peer_tables.
300 if (list_empty(&lp->lp_peer_nets)) {
301 list_del_init(&lp->lp_peer_list);
304 CDEBUG(D_NET, "peer %s NID %s\n",
305 libcfs_nid2str(lp->lp_primary_nid),
306 libcfs_nid2str(lpni->lpni_nid));
309 /* called with lnet_net_lock LNET_LOCK_EX held */
/*
 * Make a peer_ni unfindable and hand it over to the zombie list.
 *
 * A peer_ni with lpni_rtr_refcount > 0 is a gateway: an error is logged
 * for it. Otherwise the peer_ni is taken off the remote peer_ni list and
 * off its hash chain, then re-linked (through the same lpni_hashlist
 * member) onto the zombie list of the table for lpni->lpni_cpt under
 * pt_zombie_lock, with pt_zombies incremented. It is then detached from
 * its peer_net and peer, and the reference that the hash list held is
 * dropped.
 * NOTE(review): return values and the pt_number update are on lines not
 * visible in this listing; a caller below stores the result in rc.
 */
311 lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni)
313 struct lnet_peer_table *ptable = NULL;
315 /* don't remove a peer_ni if it's also a gateway */
316 if (lpni->lpni_rtr_refcount > 0) {
317 CERROR("Peer NI %s is a gateway. Can not delete it\n",
318 libcfs_nid2str(lpni->lpni_nid));
322 lnet_peer_remove_from_remote_list(lpni);
324 /* remove peer ni from the hash list. */
325 list_del_init(&lpni->lpni_hashlist);
327 /* decrement the ref count on the peer table */
328 ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
329 LASSERT(ptable->pt_number > 0);
333 * The peer_ni can no longer be found with a lookup. But there
334 * can be current users, so keep track of it on the zombie
335 * list until the reference count has gone to zero.
337 * The last reference may be lost in a place where the
338 * lnet_net_lock locks only a single cpt, and that cpt may not
339 * be lpni->lpni_cpt. So the zombie list of lnet_peer_table
342 spin_lock(&ptable->pt_zombie_lock);
343 list_add(&lpni->lpni_hashlist, &ptable->pt_zombie_list);
344 ptable->pt_zombies++;
345 spin_unlock(&ptable->pt_zombie_lock);
347 /* no need to keep this peer_ni on the hierarchy anymore */
348 lnet_peer_detach_peer_ni_locked(lpni);
350 /* remove hashlist reference on peer_ni */
351 lnet_peer_ni_decref_locked(lpni);
/*
 * Tear down peer state: with LNET_LOCK_EX held, delete every peer_ni
 * that is still on the_lnet.ln_remote_peer_ni_list and then destroy the
 * peer tables. The _safe iterator is required because deletion unlinks
 * the current entry from the list being walked.
 */
356 void lnet_peer_uninit(void)
358 struct lnet_peer_ni *lpni, *tmp;
360 lnet_net_lock(LNET_LOCK_EX);
362 /* remove all peer_nis from the remote peer and the hash list */
363 list_for_each_entry_safe(lpni, tmp, &the_lnet.ln_remote_peer_ni_list,
364 lpni_on_remote_peer_ni_list)
365 lnet_peer_ni_del_locked(lpni);
367 lnet_peer_tables_destroy();
369 lnet_net_unlock(LNET_LOCK_EX);
/*
 * Delete every peer_ni of peer.
 *
 * The successor (lpni2) is fetched with lnet_get_next_peer_ni_locked()
 * before the current peer_ni is deleted, because deletion unlinks the
 * current entry from the lists the iterator follows.
 * NOTE(review): the rest of the loop body (error handling, advancing
 * lpni to lpni2) and the return are not visible in this listing.
 */
373 lnet_peer_del_locked(struct lnet_peer *peer)
375 struct lnet_peer_ni *lpni = NULL, *lpni2;
378 CDEBUG(D_NET, "peer %s\n", libcfs_nid2str(peer->lp_primary_nid));
380 lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
381 while (lpni != NULL) {
382 lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
383 rc = lnet_peer_ni_del_locked(lpni);
/*
 * Locking wrapper: take LNET_LOCK_EX, delete the peer with
 * lnet_peer_del_locked(), and release the lock.
 * NOTE(review): the return statement is not visible in this listing; a
 * caller below returns the value of this function.
 */
393 lnet_peer_del(struct lnet_peer *peer)
395 lnet_net_lock(LNET_LOCK_EX);
396 lnet_peer_del_locked(peer);
397 lnet_net_unlock(LNET_LOCK_EX);
403 * Delete a NID from a peer. Call with ln_api_mutex held.
406 * -EPERM: Non-DLC deletion from DLC-configured peer.
407 * -ENOENT: No lnet_peer_ni corresponding to the nid.
408 * -ECHILD: The lnet_peer_ni isn't connected to the peer.
409 * -EBUSY: The lnet_peer_ni is the primary, and not the only peer_ni.
/*
 * Remove the peer_ni for nid from peer lp.
 *
 * Checks visible below, in order:
 *  - a caller without LNET_PEER_CONFIGURED in flags acting on a peer
 *    that has LNET_PEER_CONFIGURED set;
 *  - lookup of nid with lnet_find_peer_ni_locked(); the lookup reference
 *    is dropped again right away;
 *  - the peer_ni found must belong to lp;
 *  - the primary NID may only be removed when lp->lp_nnis is 1.
 * The deletion itself runs under LNET_LOCK_EX. The primary NID is copied
 * to a local up front so the final debug message does not need to read
 * lp after the deletion.
 * NOTE(review): the error codes assigned on each failing check are on
 * lines not visible in this listing; the comment preceding this function
 * lists the intended values.
 */
412 lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags)
414 struct lnet_peer_ni *lpni;
415 lnet_nid_t primary_nid = lp->lp_primary_nid;
418 if (!(flags & LNET_PEER_CONFIGURED)) {
419 if (lp->lp_state & LNET_PEER_CONFIGURED) {
424 lpni = lnet_find_peer_ni_locked(nid);
429 lnet_peer_ni_decref_locked(lpni);
430 if (lp != lpni->lpni_peer_net->lpn_peer) {
436 * This function only allows deletion of the primary NID if it
439 if (nid == lp->lp_primary_nid && lp->lp_nnis != 1) {
444 lnet_net_lock(LNET_LOCK_EX);
445 lnet_peer_ni_del_locked(lpni);
446 lnet_net_unlock(LNET_LOCK_EX);
449 CDEBUG(D_NET, "peer %s NID %s flags %#x: %d\n",
450 libcfs_nid2str(primary_nid), libcfs_nid2str(nid), flags, rc);
/*
 * Delete peer_nis found in the hash chains of ptable.
 *
 * When net is non-NULL, each peer_ni is first compared against net. A
 * peer_ni that is not the primary NID of its peer is deleted on its own.
 * For a primary NID the whole peer is deleted; before doing so the saved
 * iterator position (next) is advanced past any following entries in
 * this chain that belong to the same peer, so the outer _safe iteration
 * does not resume on an entry that lnet_peer_del_locked() removes.
 * NOTE(review): the statements that follow the net test and the
 * non-primary deletion are not visible in this listing.
 */
456 lnet_peer_table_cleanup_locked(struct lnet_net *net,
457 struct lnet_peer_table *ptable)
460 struct lnet_peer_ni *next;
461 struct lnet_peer_ni *lpni;
462 struct lnet_peer *peer;
464 for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
465 list_for_each_entry_safe(lpni, next, &ptable->pt_hash[i],
467 if (net != NULL && net != lpni->lpni_net)
470 peer = lpni->lpni_peer_net->lpn_peer;
471 if (peer->lp_primary_nid != lpni->lpni_nid) {
472 lnet_peer_ni_del_locked(lpni);
476 * Removing the primary NID implies removing
477 * the entire peer. Advance next beyond any
478 * peer_ni that belongs to the same peer.
480 list_for_each_entry_from(next, &ptable->pt_hash[i],
482 if (next->lpni_peer_net->lpn_peer != peer)
485 lnet_peer_del_locked(peer);
/*
 * Block until ptable->pt_zombies drops to zero.
 *
 * The counter is read under pt_zombie_lock. While it is nonzero the lock
 * is released, a message reporting the wait is issued whenever the local
 * counter i is a power of two, and the thread sleeps uninterruptibly for
 * cfs_time_seconds(1) >> 1 (half a second) before retaking the lock and
 * checking again.
 * NOTE(review): the declaration and increment of i are not visible in
 * this listing.
 */
491 lnet_peer_ni_finalize_wait(struct lnet_peer_table *ptable)
495 spin_lock(&ptable->pt_zombie_lock);
496 while (ptable->pt_zombies) {
497 spin_unlock(&ptable->pt_zombie_lock);
499 if (is_power_of_2(i)) {
501 "Waiting for %d zombies on peer table\n",
504 set_current_state(TASK_UNINTERRUPTIBLE);
505 schedule_timeout(cfs_time_seconds(1) >> 1);
506 spin_lock(&ptable->pt_zombie_lock);
508 spin_unlock(&ptable->pt_zombie_lock);
/*
 * Remove the routes that go through gateway peer_nis of net in ptable.
 *
 * Each peer_ni in every hash chain is tested against net and against a
 * zero lpni_rtr_refcount. For the remaining entries the NID is copied to
 * a local, LNET_LOCK_EX is dropped around the call to lnet_del_route()
 * for that gateway NID, and the lock is retaken afterwards.
 * NOTE(review): the statements following the two tests are not visible
 * in this listing (presumably they skip the entry). Also note that the
 * saved iterator position tmp is carried across the window in which the
 * lock is released - confirm that this is safe against concurrent
 * removal.
 */
512 lnet_peer_table_del_rtrs_locked(struct lnet_net *net,
513 struct lnet_peer_table *ptable)
515 struct lnet_peer_ni *lp;
516 struct lnet_peer_ni *tmp;
520 for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
521 list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
523 if (net != lp->lpni_net)
526 if (lp->lpni_rtr_refcount == 0)
529 lpni_nid = lp->lpni_nid;
531 lnet_net_unlock(LNET_LOCK_EX);
532 lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lpni_nid);
533 lnet_net_lock(LNET_LOCK_EX);
/*
 * Clean the peer tables, either for one net or (net == NULL) for all.
 *
 * Asserts that a NULL net is only passed when the_lnet.ln_state is
 * LNET_STATE_SHUTDOWN. Runs three passes over the per-CPT tables:
 *  1. delete routes through gateway peer_nis, under LNET_LOCK_EX;
 *  2. delete the peer_nis themselves, under LNET_LOCK_EX;
 *  3. wait, without the net lock, until each table has no zombies left.
 */
539 lnet_peer_tables_cleanup(struct lnet_net *net)
542 struct lnet_peer_table *ptable;
544 LASSERT(the_lnet.ln_state != LNET_STATE_SHUTDOWN || net != NULL);
545 /* If just deleting the peers for a NI, get rid of any routes these
546 * peers are gateways for. */
547 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
548 lnet_net_lock(LNET_LOCK_EX);
549 lnet_peer_table_del_rtrs_locked(net, ptable);
550 lnet_net_unlock(LNET_LOCK_EX);
553 /* Start the cleanup process */
554 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
555 lnet_net_lock(LNET_LOCK_EX);
556 lnet_peer_table_cleanup_locked(net, ptable);
557 lnet_net_unlock(LNET_LOCK_EX);
560 cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables)
561 lnet_peer_ni_finalize_wait(ptable);
/*
 * Search ptable for the peer_ni with the given nid.
 *
 * Asserts that LNet is in LNET_STATE_RUNNING, selects the hash chain
 * with lnet_nid2peerhash(nid) and scans it linearly. On a match a
 * reference is taken on the peer_ni for the caller.
 * NOTE(review): the return statements are not visible in this listing.
 */
564 static struct lnet_peer_ni *
565 lnet_get_peer_ni_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
567 struct list_head *peers;
568 struct lnet_peer_ni *lp;
570 LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
572 peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
573 list_for_each_entry(lp, peers, lpni_hashlist) {
574 if (lp->lpni_nid == nid) {
575 lnet_peer_ni_addref_locked(lp);
/*
 * Look up the peer_ni for nid in the peer table of the CPT that
 * lnet_nid_cpt_hash() assigns to that NID. The lookup helper takes a
 * reference on a match, so callers in this file drop it with
 * lnet_peer_ni_decref_locked() when done.
 */
583 struct lnet_peer_ni *
584 lnet_find_peer_ni_locked(lnet_nid_t nid)
586 struct lnet_peer_ni *lpni;
587 struct lnet_peer_table *ptable;
590 cpt = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
592 ptable = the_lnet.ln_peer_tables[cpt];
593 lpni = lnet_get_peer_ni_locked(ptable, nid);
/*
 * Find a peer_ni by flat index across all peer tables.
 *
 * idx is narrowed in stages: first by subtracting pt_peer_nnids of each
 * per-CPT table until the table containing the index is reached, then by
 * subtracting lp_nnis of each peer on that table, and finally by walking
 * the peer_nets and peer_nis of the selected peer. The out parameters
 * *lp and *lpn are used directly as the loop cursors.
 * NOTE(review): the innermost loop body and the return statements are
 * not visible in this listing.
 */
598 struct lnet_peer_ni *
599 lnet_get_peer_ni_idx_locked(int idx, struct lnet_peer_net **lpn,
600 struct lnet_peer **lp)
602 struct lnet_peer_table *ptable;
603 struct lnet_peer_ni *lpni;
607 lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
609 for (cpt = 0; cpt < lncpt; cpt++) {
610 ptable = the_lnet.ln_peer_tables[cpt];
611 if (ptable->pt_peer_nnids > idx)
613 idx -= ptable->pt_peer_nnids;
618 list_for_each_entry((*lp), &ptable->pt_peer_list, lp_peer_list) {
619 if ((*lp)->lp_nnis <= idx) {
620 idx -= (*lp)->lp_nnis;
623 list_for_each_entry((*lpn), &((*lp)->lp_peer_nets),
625 list_for_each_entry(lpni, &((*lpn)->lpn_peer_nis),
/*
 * Iterator over the peer_nis of a peer.
 *
 * Callers in this file start with prev == NULL and feed each result back
 * as prev until NULL is returned. Three cases are visible below:
 *  - start of iteration: take the first peer_ni of a peer_net (the first
 *    peer_net of peer when none is supplied and the peer has any);
 *  - prev is the last peer_ni on its peer_net: stop if peer_net was
 *    given or if there is no further peer_net, otherwise continue with
 *    the first peer_ni of the next peer_net;
 *  - otherwise: return the peer_ni following prev on the same peer_net.
 * NOTE(review): the conditions selecting the first case and all return
 * statements are on lines not visible in this listing.
 */
636 struct lnet_peer_ni *
637 lnet_get_next_peer_ni_locked(struct lnet_peer *peer,
638 struct lnet_peer_net *peer_net,
639 struct lnet_peer_ni *prev)
641 struct lnet_peer_ni *lpni;
642 struct lnet_peer_net *net = peer_net;
646 if (list_empty(&peer->lp_peer_nets))
649 net = list_entry(peer->lp_peer_nets.next,
650 struct lnet_peer_net,
653 lpni = list_entry(net->lpn_peer_nis.next, struct lnet_peer_ni,
659 if (prev->lpni_peer_nis.next == &prev->lpni_peer_net->lpn_peer_nis) {
661 * if you reached the end of the peer ni list and the peer
662 * net is specified then there are no more peer nis in that
669 * we reached the end of this net ni list. move to the
672 if (prev->lpni_peer_net->lpn_peer_nets.next ==
674 /* no more nets and no more NIs. */
677 /* get the next net */
678 net = list_entry(prev->lpni_peer_net->lpn_peer_nets.next,
679 struct lnet_peer_net,
681 /* get the ni on it */
682 lpni = list_entry(net->lpn_peer_nis.next, struct lnet_peer_ni,
688 /* there are more nis left */
689 lpni = list_entry(prev->lpni_peer_nis.next,
690 struct lnet_peer_ni, lpni_peer_nis);
696 * Test whether a ni is a preferred ni for this peer_ni, e.g, whether
697 * this is a preferred point-to-point path. Call with lnet_net_lock in
/*
 * Test whether nid is one of the preferred NIDs recorded on lpni.
 *
 * The storage depends on the count: with exactly one preferred NID the
 * value is held directly in lpni_pref.nid; with more than one they are
 * held in the array lpni_pref.nids, which is scanned linearly.
 * NOTE(review): the results returned for the empty case and for the
 * array scan are on lines not visible in this listing.
 */
701 lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid)
705 if (lpni->lpni_pref_nnids == 0)
707 if (lpni->lpni_pref_nnids == 1)
708 return lpni->lpni_pref.nid == nid;
709 for (i = 0; i < lpni->lpni_pref_nnids; i++) {
710 if (lpni->lpni_pref.nids[i] == nid)
717 * Set a single ni as preferred, provided no preferred ni is already
718 * defined. Only to be used for non-multi-rail peer_ni.
/*
 * Record nid as the single preferred NID of lpni, under lpni_lock.
 *
 * Three cases are distinguished: nid is LNET_NID_ANY, a preferred NID
 * already exists, or none exists. Only in the last case is the NID
 * stored, the count set to 1 and LNET_PEER_NI_NON_MR_PREF set in
 * lpni_state, which marks the preference as having been set here.
 * NOTE(review): the rc values for the first two cases are on lines not
 * visible in this listing. The final test for a zero count is always
 * true at that point, since the preceding branch handled a count
 * greater than zero.
 */
721 lnet_peer_ni_set_non_mr_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
725 spin_lock(&lpni->lpni_lock);
726 if (nid == LNET_NID_ANY) {
728 } else if (lpni->lpni_pref_nnids > 0) {
730 } else if (lpni->lpni_pref_nnids == 0) {
731 lpni->lpni_pref.nid = nid;
732 lpni->lpni_pref_nnids = 1;
733 lpni->lpni_state |= LNET_PEER_NI_NON_MR_PREF;
735 spin_unlock(&lpni->lpni_lock);
737 CDEBUG(D_NET, "peer %s nid %s: %d\n",
738 libcfs_nid2str(lpni->lpni_nid), libcfs_nid2str(nid), rc);
743 * Clear the preferred NID from a non-multi-rail peer_ni, provided
744 * this preference was set by lnet_peer_ni_set_non_mr_pref_nid().
/*
 * Undo lnet_peer_ni_set_non_mr_pref_nid(), under lpni_lock.
 *
 * If LNET_PEER_NI_NON_MR_PREF is set in lpni_state, the preferred NID
 * count is reset to zero and the flag is cleared. Otherwise the count is
 * tested for zero to pick the result.
 * NOTE(review): the rc values for the remaining cases are on lines not
 * visible in this listing.
 */
747 lnet_peer_ni_clr_non_mr_pref_nid(struct lnet_peer_ni *lpni)
751 spin_lock(&lpni->lpni_lock);
752 if (lpni->lpni_state & LNET_PEER_NI_NON_MR_PREF) {
753 lpni->lpni_pref_nnids = 0;
754 lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
755 } else if (lpni->lpni_pref_nnids == 0) {
760 spin_unlock(&lpni->lpni_lock);
762 CDEBUG(D_NET, "peer %s: %d\n",
763 libcfs_nid2str(lpni->lpni_nid), rc);
768 * Clear the preferred NIDs from a non-multi-rail peer.
/*
 * Apply lnet_peer_ni_clr_non_mr_pref_nid() to every peer_ni of lp,
 * iterating with lnet_get_next_peer_ni_locked() until it returns NULL.
 * The per-peer_ni result is ignored.
 */
771 lnet_peer_clr_non_mr_pref_nids(struct lnet_peer *lp)
773 struct lnet_peer_ni *lpni = NULL;
775 while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL)
776 lnet_peer_ni_clr_non_mr_pref_nid(lpni);
/*
 * Add nid to the preferred NIDs of lpni.
 *
 * Visible checks: nid must not be LNET_NID_ANY; a single stored NID
 * equal to nid is detected; a peer without LNET_PEER_MULTI_RAIL may not
 * get a second preferred NID. When preferred NIDs already exist, a new
 * array with room for one more entry is allocated on the CPT of lpni and
 * the existing entries are copied into it, stopping if nid is already
 * present. The switch to the new storage is done under LNET_LOCK_EX and
 * lpni_lock, the count is incremented and LNET_PEER_NI_NON_MR_PREF is
 * cleared. The old array is released after the locks are dropped.
 *
 * Fix: the old array is now released with the byte count held in size.
 * size is already sizeof(*nids) times the old entry count, so scaling it
 * by sizeof(*oldnids) again reported a size several times larger than
 * what was allocated.
 *
 * NOTE(review): rc assignments, goto targets and the final store of nid
 * into the new array are on lines not visible in this listing.
 * NOTE(review): with exactly one existing preferred NID, other code in
 * this file reads the value from lpni_pref.nid rather than from the
 * lpni_pref.nids array; this function indexes and later frees
 * lpni_pref.nids in that case as well - confirm against the declaration
 * of lpni_pref.
 */
780 lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
782 lnet_nid_t *nids = NULL;
783 lnet_nid_t *oldnids = NULL;
784 struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer;
789 if (nid == LNET_NID_ANY) {
794 if (lpni->lpni_pref_nnids == 1 && lpni->lpni_pref.nid == nid) {
799 /* A non-MR node may have only one preferred NI per peer_ni */
800 if (lpni->lpni_pref_nnids > 0) {
801 if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
807 if (lpni->lpni_pref_nnids != 0) {
808 size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1);
809 LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size);
814 for (i = 0; i < lpni->lpni_pref_nnids; i++) {
815 if (lpni->lpni_pref.nids[i] == nid) {
816 LIBCFS_FREE(nids, size);
820 nids[i] = lpni->lpni_pref.nids[i];
825 lnet_net_lock(LNET_LOCK_EX);
826 spin_lock(&lpni->lpni_lock);
827 if (lpni->lpni_pref_nnids == 0) {
828 lpni->lpni_pref.nid = nid;
830 oldnids = lpni->lpni_pref.nids;
831 lpni->lpni_pref.nids = nids;
833 lpni->lpni_pref_nnids++;
834 lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
835 spin_unlock(&lpni->lpni_lock);
836 lnet_net_unlock(LNET_LOCK_EX);
/* The count was incremented above, so count - 1 is the old array length. */
839 size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1);
/* size is already a byte count; do not scale it a second time. */
840 LIBCFS_FREE(oldnids, size);
843 if (rc == -EEXIST && (lpni->lpni_state & LNET_PEER_NI_NON_MR_PREF)) {
844 spin_lock(&lpni->lpni_lock);
845 lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
846 spin_unlock(&lpni->lpni_lock);
848 CDEBUG(D_NET, "peer %s nid %s: %d\n",
849 libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid), rc);
/*
 * Remove nid from the preferred NIDs of lpni.
 *
 * The handling depends on the current count:
 *  - 0: nothing to remove;
 *  - 1: the single NID is held in lpni_pref.nid and must equal nid;
 *  - 2: nid must equal one of the two array entries; the surviving entry
 *    is moved into lpni_pref.nid and the array is released;
 *  - more: a new array with one entry fewer is allocated on the CPT of
 *    lpni and swapped in; the old array is released.
 * The switch is done under LNET_LOCK_EX and lpni_lock, the count is
 * decremented and LNET_PEER_NI_NON_MR_PREF is cleared. The old array is
 * released after the locks are dropped.
 *
 * Fixes:
 *  - In the two-entry case the surviving NID was read from oldnids[2],
 *    which is past the end of a two-entry array. When oldnids[0] is not
 *    the NID being removed, oldnids[0] is the survivor.
 *  - The old array is now released with the byte count held in size.
 *    size is already sizeof(*nids) times the old entry count, so scaling
 *    it by sizeof(*oldnids) again reported a size several times larger
 *    than what was allocated.
 *
 * NOTE(review): rc assignments and goto targets are on lines not visible
 * in this listing.
 * NOTE(review): in the copy loop the statement controlled by the
 * inequality test is not visible. If it is a continue, the loop copies
 * only the entries equal to nid instead of the entries to keep, and the
 * check after the loop cannot work as described - confirm against the
 * full file.
 */
854 lnet_peer_del_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
856 lnet_nid_t *nids = NULL;
857 lnet_nid_t *oldnids = NULL;
858 struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer;
863 if (lpni->lpni_pref_nnids == 0) {
868 if (lpni->lpni_pref_nnids == 1) {
869 if (lpni->lpni_pref.nid != nid) {
873 } else if (lpni->lpni_pref_nnids == 2) {
874 if (lpni->lpni_pref.nids[0] != nid &&
875 lpni->lpni_pref.nids[1] != nid) {
880 size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1);
881 LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size);
886 for (i = 0, j = 0; i < lpni->lpni_pref_nnids; i++) {
887 if (lpni->lpni_pref.nids[i] != nid)
889 nids[j++] = lpni->lpni_pref.nids[i];
891 /* Check if we actually removed a nid. */
892 if (j == lpni->lpni_pref_nnids) {
893 LIBCFS_FREE(nids, size);
899 lnet_net_lock(LNET_LOCK_EX);
900 spin_lock(&lpni->lpni_lock);
901 if (lpni->lpni_pref_nnids == 1) {
902 lpni->lpni_pref.nid = LNET_NID_ANY;
903 } else if (lpni->lpni_pref_nnids == 2) {
/* Save the array pointer before lpni_pref.nid is overwritten below. */
904 oldnids = lpni->lpni_pref.nids;
905 if (oldnids[0] == nid)
906 lpni->lpni_pref.nid = oldnids[1];
/* Otherwise entry 1 is the one removed, so entry 0 survives. */
908 lpni->lpni_pref.nid = oldnids[0];
910 oldnids = lpni->lpni_pref.nids;
911 lpni->lpni_pref.nids = nids;
913 lpni->lpni_pref_nnids--;
914 lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
915 spin_unlock(&lpni->lpni_lock);
916 lnet_net_unlock(LNET_LOCK_EX);
/* The count was decremented above, so count + 1 is the old array length. */
919 size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1);
/* size is already a byte count; do not scale it a second time. */
920 LIBCFS_FREE(oldnids, size);
923 CDEBUG(D_NET, "peer %s nid %s: %d\n",
924 libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid), rc);
/*
 * Map nid to the primary NID of the peer it belongs to.
 *
 * The result defaults to nid itself. When a peer_ni is found for nid,
 * the primary NID of its peer is read and the lookup reference is
 * dropped again.
 * NOTE(review): the test on the lookup result and the return are not
 * visible in this listing.
 */
929 lnet_peer_primary_nid_locked(lnet_nid_t nid)
931 struct lnet_peer_ni *lpni;
932 lnet_nid_t primary_nid = nid;
934 lpni = lnet_find_peer_ni_locked(nid);
936 primary_nid = lpni->lpni_peer_net->lpn_peer->lp_primary_nid;
937 lnet_peer_ni_decref_locked(lpni);
/*
 * Exported lookup of the primary NID for nid.
 *
 * Takes the net lock for the current CPT and obtains the peer_ni with
 * lnet_nid2peerni_locked(), passing LNET_NID_ANY as the preferred NID.
 * The primary NID of the owning peer is read, the peer_ni reference is
 * dropped and the lock released. The result defaults to nid itself.
 * NOTE(review): the error handling for the lookup result and the return
 * are on lines not visible in this listing.
 */
944 LNetPrimaryNID(lnet_nid_t nid)
946 struct lnet_peer_ni *lpni;
947 lnet_nid_t primary_nid = nid;
951 cpt = lnet_net_lock_current();
952 lpni = lnet_nid2peerni_locked(nid, LNET_NID_ANY, cpt);
957 primary_nid = lpni->lpni_peer_net->lpn_peer->lp_primary_nid;
958 lnet_peer_ni_decref_locked(lpni);
960 lnet_net_unlock(cpt);
962 CDEBUG(D_NET, "NID %s primary NID %s rc %d\n", libcfs_nid2str(nid),
963 libcfs_nid2str(primary_nid), rc);
966 EXPORT_SYMBOL(LNetPrimaryNID);
/*
 * Scan the peer_nets of peer for the one whose lpn_net_id equals net_id.
 * No reference is taken in the lines visible here.
 * NOTE(review): the return statements are not visible in this listing;
 * a caller below treats the result as absent when no peer_net matches.
 */
968 struct lnet_peer_net *
969 lnet_peer_get_net_locked(struct lnet_peer *peer, __u32 net_id)
971 struct lnet_peer_net *peer_net;
972 list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
973 if (peer_net->lpn_net_id == net_id)
980 * Attach a peer_ni to a peer_net and peer. This function assumes
981 * peer_ni is not already attached to the peer_net/peer. The peer_ni
982 * may be attached to a different peer, in which case it will be
983 * properly detached first. The whole operation is done atomically.
985 * Always returns 0. This is the last function called from functions
986 * that do return an int, so returning 0 here allows the compiler to
/*
 * Link lpni into peer_net lpn and peer lp, all under LNET_LOCK_EX.
 *
 * Steps, in order:
 *  1. If lpni is not yet hashed, add it to the hash chain of the table
 *     for lpni->lpni_cpt, bump pt_version and pt_number, and take the
 *     first reference on lpni.
 *  2. If lpni is attached to another peer_net, detach it and drop the
 *     reference held on that peer_net. Attaching to the same peer_net
 *     or peer again is asserted against.
 *  3. Add lpni to lpn and take a reference on lpn.
 *  4. If lpn has no peer yet, add it to lp and take a reference on lp.
 *  5. If lp is not yet on a peer table list, add it to the table for
 *     lp->lp_cpt.
 *  6. Under lp_lock, set LNET_PEER_CONFIGURED and LNET_PEER_MULTI_RAIL
 *     in lp_state as requested by flags; on the transition to Multi-Rail
 *     the non-MR preferred NIDs of the peer are cleared.
 *  7. Increment pt_peer_nnids for the table of lp.
 * NOTE(review): a few counter updates and the return are on lines not
 * visible in this listing.
 */
990 lnet_peer_attach_peer_ni(struct lnet_peer *lp,
991 struct lnet_peer_net *lpn,
992 struct lnet_peer_ni *lpni,
995 struct lnet_peer_table *ptable;
997 /* Install the new peer_ni */
998 lnet_net_lock(LNET_LOCK_EX);
999 /* Add peer_ni to global peer table hash, if necessary. */
1000 if (list_empty(&lpni->lpni_hashlist)) {
1001 int hash = lnet_nid2peerhash(lpni->lpni_nid);
1003 ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
1004 list_add_tail(&lpni->lpni_hashlist, &ptable->pt_hash[hash]);
1005 ptable->pt_version++;
1006 ptable->pt_number++;
1007 /* This is the 1st refcount on lpni. */
1008 atomic_inc(&lpni->lpni_refcount);
1011 /* Detach the peer_ni from an existing peer, if necessary. */
1012 if (lpni->lpni_peer_net) {
1013 LASSERT(lpni->lpni_peer_net != lpn);
1014 LASSERT(lpni->lpni_peer_net->lpn_peer != lp);
1015 lnet_peer_detach_peer_ni_locked(lpni);
1016 lnet_peer_net_decref_locked(lpni->lpni_peer_net);
1017 lpni->lpni_peer_net = NULL;
1020 /* Add peer_ni to peer_net */
1021 lpni->lpni_peer_net = lpn;
1022 list_add_tail(&lpni->lpni_peer_nis, &lpn->lpn_peer_nis);
1023 lnet_peer_net_addref_locked(lpn);
1025 /* Add peer_net to peer */
1026 if (!lpn->lpn_peer) {
1028 list_add_tail(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
1029 lnet_peer_addref_locked(lp);
1032 /* Add peer to global peer list, if necessary */
1033 ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
1034 if (list_empty(&lp->lp_peer_list)) {
1035 list_add_tail(&lp->lp_peer_list, &ptable->pt_peer_list);
1040 /* Update peer state */
1041 spin_lock(&lp->lp_lock);
1042 if (flags & LNET_PEER_CONFIGURED) {
1043 if (!(lp->lp_state & LNET_PEER_CONFIGURED))
1044 lp->lp_state |= LNET_PEER_CONFIGURED;
1046 if (flags & LNET_PEER_MULTI_RAIL) {
1047 if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
1048 lp->lp_state |= LNET_PEER_MULTI_RAIL;
1049 lnet_peer_clr_non_mr_pref_nids(lp);
1052 spin_unlock(&lp->lp_lock);
1055 the_lnet.ln_peer_tables[lp->lp_cpt]->pt_peer_nnids++;
1056 lnet_net_unlock(LNET_LOCK_EX);
1058 CDEBUG(D_NET, "peer %s NID %s flags %#x\n",
1059 libcfs_nid2str(lp->lp_primary_nid),
1060 libcfs_nid2str(lpni->lpni_nid), flags);
1066 * Create a new peer, with nid as its primary nid.
1068 * Call with the lnet_api_mutex held.
/*
 * Create a peer whose primary NID is nid.
 *
 * nid must not be LNET_NID_ANY (asserted). If a peer_ni for nid already
 * exists, its peer is examined: for a configured peer, a differing
 * primary NID or a differing Multi-Rail flag is tested for; otherwise
 * the in-line comment says the existing peer is deleted and recreated
 * as a configured one. A fresh peer, peer_net and peer_ni are then
 * allocated and linked with lnet_peer_attach_peer_ni(), whose result is
 * returned. The two LIBCFS_FREE calls near the end release the peer_net
 * and the peer on the failure path.
 * NOTE(review): rc values, goto labels, allocation failure checks and
 * the deletion call are on lines not visible in this listing.
 */
1071 lnet_peer_add(lnet_nid_t nid, unsigned flags)
1073 struct lnet_peer *lp;
1074 struct lnet_peer_net *lpn;
1075 struct lnet_peer_ni *lpni;
1078 LASSERT(nid != LNET_NID_ANY);
1081 * No need for the lnet_net_lock here, because the
1082 * lnet_api_mutex is held.
1084 lpni = lnet_find_peer_ni_locked(nid);
1086 /* A peer with this NID already exists. */
1087 lp = lpni->lpni_peer_net->lpn_peer;
1088 lnet_peer_ni_decref_locked(lpni);
1090 * This is an error if the peer was configured and the
1091 * primary NID differs or an attempt is made to change
1092 * the Multi-Rail flag. Otherwise the assumption is
1093 * that an existing peer is being modified.
1095 if (lp->lp_state & LNET_PEER_CONFIGURED) {
1096 if (lp->lp_primary_nid != nid)
1098 else if ((lp->lp_state ^ flags) & LNET_PEER_MULTI_RAIL)
1102 /* Delete and recreate as a configured peer. */
1106 /* Create peer, peer_net, and peer_ni. */
1108 lp = lnet_peer_alloc(nid);
1111 lpn = lnet_peer_net_alloc(LNET_NIDNET(nid));
1114 lpni = lnet_peer_ni_alloc(nid);
1118 return lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
1121 LIBCFS_FREE(lpn, sizeof(*lpn));
1123 LIBCFS_FREE(lp, sizeof(*lp));
1125 CDEBUG(D_NET, "peer %s NID flags %#x: %d\n",
1126 libcfs_nid2str(nid), flags, rc);
1131 * Add a NID to a peer. Call with ln_api_mutex held.
1134 * -EPERM: Non-DLC addition to a DLC-configured peer.
1135 * -EEXIST: The NID was configured by DLC for a different peer.
1136 * -ENOMEM: Out of memory.
1137 * -ENOTUNIQ: Adding a second peer NID on a single network on a
1138 * non-multi-rail peer.
/*
 * Add nid as a further NID of the existing peer lp.
 *
 * nid must not be LNET_NID_ANY (asserted). Visible steps:
 *  - a caller without LNET_PEER_CONFIGURED in flags is tested against a
 *    peer that has LNET_PEER_CONFIGURED set;
 *  - LNET_PEER_MULTI_RAIL in flags is set on the peer under lp_lock if
 *    not set yet, clearing the non-MR preferred NIDs; a request without
 *    the flag on a peer that has it is tested for separately;
 *  - an existing peer_ni for nid is reused unless it already belongs to
 *    lp or was configured; if it is the primary NID of another peer,
 *    that peer is deleted and a fresh peer_ni is allocated;
 *  - without an existing peer_ni a new one is allocated;
 *  - the peer_net for the NID is looked up on lp and allocated if
 *    absent; a peer that is not Multi-Rail is tested for when the
 *    peer_net already exists;
 *  - the peer_ni is linked in with lnet_peer_attach_peer_ni(), whose
 *    result is returned.
 * On the failure path a peer_ni whose lpni_peer_net is still NULL is
 * freed, since that marks it as allocated by this call.
 * NOTE(review): rc values and goto labels are on lines not visible in
 * this listing; the comment preceding this function lists the intended
 * error codes.
 */
1141 lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags)
1143 struct lnet_peer_net *lpn;
1144 struct lnet_peer_ni *lpni;
1148 LASSERT(nid != LNET_NID_ANY);
1150 /* A configured peer can only be updated through configuration. */
1151 if (!(flags & LNET_PEER_CONFIGURED)) {
1152 if (lp->lp_state & LNET_PEER_CONFIGURED) {
1159 * The MULTI_RAIL flag can be set but not cleared, because
1160 * that would leave the peer struct in an invalid state.
1162 if (flags & LNET_PEER_MULTI_RAIL) {
1163 spin_lock(&lp->lp_lock);
1164 if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
1165 lp->lp_state |= LNET_PEER_MULTI_RAIL;
1166 lnet_peer_clr_non_mr_pref_nids(lp);
1168 spin_unlock(&lp->lp_lock);
1169 } else if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
1174 lpni = lnet_find_peer_ni_locked(nid);
1177 * A peer_ni already exists. This is only a problem if
1178 * it is not connected to this peer and was configured
1181 lnet_peer_ni_decref_locked(lpni);
1182 if (lpni->lpni_peer_net->lpn_peer == lp)
1184 if (lnet_peer_ni_is_configured(lpni)) {
1188 /* If this is the primary NID, destroy the peer. */
1189 if (lnet_peer_ni_is_primary(lpni)) {
1190 lnet_peer_del(lpni->lpni_peer_net->lpn_peer);
1191 lpni = lnet_peer_ni_alloc(nid);
1198 lpni = lnet_peer_ni_alloc(nid);
1206 * Get the peer_net. Check that we're not adding a second
1207 * peer_ni on a peer_net of a non-multi-rail peer.
1209 lpn = lnet_peer_get_net_locked(lp, LNET_NIDNET(nid));
1211 lpn = lnet_peer_net_alloc(LNET_NIDNET(nid));
1216 } else if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
1221 return lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
1224 /* If the peer_ni was allocated above its peer_net pointer is NULL */
1225 if (!lpni->lpni_peer_net)
1226 LIBCFS_FREE(lpni, sizeof(*lpni));
1228 CDEBUG(D_NET, "peer %s NID %s flags %#x: %d\n",
1229 libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid),
1235 * lpni creation initiated due to traffic either sending or receiving.
/*
 * Create a peer, peer_net and peer_ni for nid on behalf of traffic.
 *
 * nid is tested against LNET_NID_ANY first. If a peer_ni for nid already
 * exists, the lookup reference is dropped and, per the in-line comment,
 * the existing object is accepted as is. Otherwise the three objects are
 * allocated; when pref is not LNET_NID_ANY it is recorded on the new
 * peer_ni as its non-MR preferred NID; and the objects are linked with
 * lnet_peer_attach_peer_ni(), whose result is returned. The two
 * LIBCFS_FREE calls near the end release the peer_net and the peer on
 * the failure path.
 * NOTE(review): rc values, goto labels, allocation failure checks and
 * the initialization of flags are on lines not visible in this listing.
 */
1238 lnet_peer_ni_traffic_add(lnet_nid_t nid, lnet_nid_t pref)
1240 struct lnet_peer *lp;
1241 struct lnet_peer_net *lpn;
1242 struct lnet_peer_ni *lpni;
1246 if (nid == LNET_NID_ANY) {
1251 /* lnet_net_lock is not needed here because ln_api_lock is held */
1252 lpni = lnet_find_peer_ni_locked(nid);
1255 * We must have raced with another thread. Since we
1256 * know next to nothing about a peer_ni created by
1257 * traffic, we just assume everything is ok and
1260 lnet_peer_ni_decref_locked(lpni);
1264 /* Create peer, peer_net, and peer_ni. */
1266 lp = lnet_peer_alloc(nid);
1269 lpn = lnet_peer_net_alloc(LNET_NIDNET(nid));
1272 lpni = lnet_peer_ni_alloc(nid);
1275 if (pref != LNET_NID_ANY)
1276 lnet_peer_ni_set_non_mr_pref_nid(lpni, pref);
1278 return lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
1281 LIBCFS_FREE(lpn, sizeof(*lpn));
1283 LIBCFS_FREE(lp, sizeof(*lp));
1285 CDEBUG(D_NET, "peer %s: %d\n", libcfs_nid2str(nid), rc);
1290 * Implementation of IOC_LIBCFS_ADD_PEER_NI.
1292 * This API handles the following combinations:
1293 * Create a peer with its primary NI if only the prim_nid is provided
1294 * Add a NID to a peer identified by the prim_nid. The peer identified
1295 * by the prim_nid must already exist.
1296 * The peer being created may be non-MR.
1298 * The caller must hold ln_api_mutex. This prevents the peer from
1299 * being created/modified/deleted by a different thread.
/*
 * Entry point for adding a peer or a peer NID by configuration.
 *
 * prim_nid is tested against LNET_NID_ANY first. flags always carries
 * LNET_PEER_CONFIGURED, plus LNET_PEER_MULTI_RAIL on a condition that is
 * presumably the mr argument (the test line is not visible here). With
 * nid equal to LNET_NID_ANY a new peer is created through
 * lnet_peer_add(). Otherwise the peer is located through prim_nid and
 * must be configured, must have prim_nid as its primary NID and must
 * agree with flags on the Multi-Rail bit; each mismatch is logged. The
 * NID is then added through lnet_peer_add_nid().
 * NOTE(review): the error codes returned for each failing check are on
 * lines not visible in this listing.
 */
1302 lnet_add_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, bool mr)
1304 struct lnet_peer *lp = NULL;
1305 struct lnet_peer_ni *lpni;
1308 /* The prim_nid must always be specified */
1309 if (prim_nid == LNET_NID_ANY)
1312 flags = LNET_PEER_CONFIGURED;
1314 flags |= LNET_PEER_MULTI_RAIL;
1317 * If nid isn't specified, we must create a new peer with
1318 * prim_nid as its primary nid.
1320 if (nid == LNET_NID_ANY)
1321 return lnet_peer_add(prim_nid, flags);
1323 /* Look up the prim_nid, which must exist. */
1324 lpni = lnet_find_peer_ni_locked(prim_nid);
1327 lnet_peer_ni_decref_locked(lpni);
1328 lp = lpni->lpni_peer_net->lpn_peer;
1330 /* Peer must have been configured. */
1331 if (!(lp->lp_state & LNET_PEER_CONFIGURED)) {
1332 CDEBUG(D_NET, "peer %s was not configured\n",
1333 libcfs_nid2str(prim_nid));
1337 /* Primary NID must match */
1338 if (lp->lp_primary_nid != prim_nid) {
1339 CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
1340 libcfs_nid2str(prim_nid),
1341 libcfs_nid2str(lp->lp_primary_nid));
1345 /* Multi-Rail flag must match. */
1346 if ((lp->lp_state ^ flags) & LNET_PEER_MULTI_RAIL) {
1347 CDEBUG(D_NET, "multi-rail state mismatch for peer %s\n",
1348 libcfs_nid2str(prim_nid));
1352 return lnet_peer_add_nid(lp, nid, flags);
1356 * Implementation of IOC_LIBCFS_DEL_PEER_NI.
1358 * This API handles the following combinations:
1359 * Delete a NI from a peer if both prim_nid and nid are provided.
1360 * Delete a peer if only prim_nid is provided.
1361 * Delete a peer if its primary nid is provided.
1363 * The caller must hold ln_api_mutex. This prevents the peer from
1364 * being modified/deleted by a different thread.
/*
 * Entry point for deleting a peer or a peer NID by configuration.
 *
 * prim_nid is tested against LNET_NID_ANY first. The peer is located
 * through prim_nid, which must be its primary NID; a mismatch is logged.
 * When nid is LNET_NID_ANY or equals the primary NID, the whole peer is
 * deleted with lnet_peer_del(). Otherwise the single NID is removed with
 * lnet_peer_del_nid(), passing LNET_PEER_CONFIGURED plus the current
 * Multi-Rail bit of the peer as flags.
 * NOTE(review): the error codes returned for each failing check are on
 * lines not visible in this listing.
 */
1367 lnet_del_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid)
1369 struct lnet_peer *lp;
1370 struct lnet_peer_ni *lpni;
1373 if (prim_nid == LNET_NID_ANY)
1376 lpni = lnet_find_peer_ni_locked(prim_nid);
1379 lnet_peer_ni_decref_locked(lpni);
1380 lp = lpni->lpni_peer_net->lpn_peer;
1382 if (prim_nid != lp->lp_primary_nid) {
1383 CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
1384 libcfs_nid2str(prim_nid),
1385 libcfs_nid2str(lp->lp_primary_nid));
1389 if (nid == LNET_NID_ANY || nid == lp->lp_primary_nid)
1390 return lnet_peer_del(lp);
1392 flags = LNET_PEER_CONFIGURED;
1393 if (lp->lp_state & LNET_PEER_MULTI_RAIL)
1394 flags |= LNET_PEER_MULTI_RAIL;
1396 return lnet_peer_del_nid(lp, nid, flags);
/*
 * Free a peer_ni whose last reference is gone.
 *
 * Asserts that the refcount and router refcount are zero and that the
 * tx queue is empty with no bytes queued. The peer_net pointer is saved
 * and the back pointers are cleared. The peer_ni is then removed from
 * the zombie list of the table for lpni->lpni_cpt under pt_zombie_lock,
 * decrementing pt_zombies. The preferred NID array is released only when
 * more than one preferred NID is stored. Finally the peer_ni is freed
 * and the reference on the saved peer_net is dropped.
 */
1400 lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni)
1402 struct lnet_peer_table *ptable;
1403 struct lnet_peer_net *lpn;
1405 CDEBUG(D_NET, "%p nid %s\n", lpni, libcfs_nid2str(lpni->lpni_nid));
1407 LASSERT(atomic_read(&lpni->lpni_refcount) == 0);
1408 LASSERT(lpni->lpni_rtr_refcount == 0);
1409 LASSERT(list_empty(&lpni->lpni_txq));
1410 LASSERT(lpni->lpni_txqnob == 0);
/* Keep the peer_net pointer: it is needed after lpni has been freed. */
1412 lpn = lpni->lpni_peer_net;
1413 lpni->lpni_peer_net = NULL;
1414 lpni->lpni_net = NULL;
1416 /* remove the peer ni from the zombie list */
1417 ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
1418 spin_lock(&ptable->pt_zombie_lock);
1419 list_del_init(&lpni->lpni_hashlist);
1420 ptable->pt_zombies--;
1421 spin_unlock(&ptable->pt_zombie_lock);
1423 if (lpni->lpni_pref_nnids > 1) {
1424 LIBCFS_FREE(lpni->lpni_pref.nids,
1425 sizeof(*lpni->lpni_pref.nids) * lpni->lpni_pref_nnids);
1427 LIBCFS_FREE(lpni, sizeof(*lpni));
1429 lnet_peer_net_decref_locked(lpn);
/*
 * Look up the peer NI for nid, adding it through
 * lnet_peer_ni_traffic_add() when the first lookup does not produce one.
 * Returns ERR_PTR(-ESHUTDOWN) when LNet is not running.
 * NOTE(review): the result checks and the out_net_relock label are not
 * visible in this listing.
 */
1432 struct lnet_peer_ni *
1433 lnet_nid2peerni_ex(lnet_nid_t nid, int cpt)
1435 struct lnet_peer_ni *lpni = NULL;
1438 if (the_lnet.ln_state != LNET_STATE_RUNNING)
1439 return ERR_PTR(-ESHUTDOWN);
1442 * find if a peer_ni already exists.
1443 * If so then just return that.
1445 lpni = lnet_find_peer_ni_locked(nid);
/* The net lock for cpt is released around the add. */
1449 lnet_net_unlock(cpt);
/* No preferred NID is supplied on this path (LNET_NID_ANY). */
1451 rc = lnet_peer_ni_traffic_add(nid, LNET_NID_ANY);
1454 goto out_net_relock;
/* Look the NI up again now that the add has run. */
1457 lpni = lnet_find_peer_ni_locked(nid);
1467 * Get a peer_ni for the given nid, create it if necessary. Takes a
1468 * hold on the peer_ni.
/*
 * Look up the peer NI for nid; when the first lookup does not produce one,
 * create it via lnet_peer_ni_traffic_add(nid, pref) under ln_api_mutex.
 * Returns ERR_PTR(-ESHUTDOWN) when LNet is not running.
 * NOTE(review): the result checks and the out_mutex_unlock label are not
 * visible in this listing.
 */
1470 struct lnet_peer_ni *
1471 lnet_nid2peerni_locked(lnet_nid_t nid, lnet_nid_t pref, int cpt)
1473 struct lnet_peer_ni *lpni = NULL;
1476 if (the_lnet.ln_state != LNET_STATE_RUNNING)
1477 return ERR_PTR(-ESHUTDOWN);
1480 * find if a peer_ni already exists.
1481 * If so then just return that.
1483 lpni = lnet_find_peer_ni_locked(nid);
1489 * use the lnet_api_mutex to serialize the creation of the peer_ni
1490 * and the creation/deletion of the local ni/net. When a local ni is
1491 * created, if there exists a set of peer_nis on that network,
1492 * they need to be traversed and updated. When a local NI is
1493 * deleted, which could result in a network being deleted, then
1494 * all peer nis on that network need to be removed as well.
1496 * Creation through traffic should also be serialized with
1497 * creation through DLC.
/* The net lock for cpt is released before ln_api_mutex is taken. */
1499 lnet_net_unlock(cpt);
1500 mutex_lock(&the_lnet.ln_api_mutex);
1502 * Shutdown is only set under the ln_api_lock, so a single
1503 * check here is sufficent.
1505 if (the_lnet.ln_state != LNET_STATE_RUNNING) {
1506 lpni = ERR_PTR(-ESHUTDOWN);
1507 goto out_mutex_unlock;
1510 rc = lnet_peer_ni_traffic_add(nid, pref);
1513 goto out_mutex_unlock;
/* Look the NI up again now that the add has run. */
1516 lpni = lnet_find_peer_ni_locked(nid);
1520 mutex_unlock(&the_lnet.ln_api_mutex);
1523 /* Lock has been dropped, check again for shutdown. */
1524 if (the_lnet.ln_state != LNET_STATE_RUNNING) {
/* Give back the reference before replacing lpni with the error value. */
1526 lnet_peer_ni_decref_locked(lpni);
1527 lpni = ERR_PTR(-ESHUTDOWN);
1538 * Is a peer uptodate from the point of view of discovery?
1540 * If it is currently being processed, obviously not.
1541 * A forced Ping or Push is also handled by the discovery thread.
1543 * Otherwise look at whether the peer needs rediscovering.
/*
 * Classify lp->lp_state, read under lp_lock, to decide whether the peer is
 * up to date with respect to discovery.
 * NOTE(review): the value assigned in each branch is not visible in this
 * listing; only the tested conditions are.
 */
1546 lnet_peer_is_uptodate(struct lnet_peer *lp)
1550 spin_lock(&lp->lp_lock);
/* Case 1: discovery in progress, or a forced ping/push is pending. */
1551 if (lp->lp_state & (LNET_PEER_DISCOVERING |
1552 LNET_PEER_FORCE_PING |
1553 LNET_PEER_FORCE_PUSH)) {
/* Case 2: rediscovery requested; the answer depends on whether discovery is disabled. */
1555 } else if (lp->lp_state & LNET_PEER_REDISCOVER) {
1556 if (lnet_peer_discovery_disabled)
/* Case 3: already discovered; the answer depends on NIDS_UPTODATE. */
1560 } else if (lp->lp_state & LNET_PEER_DISCOVERED) {
1561 if (lp->lp_state & LNET_PEER_NIDS_UPTODATE)
1568 spin_unlock(&lp->lp_lock);
1574 * Queue a peer for the attention of the discovery thread. Call with
1575 * lnet_net_lock/EX held. Returns 0 if the peer was queued, and
1576 * -EALREADY if the peer was already queued.
/*
 * Mark lp as DISCOVERING and, if it is not already on a discovery list,
 * append it to ln_dc_request and wake the discovery thread.
 */
1578 static int lnet_peer_queue_for_discovery(struct lnet_peer *lp)
/* The state flag is set under lp_lock. */
1582 spin_lock(&lp->lp_lock);
1583 if (!(lp->lp_state & LNET_PEER_DISCOVERING))
1584 lp->lp_state |= LNET_PEER_DISCOVERING;
1585 spin_unlock(&lp->lp_lock);
/* lp_dc_list is examined after lp_lock has been released. */
1586 if (list_empty(&lp->lp_dc_list)) {
/* A reference on the peer is taken when it is put on the request queue. */
1587 lnet_peer_addref_locked(lp);
1588 list_add_tail(&lp->lp_dc_list, &the_lnet.ln_dc_request);
1589 wake_up(&the_lnet.ln_dc_waitq);
1599 * Discovery of a peer is complete. Wake all waiters on the peer.
1600 * Call with lnet_net_lock/EX held.
/*
 * Take lp off whichever discovery list it is on, wake every waiter on
 * lp->lp_dc_waitq, and drop one reference on the peer.
 */
1602 static void lnet_peer_discovery_complete(struct lnet_peer *lp)
/* list_del_init() leaves lp_dc_list empty, so list_empty() on it is true again. */
1604 list_del_init(&lp->lp_dc_list);
1605 wake_up_all(&lp->lp_dc_waitq);
/* lp is not touched after this reference is dropped. */
1606 lnet_peer_decref_locked(lp);
1610 * Peer discovery slow path. The ln_api_mutex is held on entry, and
1611 * dropped/retaken within this function. An lnet_peer_ni is passed in
1612 * because discovery could tear down an lnet_peer.
/*
 * Queue the peer owning lpni for discovery and wait, interruptibly, on the
 * peer's lp_dc_waitq.
 * NOTE(review): the visible code swaps the per-CPT net lock for
 * LNET_LOCK_EX; no ln_api_mutex operation is visible in this listing, so
 * the locking statement in the header comment should be confirmed.
 * NOTE(review): the loop construct, the statements guarded by the bare
 * 'if' lines, the sleep itself and the return value are not visible here.
 */
1615 lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt)
1618 struct lnet_peer *lp;
1622 lnet_net_unlock(cpt);
1623 lnet_net_lock(LNET_LOCK_EX);
1625 /* We're willing to be interrupted. */
/* lp is derived from lpni rather than passed in. */
1627 lp = lpni->lpni_peer_net->lpn_peer;
1628 prepare_to_wait(&lp->lp_dc_waitq, &wait, TASK_INTERRUPTIBLE);
/* Three conditions are tested before queueing: signal, thread state, up to date. */
1629 if (signal_pending(current))
1631 if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
1633 if (lnet_peer_is_uptodate(lp))
1635 lnet_peer_queue_for_discovery(lp);
/* A reference is held on lp while LNET_LOCK_EX is released. */
1636 lnet_peer_addref_locked(lp);
1637 lnet_net_unlock(LNET_LOCK_EX);
1639 finish_wait(&lp->lp_dc_waitq, &wait);
1640 lnet_net_lock(LNET_LOCK_EX);
1641 lnet_peer_decref_locked(lp);
1642 /* Do not use lp beyond this point. */
1644 finish_wait(&lp->lp_dc_waitq, &wait);
1646 lnet_net_unlock(LNET_LOCK_EX);
/* The same three conditions are evaluated again to choose the result. */
1649 if (signal_pending(current))
1651 else if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
1653 else if (!lnet_peer_is_uptodate(lp))
1660 * Event handler for the discovery EQ.
1662 * Called with lnet_res_lock(cpt) held. The cpt is the
1663 * lnet_cpt_of_cookie() of the md handle cookie.
/* Every event simply wakes the discovery thread; the event itself is not inspected. */
1665 static void lnet_discovery_event_handler(lnet_event_t *event)
1667 wake_up(&the_lnet.ln_dc_waitq);
1671 * Wait for work to be queued or some other change that must be
1672 * attended to. Returns non-zero if the discovery thread should shut down.
/*
 * Sleep on ln_dc_waitq until the thread is being stopped, the push target
 * needs resizing, or the request queue is non-empty.
 * NOTE(review): the loop construct, the statements guarded by the bare
 * 'if' lines, the sleep itself and the assignment of rc are not visible
 * in this listing.
 */
1675 static int lnet_peer_discovery_wait_for_work(void)
1682 cpt = lnet_net_lock_current();
/* The wake-up conditions are tested after the task is put on the wait queue. */
1684 prepare_to_wait(&the_lnet.ln_dc_waitq, &wait,
1685 TASK_INTERRUPTIBLE);
1686 if (the_lnet.ln_dc_state == LNET_DC_STATE_STOPPING)
1688 if (lnet_push_target_resize_needed())
1690 if (!list_empty(&the_lnet.ln_dc_request))
/* The net lock is released here and taken again below. */
1692 lnet_net_unlock(cpt);
1694 finish_wait(&the_lnet.ln_dc_waitq, &wait);
1695 cpt = lnet_net_lock_current();
1697 finish_wait(&the_lnet.ln_dc_waitq, &wait);
/* The stopping state is tested once more before the lock is released. */
1699 if (the_lnet.ln_dc_state == LNET_DC_STATE_STOPPING)
1702 lnet_net_unlock(cpt);
1704 CDEBUG(D_NET, "woken: %d\n", rc);
1709 /* The discovery thread. */
1710 static int lnet_peer_discovery(void *arg)
1712 struct lnet_peer *lp;
1714 CDEBUG(D_NET, "started\n");
1715 cfs_block_allsigs();
1718 if (lnet_peer_discovery_wait_for_work())
1721 if (lnet_push_target_resize_needed())
1722 lnet_push_target_resize();
1724 lnet_net_lock(LNET_LOCK_EX);
1725 if (the_lnet.ln_dc_state == LNET_DC_STATE_STOPPING)
1727 while (!list_empty(&the_lnet.ln_dc_request)) {
1728 lp = list_first_entry(&the_lnet.ln_dc_request,
1729 struct lnet_peer, lp_dc_list);
1730 list_move(&lp->lp_dc_list, &the_lnet.ln_dc_working);
1731 lnet_net_unlock(LNET_LOCK_EX);
1733 /* Just tag and release for now. */
1734 spin_lock(&lp->lp_lock);
1735 if (lnet_peer_discovery_disabled) {
1736 lp->lp_state |= LNET_PEER_REDISCOVER;
1737 lp->lp_state &= ~(LNET_PEER_DISCOVERED |
1738 LNET_PEER_NIDS_UPTODATE |
1739 LNET_PEER_DISCOVERING);
1741 lp->lp_state |= (LNET_PEER_DISCOVERED |
1742 LNET_PEER_NIDS_UPTODATE);
1743 lp->lp_state &= ~(LNET_PEER_REDISCOVER |
1744 LNET_PEER_DISCOVERING);
1746 spin_unlock(&lp->lp_lock);
1748 lnet_net_lock(LNET_LOCK_EX);
1749 if (!(lp->lp_state & LNET_PEER_DISCOVERING))
1750 lnet_peer_discovery_complete(lp);
1751 if (the_lnet.ln_dc_state == LNET_DC_STATE_STOPPING)
1754 lnet_net_unlock(LNET_LOCK_EX);
1757 CDEBUG(D_NET, "stopping\n");
1759 * Clean up before telling lnet_peer_discovery_stop() that
1760 * we're done. Use wake_up() below to somewhat reduce the
1761 * size of the thundering herd if there are multiple threads
1762 * waiting on discovery of a single peer.
1764 LNetEQFree(the_lnet.ln_dc_eqh);
1765 LNetInvalidateEQHandle(&the_lnet.ln_dc_eqh);
1767 lnet_net_lock(LNET_LOCK_EX);
1768 list_for_each_entry(lp, &the_lnet.ln_dc_request, lp_dc_list) {
1769 spin_lock(&lp->lp_lock);
1770 lp->lp_state |= LNET_PEER_REDISCOVER;
1771 lp->lp_state &= ~(LNET_PEER_DISCOVERED |
1772 LNET_PEER_DISCOVERING |
1773 LNET_PEER_NIDS_UPTODATE);
1774 spin_unlock(&lp->lp_lock);
1775 lnet_peer_discovery_complete(lp);
1777 list_for_each_entry(lp, &the_lnet.ln_dc_working, lp_dc_list) {
1778 spin_lock(&lp->lp_lock);
1779 lp->lp_state |= LNET_PEER_REDISCOVER;
1780 lp->lp_state &= ~(LNET_PEER_DISCOVERED |
1781 LNET_PEER_DISCOVERING |
1782 LNET_PEER_NIDS_UPTODATE);
1783 spin_unlock(&lp->lp_lock);
1784 lnet_peer_discovery_complete(lp);
1786 lnet_net_unlock(LNET_LOCK_EX);
1788 the_lnet.ln_dc_state = LNET_DC_STATE_SHUTDOWN;
1789 wake_up(&the_lnet.ln_dc_waitq);
1791 CDEBUG(D_NET, "stopped\n");
1796 /* ln_api_mutex is held on entry. */
/*
 * Initialise the discovery queues and wait queue, allocate the discovery
 * EQ and start the "lnet_discovery" kernel thread.
 * NOTE(review): the result checks and return statements are not visible
 * in this listing.
 */
1797 int lnet_peer_discovery_start(void)
1799 struct task_struct *task;
/* Starting is only considered from the SHUTDOWN state. */
1802 if (the_lnet.ln_dc_state != LNET_DC_STATE_SHUTDOWN)
1805 INIT_LIST_HEAD(&the_lnet.ln_dc_request);
1806 INIT_LIST_HEAD(&the_lnet.ln_dc_working);
1807 init_waitqueue_head(&the_lnet.ln_dc_waitq);
/* All discovery events go to lnet_discovery_event_handler(). */
1809 rc = LNetEQAlloc(0, lnet_discovery_event_handler, &the_lnet.ln_dc_eqh);
1811 CERROR("Can't allocate discovery EQ: %d\n", rc);
/* The state is set to RUNNING before the thread is created. */
1815 the_lnet.ln_dc_state = LNET_DC_STATE_RUNNING;
1816 task = kthread_run(lnet_peer_discovery, NULL, "lnet_discovery");
1819 CERROR("Can't start peer discovery thread: %d\n", rc);
/* Failure path: release the EQ and put the state back to SHUTDOWN. */
1821 LNetEQFree(the_lnet.ln_dc_eqh);
1822 LNetInvalidateEQHandle(&the_lnet.ln_dc_eqh);
1824 the_lnet.ln_dc_state = LNET_DC_STATE_SHUTDOWN;
1830 /* ln_api_mutex is held on entry. */
/*
 * Ask the discovery thread to stop and wait until it has set ln_dc_state
 * to LNET_DC_STATE_SHUTDOWN.
 */
1831 void lnet_peer_discovery_stop(void)
/* The SHUTDOWN state is tested first; the guarded statement is not visible here. */
1833 if (the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN)
1836 LASSERT(the_lnet.ln_dc_state == LNET_DC_STATE_RUNNING);
/* Flag the stop request, then wake the thread so it notices. */
1837 the_lnet.ln_dc_state = LNET_DC_STATE_STOPPING;
1838 wake_up(&the_lnet.ln_dc_waitq);
/* Uninterruptible wait on the same wait queue the thread wakes at exit. */
1840 wait_event(the_lnet.ln_dc_waitq,
1841 the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN);
/* Both discovery queues are expected to be empty once the thread has stopped. */
1843 LASSERT(list_empty(&the_lnet.ln_dc_request));
1844 LASSERT(list_empty(&the_lnet.ln_dc_working));
/*
 * Log one D_WARNING line describing the peer NI for nid: refcount,
 * aliveness, and credit/queue counters.
 * NOTE(review): the lock acquisition and the check on the lookup result
 * are not visible in this listing.
 */
1850 lnet_debug_peer(lnet_nid_t nid)
/* Stays "NA" unless the NI is a router or has aliveness checking enabled. */
1852 char *aliveness = "NA";
1853 struct lnet_peer_ni *lp;
1856 cpt = lnet_cpt_of_nid(nid, NULL);
1859 lp = lnet_nid2peerni_locked(nid, LNET_NID_ANY, cpt);
1861 lnet_net_unlock(cpt);
1862 CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
1866 if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
1867 aliveness = lp->lpni_alive ? "up" : "down";
/*
 * NOTE(review): lp->lpni_net is dereferenced without a NULL check here,
 * whereas lnet_get_peer_info() guards the same access -- confirm whether
 * lpni_net can be NULL on this path.
 */
1869 CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
1870 libcfs_nid2str(lp->lpni_nid), atomic_read(&lp->lpni_refcount),
1871 aliveness, lp->lpni_net->net_tunables.lct_peer_tx_credits,
1872 lp->lpni_rtrcredits, lp->lpni_minrtrcredits,
1873 lp->lpni_txcredits, lp->lpni_mintxcredits, lp->lpni_txqnob);
/* Release the reference held on lp, then the net lock. */
1875 lnet_peer_ni_decref_locked(lp);
1877 lnet_net_unlock(cpt);
1880 /* Gathering information for userspace. */
1882 int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
1883 char aliveness[LNET_MAX_STR_LEN],
1884 __u32 *cpt_iter, __u32 *refcount,
1885 __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
1886 __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
1887 __u32 *peer_tx_qnob)
1889 struct lnet_peer_table *peer_table;
1890 struct lnet_peer_ni *lp;
1895 /* get the number of CPTs */
1896 lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
1898 /* if the cpt number to be examined is >= the number of cpts in
1899 * the system then indicate that there are no more cpts to examin
1901 if (*cpt_iter >= lncpt)
1904 /* get the current table */
1905 peer_table = the_lnet.ln_peer_tables[*cpt_iter];
1906 /* if the ptable is NULL then there are no more cpts to examine */
1907 if (peer_table == NULL)
1910 lnet_net_lock(*cpt_iter);
1912 for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) {
1913 struct list_head *peers = &peer_table->pt_hash[j];
1915 list_for_each_entry(lp, peers, lpni_hashlist) {
1916 if (peer_index-- > 0)
1919 snprintf(aliveness, LNET_MAX_STR_LEN, "NA");
1920 if (lnet_isrouter(lp) ||
1921 lnet_peer_aliveness_enabled(lp))
1922 snprintf(aliveness, LNET_MAX_STR_LEN,
1923 lp->lpni_alive ? "up" : "down");
1925 *nid = lp->lpni_nid;
1926 *refcount = atomic_read(&lp->lpni_refcount);
1927 *ni_peer_tx_credits =
1928 lp->lpni_net->net_tunables.lct_peer_tx_credits;
1929 *peer_tx_credits = lp->lpni_txcredits;
1930 *peer_rtr_credits = lp->lpni_rtrcredits;
1931 *peer_min_rtr_credits = lp->lpni_mintxcredits;
1932 *peer_tx_qnob = lp->lpni_txqnob;
1938 lnet_net_unlock(*cpt_iter);
1942 return found ? 0 : -ENOENT;
1945 /* ln_api_mutex is held, which keeps the peer list stable */
1946 int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
1948 struct lnet_peer_ni_credit_info __user *peer_ni_info,
1949 struct lnet_ioctl_element_stats __user *peer_ni_stats)
1951 struct lnet_peer_ni *lpni = NULL;
1952 struct lnet_peer_net *lpn = NULL;
1953 struct lnet_peer *lp = NULL;
1954 struct lnet_peer_ni_credit_info ni_info;
1955 struct lnet_ioctl_element_stats ni_stats;
1958 lpni = lnet_get_peer_ni_idx_locked(idx, &lpn, &lp);
1963 *primary_nid = lp->lp_primary_nid;
1964 *mr = lnet_peer_is_multi_rail(lp);
1965 *nid = lpni->lpni_nid;
1966 snprintf(ni_info.cr_aliveness, LNET_MAX_STR_LEN, "NA");
1967 if (lnet_isrouter(lpni) ||
1968 lnet_peer_aliveness_enabled(lpni))
1969 snprintf(ni_info.cr_aliveness, LNET_MAX_STR_LEN,
1970 lpni->lpni_alive ? "up" : "down");
1972 ni_info.cr_refcount = atomic_read(&lpni->lpni_refcount);
1973 ni_info.cr_ni_peer_tx_credits = (lpni->lpni_net != NULL) ?
1974 lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0;
1975 ni_info.cr_peer_tx_credits = lpni->lpni_txcredits;
1976 ni_info.cr_peer_rtr_credits = lpni->lpni_rtrcredits;
1977 ni_info.cr_peer_min_rtr_credits = lpni->lpni_minrtrcredits;
1978 ni_info.cr_peer_min_tx_credits = lpni->lpni_mintxcredits;
1979 ni_info.cr_peer_tx_qnob = lpni->lpni_txqnob;
1980 ni_info.cr_ncpt = lpni->lpni_cpt;
1982 ni_stats.iel_send_count = atomic_read(&lpni->lpni_stats.send_count);
1983 ni_stats.iel_recv_count = atomic_read(&lpni->lpni_stats.recv_count);
1984 ni_stats.iel_drop_count = atomic_read(&lpni->lpni_stats.drop_count);
1986 /* If copy_to_user fails */
1988 if (copy_to_user(peer_ni_info, &ni_info, sizeof(ni_info)))
1991 if (copy_to_user(peer_ni_stats, &ni_stats, sizeof(ni_stats)))