Whamcloud - gitweb
LU-3131 tests: fix sanity 56u/102k for single OST.
[fs/lustre-release.git] / lnet / lnet / peer.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lnet/lnet/peer.c
37  */
38
39 #define DEBUG_SUBSYSTEM S_LNET
40
41 #include <lnet/lib-lnet.h>
42
43 int
44 lnet_peer_tables_create(void)
45 {
46         struct lnet_peer_table  *ptable;
47         cfs_list_t              *hash;
48         int                     i;
49         int                     j;
50
51         the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
52                                                    sizeof(*ptable));
53         if (the_lnet.ln_peer_tables == NULL) {
54                 CERROR("Failed to allocate cpu-partition peer tables\n");
55                 return -ENOMEM;
56         }
57
58         cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
59                 CFS_INIT_LIST_HEAD(&ptable->pt_deathrow);
60
61                 LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
62                                  LNET_PEER_HASH_SIZE * sizeof(*hash));
63                 if (hash == NULL) {
64                         CERROR("Failed to create peer hash table\n");
65                         lnet_peer_tables_destroy();
66                         return -ENOMEM;
67                 }
68
69                 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
70                         CFS_INIT_LIST_HEAD(&hash[j]);
71                 ptable->pt_hash = hash; /* sign of initialization */
72         }
73
74         return 0;
75 }
76
77 void
78 lnet_peer_tables_destroy(void)
79 {
80         struct lnet_peer_table  *ptable;
81         cfs_list_t              *hash;
82         int                     i;
83         int                     j;
84
85         if (the_lnet.ln_peer_tables == NULL)
86                 return;
87
88         cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
89                 hash = ptable->pt_hash;
90                 if (hash == NULL) /* not intialized */
91                         break;
92
93                 LASSERT(cfs_list_empty(&ptable->pt_deathrow));
94
95                 ptable->pt_hash = NULL;
96                 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
97                         LASSERT(cfs_list_empty(&hash[j]));
98
99                 LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
100         }
101
102         cfs_percpt_free(the_lnet.ln_peer_tables);
103         the_lnet.ln_peer_tables = NULL;
104 }
105
106 void
107 lnet_peer_tables_cleanup(void)
108 {
109         struct lnet_peer_table  *ptable;
110         int                     i;
111         int                     j;
112
113         LASSERT(the_lnet.ln_shutdown);  /* i.e. no new peers */
114
115         cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
116                 lnet_net_lock(i);
117
118                 for (j = 0; j < LNET_PEER_HASH_SIZE; j++) {
119                         cfs_list_t *peers = &ptable->pt_hash[j];
120
121                         while (!cfs_list_empty(peers)) {
122                                 lnet_peer_t *lp = cfs_list_entry(peers->next,
123                                                                  lnet_peer_t,
124                                                                  lp_hashlist);
125                                 cfs_list_del_init(&lp->lp_hashlist);
126                                 /* lose hash table's ref */
127                                 lnet_peer_decref_locked(lp);
128                         }
129                 }
130
131                 lnet_net_unlock(i);
132         }
133
134         cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
135                 CFS_LIST_HEAD   (deathrow);
136                 lnet_peer_t     *lp;
137
138                 lnet_net_lock(i);
139
140                 for (j = 3; ptable->pt_number != 0; j++) {
141                         lnet_net_unlock(i);
142
143                         if ((j & (j - 1)) == 0) {
144                                 CDEBUG(D_WARNING,
145                                        "Waiting for %d peers on peer table\n",
146                                        ptable->pt_number);
147                         }
148                         cfs_pause(cfs_time_seconds(1) / 2);
149                         lnet_net_lock(i);
150                 }
151                 cfs_list_splice_init(&ptable->pt_deathrow, &deathrow);
152
153                 lnet_net_unlock(i);
154
155                 while (!cfs_list_empty(&deathrow)) {
156                         lp = cfs_list_entry(deathrow.next,
157                                             lnet_peer_t, lp_hashlist);
158                         cfs_list_del(&lp->lp_hashlist);
159                         LIBCFS_FREE(lp, sizeof(*lp));
160                 }
161         }
162 }
163
164 void
165 lnet_destroy_peer_locked(lnet_peer_t *lp)
166 {
167         struct lnet_peer_table *ptable;
168
169         LASSERT(lp->lp_refcount == 0);
170         LASSERT(lp->lp_rtr_refcount == 0);
171         LASSERT(cfs_list_empty(&lp->lp_txq));
172         LASSERT(cfs_list_empty(&lp->lp_hashlist));
173         LASSERT(lp->lp_txqnob == 0);
174
175         ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
176         LASSERT(ptable->pt_number > 0);
177         ptable->pt_number--;
178
179         lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
180         lp->lp_ni = NULL;
181
182         cfs_list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
183 }
184
185 lnet_peer_t *
186 lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
187 {
188         cfs_list_t      *peers;
189         lnet_peer_t     *lp;
190
191         LASSERT(!the_lnet.ln_shutdown);
192
193         peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
194         cfs_list_for_each_entry(lp, peers, lp_hashlist) {
195                 if (lp->lp_nid == nid) {
196                         lnet_peer_addref_locked(lp);
197                         return lp;
198                 }
199         }
200
201         return NULL;
202 }
203
204 int
205 lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt)
206 {
207         struct lnet_peer_table  *ptable;
208         lnet_peer_t             *lp = NULL;
209         lnet_peer_t             *lp2;
210         int                     cpt2;
211         int                     rc = 0;
212
213         *lpp = NULL;
214         if (the_lnet.ln_shutdown) /* it's shutting down */
215                 return -ESHUTDOWN;
216
217         /* cpt can be LNET_LOCK_EX if it's called from router functions */
218         cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
219
220         ptable = the_lnet.ln_peer_tables[cpt2];
221         lp = lnet_find_peer_locked(ptable, nid);
222         if (lp != NULL) {
223                 *lpp = lp;
224                 return 0;
225         }
226
227         if (!cfs_list_empty(&ptable->pt_deathrow)) {
228                 lp = cfs_list_entry(ptable->pt_deathrow.next,
229                                     lnet_peer_t, lp_hashlist);
230                 cfs_list_del(&lp->lp_hashlist);
231         }
232
233         /*
234          * take extra refcount in case another thread has shutdown LNet
235          * and destroyed locks and peer-table before I finish the allocation
236          */
237         ptable->pt_number++;
238         lnet_net_unlock(cpt);
239
240         if (lp != NULL)
241                 memset(lp, 0, sizeof(*lp));
242         else
243                 LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp));
244
245         if (lp == NULL) {
246                 rc = -ENOMEM;
247                 lnet_net_lock(cpt);
248                 goto out;
249         }
250
251         CFS_INIT_LIST_HEAD(&lp->lp_txq);
252         CFS_INIT_LIST_HEAD(&lp->lp_rtrq);
253         CFS_INIT_LIST_HEAD(&lp->lp_routes);
254
255         lp->lp_notify = 0;
256         lp->lp_notifylnd = 0;
257         lp->lp_notifying = 0;
258         lp->lp_alive_count = 0;
259         lp->lp_timestamp = 0;
260         lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
261         lp->lp_last_alive = cfs_time_current(); /* assumes alive */
262         lp->lp_last_query = 0; /* haven't asked NI yet */
263         lp->lp_ping_timestamp = 0;
264         lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
265         lp->lp_nid = nid;
266         lp->lp_cpt = cpt2;
267         lp->lp_refcount = 2;    /* 1 for caller; 1 for hash */
268         lp->lp_rtr_refcount = 0;
269
270         lnet_net_lock(cpt);
271
272         if (the_lnet.ln_shutdown) {
273                 rc = -ESHUTDOWN;
274                 goto out;
275         }
276
277         lp2 = lnet_find_peer_locked(ptable, nid);
278         if (lp2 != NULL) {
279                 *lpp = lp2;
280                 goto out;
281         }
282
283         lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
284         if (lp->lp_ni == NULL) {
285                 rc = -EHOSTUNREACH;
286                 goto out;
287         }
288
289         lp->lp_txcredits    =
290         lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
291         lp->lp_rtrcredits    =
292         lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
293
294         cfs_list_add_tail(&lp->lp_hashlist,
295                           &ptable->pt_hash[lnet_nid2peerhash(nid)]);
296         ptable->pt_version++;
297         *lpp = lp;
298
299         return 0;
300 out:
301         if (lp != NULL)
302                 cfs_list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
303         ptable->pt_number--;
304         return rc;
305 }
306
307 void
308 lnet_debug_peer(lnet_nid_t nid)
309 {
310         char            *aliveness = "NA";
311         lnet_peer_t     *lp;
312         int             rc;
313         int             cpt;
314
315         cpt = lnet_cpt_of_nid(nid);
316         lnet_net_lock(cpt);
317
318         rc = lnet_nid2peer_locked(&lp, nid, cpt);
319         if (rc != 0) {
320                 lnet_net_unlock(cpt);
321                 CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
322                 return;
323         }
324
325         if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
326                 aliveness = lp->lp_alive ? "up" : "down";
327
328         CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
329                libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
330                aliveness, lp->lp_ni->ni_peertxcredits,
331                lp->lp_rtrcredits, lp->lp_minrtrcredits,
332                lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
333
334         lnet_peer_decref_locked(lp);
335
336         lnet_net_unlock(cpt);
337 }