Whamcloud - gitweb
3c5ee47a22d10260d437e77fbb89a2440755f007
[fs/lustre-release.git] / lnet / lnet / peer.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  */
30 /*
31  * This file is part of Lustre, http://www.lustre.org/
32  * Lustre is a trademark of Sun Microsystems, Inc.
33  *
34  * lnet/lnet/peer.c
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38
39 #include <lnet/lib-lnet.h>
40
41 int
42 lnet_peer_tables_create(void)
43 {
44         struct lnet_peer_table  *ptable;
45         cfs_list_t              *hash;
46         int                     i;
47         int                     j;
48
49         the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
50                                                    sizeof(*ptable));
51         if (the_lnet.ln_peer_tables == NULL) {
52                 CERROR("Failed to allocate cpu-partition peer tables\n");
53                 return -ENOMEM;
54         }
55
56         cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
57                 CFS_INIT_LIST_HEAD(&ptable->pt_deathrow);
58
59                 LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
60                                  LNET_PEER_HASH_SIZE * sizeof(*hash));
61                 if (hash == NULL) {
62                         CERROR("Failed to create peer hash table\n");
63                         lnet_peer_tables_destroy();
64                         return -ENOMEM;
65                 }
66
67                 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
68                         CFS_INIT_LIST_HEAD(&hash[j]);
69                 ptable->pt_hash = hash; /* sign of initialization */
70         }
71
72         return 0;
73 }
74
75 void
76 lnet_peer_tables_destroy(void)
77 {
78         struct lnet_peer_table  *ptable;
79         cfs_list_t              *hash;
80         int                     i;
81         int                     j;
82
83         if (the_lnet.ln_peer_tables == NULL)
84                 return;
85
86         cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
87                 hash = ptable->pt_hash;
88                 if (hash == NULL) /* not intialized */
89                         break;
90
91                 LASSERT(cfs_list_empty(&ptable->pt_deathrow));
92
93                 ptable->pt_hash = NULL;
94                 for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
95                         LASSERT(cfs_list_empty(&hash[j]));
96
97                 LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
98         }
99
100         cfs_percpt_free(the_lnet.ln_peer_tables);
101         the_lnet.ln_peer_tables = NULL;
102 }
103
104 void
105 lnet_peer_tables_cleanup(void)
106 {
107         struct lnet_peer_table  *ptable;
108         int                     i;
109         int                     j;
110
111         LASSERT(the_lnet.ln_shutdown);  /* i.e. no new peers */
112
113         cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
114                 lnet_net_lock(i);
115
116                 for (j = 0; j < LNET_PEER_HASH_SIZE; j++) {
117                         cfs_list_t *peers = &ptable->pt_hash[j];
118
119                         while (!cfs_list_empty(peers)) {
120                                 lnet_peer_t *lp = cfs_list_entry(peers->next,
121                                                                  lnet_peer_t,
122                                                                  lp_hashlist);
123                                 cfs_list_del_init(&lp->lp_hashlist);
124                                 /* lose hash table's ref */
125                                 lnet_peer_decref_locked(lp);
126                         }
127                 }
128
129                 lnet_net_unlock(i);
130         }
131
132         cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
133                 CFS_LIST_HEAD   (deathrow);
134                 lnet_peer_t     *lp;
135
136                 lnet_net_lock(i);
137
138                 for (j = 3; ptable->pt_number != 0; j++) {
139                         lnet_net_unlock(i);
140
141                         if ((j & (j - 1)) == 0) {
142                                 CDEBUG(D_WARNING,
143                                        "Waiting for %d peers on peer table\n",
144                                        ptable->pt_number);
145                         }
146                         cfs_pause(cfs_time_seconds(1) / 2);
147                         lnet_net_lock(i);
148                 }
149                 cfs_list_splice_init(&ptable->pt_deathrow, &deathrow);
150
151                 lnet_net_unlock(i);
152
153                 while (!cfs_list_empty(&deathrow)) {
154                         lp = cfs_list_entry(deathrow.next,
155                                             lnet_peer_t, lp_hashlist);
156                         cfs_list_del(&lp->lp_hashlist);
157                         LIBCFS_FREE(lp, sizeof(*lp));
158                 }
159         }
160 }
161
162 void
163 lnet_destroy_peer_locked(lnet_peer_t *lp)
164 {
165         struct lnet_peer_table *ptable;
166
167         LASSERT(lp->lp_refcount == 0);
168         LASSERT(lp->lp_rtr_refcount == 0);
169         LASSERT(cfs_list_empty(&lp->lp_txq));
170         LASSERT(cfs_list_empty(&lp->lp_hashlist));
171         LASSERT(lp->lp_txqnob == 0);
172
173         ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
174         LASSERT(ptable->pt_number > 0);
175         ptable->pt_number--;
176
177         lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
178         lp->lp_ni = NULL;
179
180         cfs_list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
181 }
182
183 lnet_peer_t *
184 lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
185 {
186         cfs_list_t      *peers;
187         lnet_peer_t     *lp;
188
189         if (the_lnet.ln_shutdown)
190                 return NULL;
191
192         peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
193         cfs_list_for_each_entry(lp, peers, lp_hashlist) {
194                 if (lp->lp_nid == nid) {
195                         lnet_peer_addref_locked(lp);
196                         return lp;
197                 }
198         }
199
200         return NULL;
201 }
202
203 int
204 lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt)
205 {
206         struct lnet_peer_table  *ptable;
207         lnet_peer_t             *lp = NULL;
208         lnet_peer_t             *lp2;
209         int                     cpt2;
210         int                     rc = 0;
211
212         /* cpt can be LNET_LOCK_EX if it's called from router functions */
213         cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
214
215         ptable = the_lnet.ln_peer_tables[cpt2];
216         lp = lnet_find_peer_locked(ptable, nid);
217         if (lp != NULL) {
218                 *lpp = lp;
219                 return 0;
220         }
221
222         if (!cfs_list_empty(&ptable->pt_deathrow)) {
223                 lp = cfs_list_entry(ptable->pt_deathrow.next,
224                                     lnet_peer_t, lp_hashlist);
225                 cfs_list_del(&lp->lp_hashlist);
226         }
227
228         *lpp = NULL;
229         /*
230          * take extra refcount in case another thread has shutdown LNet
231          * and destroyed locks and peer-table before I finish the allocation
232          */
233         ptable->pt_number++;
234         lnet_net_unlock(cpt);
235
236         if (lp != NULL)
237                 memset(lp, 0, sizeof(*lp));
238         else
239                 LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp));
240
241         if (lp == NULL) {
242                 rc = -ENOMEM;
243                 lnet_net_lock(cpt);
244                 goto out;
245         }
246
247         CFS_INIT_LIST_HEAD(&lp->lp_txq);
248         CFS_INIT_LIST_HEAD(&lp->lp_rtrq);
249         CFS_INIT_LIST_HEAD(&lp->lp_routes);
250
251         lp->lp_notify = 0;
252         lp->lp_notifylnd = 0;
253         lp->lp_notifying = 0;
254         lp->lp_alive_count = 0;
255         lp->lp_timestamp = 0;
256         lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
257         lp->lp_last_alive = cfs_time_current(); /* assumes alive */
258         lp->lp_last_query = 0; /* haven't asked NI yet */
259         lp->lp_ping_timestamp = 0;
260         lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
261         lp->lp_nid = nid;
262         lp->lp_cpt = cpt2;
263         lp->lp_refcount = 2;    /* 1 for caller; 1 for hash */
264         lp->lp_rtr_refcount = 0;
265
266         lnet_net_lock(cpt);
267
268         if (the_lnet.ln_shutdown) {
269                 rc = -ESHUTDOWN;
270                 goto out;
271         }
272
273         lp2 = lnet_find_peer_locked(ptable, nid);
274         if (lp2 != NULL) {
275                 *lpp = lp2;
276                 goto out;
277         }
278
279         lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
280         if (lp->lp_ni == NULL) {
281                 rc = -EHOSTUNREACH;
282                 goto out;
283         }
284
285         lp->lp_txcredits    =
286         lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
287         lp->lp_rtrcredits    =
288         lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
289
290         cfs_list_add_tail(&lp->lp_hashlist,
291                           &ptable->pt_hash[lnet_nid2peerhash(nid)]);
292         ptable->pt_version++;
293         *lpp = lp;
294
295         return 0;
296 out:
297         if (lp != NULL)
298                 cfs_list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
299         ptable->pt_number--;
300         return rc;
301 }
302
303 void
304 lnet_debug_peer(lnet_nid_t nid)
305 {
306         char            *aliveness = "NA";
307         lnet_peer_t     *lp;
308         int             rc;
309         int             cpt;
310
311         cpt = lnet_cpt_of_nid(nid);
312         lnet_net_lock(cpt);
313
314         rc = lnet_nid2peer_locked(&lp, nid, cpt);
315         if (rc != 0) {
316                 lnet_net_unlock(cpt);
317                 CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
318                 return;
319         }
320
321         if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
322                 aliveness = lp->lp_alive ? "up" : "down";
323
324         CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
325                libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
326                aliveness, lp->lp_ni->ni_peertxcredits,
327                lp->lp_rtrcredits, lp->lp_minrtrcredits,
328                lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
329
330         lnet_peer_decref_locked(lp);
331
332         lnet_net_unlock(cpt);
333 }