LU-13641 socklnd: remove tcp bonding
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lnet/klnds/socklnd/socklnd.c
33  *
34  * Author: Zach Brown <zab@zabbo.net>
35  * Author: Peter J. Braam <braam@clusterfs.com>
36  * Author: Phil Schwan <phil@clusterfs.com>
37  * Author: Eric Barton <eric@bartonsoftware.com>
38  */
39
40 #include <linux/inetdevice.h>
41 #include "socklnd.h"
42 #include <linux/sunrpc/addr.h>
43
44 static const struct lnet_lnd the_ksocklnd;
45 struct ksock_nal_data ksocknal_data;
46
47 static struct ksock_interface *
48 ksocknal_ip2iface(struct lnet_ni *ni, struct sockaddr *addr)
49 {
50         struct ksock_net *net = ni->ni_data;
51         struct ksock_interface *iface;
52
53         iface = &net->ksnn_interface;
54
55         if (rpc_cmp_addr((struct sockaddr *)&iface->ksni_addr, addr))
56                 return iface;
57
58         return NULL;
59 }
60
61 static struct ksock_interface *
62 ksocknal_index2iface(struct lnet_ni *ni, int index)
63 {
64         struct ksock_net *net = ni->ni_data;
65         struct ksock_interface *iface;
66
67         iface = &net->ksnn_interface;
68
69         if (iface->ksni_index == index)
70                 return iface;
71
72         return NULL;
73 }
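
/*
 * Added note (not part of the original file): with TCP bonding removed
 * (LU-13641) each ksock_net carries exactly one ksock_interface
 * (net->ksnn_interface), so both lookups above reduce to a single
 * compare.  Illustrative use, assuming a host-order IPv4 address 'ip':
 *
 *      struct sockaddr_in sa = { .sin_family = AF_INET };
 *      struct ksock_interface *iface;
 *
 *      sa.sin_addr.s_addr = htonl(ip);
 *      iface = ksocknal_ip2iface(ni, (struct sockaddr *)&sa);
 *      if (iface == NULL)
 *              CDEBUG(D_NET, "address not bound to this NI\n");
 */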
74
75 static int ksocknal_ip2index(struct sockaddr *addr, struct lnet_ni *ni)
76 {
77         struct net_device *dev;
78         int ret = -1;
79         DECLARE_CONST_IN_IFADDR(ifa);
80
81         if (addr->sa_family != AF_INET)
82                 /* No IPv6 support yet */
83                 return ret;
84
85         rcu_read_lock();
86         for_each_netdev(ni->ni_net_ns, dev) {
87                 int flags = dev_get_flags(dev);
88                 struct in_device *in_dev;
89
90                 if (flags & IFF_LOOPBACK) /* skip the loopback IF */
91                         continue;
92
93                 if (!(flags & IFF_UP))
94                         continue;
95
96                 in_dev = __in_dev_get_rcu(dev);
97                 if (!in_dev)
98                         continue;
99
100                 in_dev_for_each_ifa_rcu(ifa, in_dev) {
101                         if (ifa->ifa_local ==
102                             ((struct sockaddr_in *)addr)->sin_addr.s_addr)
103                                 ret = dev->ifindex;
104                 }
105                 endfor_ifa(in_dev);
106                 if (ret >= 0)
107                         break;
108         }
109         rcu_read_unlock();
110
111         return ret;
112 }
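
/*
 * Added sketch (not part of the original file): ksocknal_ip2index() walks
 * the NI's network namespace under RCU and returns the ifindex of the
 * device owning a local IPv4 address, or -1 for no match or a non-AF_INET
 * address.  Assuming a hypothetical filled-in sockaddr_in 'sa':
 *
 *      int idx = ksocknal_ip2index((struct sockaddr *)&sa, ni);
 *
 *      if (idx < 0)
 *              CDEBUG(D_NET, "no local interface owns this address\n");
 */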
113
114 static struct ksock_route *
115 ksocknal_create_route(struct sockaddr *addr)
116 {
117         struct ksock_route *route;
118
119         LIBCFS_ALLOC (route, sizeof (*route));
120         if (route == NULL)
121                 return (NULL);
122
123         refcount_set(&route->ksnr_refcount, 1);
124         route->ksnr_peer = NULL;
125         route->ksnr_retry_interval = 0;         /* OK to connect at any time */
126         rpc_copy_addr((struct sockaddr *)&route->ksnr_addr, addr);
127         rpc_set_port((struct sockaddr *)&route->ksnr_addr, rpc_get_port(addr));
128         route->ksnr_myiface = -1;
129         route->ksnr_scheduled = 0;
130         route->ksnr_connecting = 0;
131         route->ksnr_connected = 0;
132         route->ksnr_deleted = 0;
133         route->ksnr_conn_count = 0;
134         route->ksnr_share_count = 0;
135
136         return route;
137 }
138
139 void
140 ksocknal_destroy_route(struct ksock_route *route)
141 {
142         LASSERT(refcount_read(&route->ksnr_refcount) == 0);
143
144         if (route->ksnr_peer != NULL)
145                 ksocknal_peer_decref(route->ksnr_peer);
146
147         LIBCFS_FREE (route, sizeof (*route));
148 }
149
150 static struct ksock_peer_ni *
151 ksocknal_create_peer(struct lnet_ni *ni, struct lnet_process_id id)
152 {
153         int cpt = lnet_cpt_of_nid(id.nid, ni);
154         struct ksock_net *net = ni->ni_data;
155         struct ksock_peer_ni *peer_ni;
156
157         LASSERT(id.nid != LNET_NID_ANY);
158         LASSERT(id.pid != LNET_PID_ANY);
159         LASSERT(!in_interrupt());
160
161         if (!atomic_inc_unless_negative(&net->ksnn_npeers)) {
162                 CERROR("Can't create peer_ni: network shutdown\n");
163                 return ERR_PTR(-ESHUTDOWN);
164         }
165
166         LIBCFS_CPT_ALLOC(peer_ni, lnet_cpt_table(), cpt, sizeof(*peer_ni));
167         if (!peer_ni) {
168                 atomic_dec(&net->ksnn_npeers);
169                 return ERR_PTR(-ENOMEM);
170         }
171
172         peer_ni->ksnp_ni = ni;
173         peer_ni->ksnp_id = id;
174         refcount_set(&peer_ni->ksnp_refcount, 1); /* 1 ref for caller */
175         peer_ni->ksnp_closing = 0;
176         peer_ni->ksnp_accepting = 0;
177         peer_ni->ksnp_proto = NULL;
178         peer_ni->ksnp_last_alive = 0;
179         peer_ni->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
180
181         INIT_LIST_HEAD(&peer_ni->ksnp_conns);
182         INIT_LIST_HEAD(&peer_ni->ksnp_routes);
183         INIT_LIST_HEAD(&peer_ni->ksnp_tx_queue);
184         INIT_LIST_HEAD(&peer_ni->ksnp_zc_req_list);
185         spin_lock_init(&peer_ni->ksnp_lock);
186
187         return peer_ni;
188 }
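
/*
 * Added note (not part of the original file): ksnn_npeers doubles as a
 * shutdown gate.  atomic_inc_unless_negative() fails once the shutdown
 * path has biased the counter negative, so no new peer_ni can be created,
 * while ksocknal_destroy_peer() below pairs its decrement with
 * wake_up_var() so that a waiter sleeping on the counter (conceptually
 * wait_var_event(&net->ksnn_npeers, <all peers gone>)) is woken when the
 * last peer_ni of the net is freed.
 */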
189
190 void
191 ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni)
192 {
193         struct ksock_net *net = peer_ni->ksnp_ni->ni_data;
194
195         CDEBUG (D_NET, "peer_ni %s %p deleted\n",
196                 libcfs_id2str(peer_ni->ksnp_id), peer_ni);
197
198         LASSERT(refcount_read(&peer_ni->ksnp_refcount) == 0);
199         LASSERT(peer_ni->ksnp_accepting == 0);
200         LASSERT(list_empty(&peer_ni->ksnp_conns));
201         LASSERT(list_empty(&peer_ni->ksnp_routes));
202         LASSERT(list_empty(&peer_ni->ksnp_tx_queue));
203         LASSERT(list_empty(&peer_ni->ksnp_zc_req_list));
204
205         LIBCFS_FREE(peer_ni, sizeof(*peer_ni));
206
207         /* NB a peer_ni's connections and routes keep a reference on their
208          * peer_ni until they are destroyed, so we can be assured that _all_
209          * state to do with this peer_ni has been cleaned up when its refcount
210          * drops to zero.
211          */
212         if (atomic_dec_and_test(&net->ksnn_npeers))
213                 wake_up_var(&net->ksnn_npeers);
214 }
215
216 struct ksock_peer_ni *
217 ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id)
218 {
219         struct ksock_peer_ni *peer_ni;
220
221         hash_for_each_possible(ksocknal_data.ksnd_peers, peer_ni,
222                                ksnp_list, id.nid) {
223                 LASSERT(!peer_ni->ksnp_closing);
224
225                 if (peer_ni->ksnp_ni != ni)
226                         continue;
227
228                 if (peer_ni->ksnp_id.nid != id.nid ||
229                     peer_ni->ksnp_id.pid != id.pid)
230                         continue;
231
232                 CDEBUG(D_NET, "got peer_ni [%p] -> %s (%d)\n",
233                        peer_ni, libcfs_id2str(id),
234                        refcount_read(&peer_ni->ksnp_refcount));
235                 return peer_ni;
236         }
237         return NULL;
238 }
239
240 struct ksock_peer_ni *
241 ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id)
242 {
243         struct ksock_peer_ni *peer_ni;
244
245         read_lock(&ksocknal_data.ksnd_global_lock);
246         peer_ni = ksocknal_find_peer_locked(ni, id);
247         if (peer_ni != NULL)                    /* +1 ref for caller? */
248                 ksocknal_peer_addref(peer_ni);
249         read_unlock(&ksocknal_data.ksnd_global_lock);
250
251         return (peer_ni);
252 }
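
/*
 * Added note (not part of the original file): the _locked variant returns
 * a borrowed pointer that is only valid while ksnd_global_lock is held;
 * this wrapper takes its own reference, so callers must drop it, e.g.:
 *
 *      peer_ni = ksocknal_find_peer(ni, id);
 *      if (peer_ni != NULL) {
 *              ... use peer_ni ...
 *              ksocknal_peer_decref(peer_ni);
 *      }
 */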
253
254 static void
255 ksocknal_unlink_peer_locked(struct ksock_peer_ni *peer_ni)
256 {
257         int i;
258         struct ksock_interface *iface;
259
260         for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++) {
261                 struct sockaddr_in sa = { .sin_family = AF_INET };
262                 LASSERT(i < LNET_INTERFACES_NUM);
263                 sa.sin_addr.s_addr = htonl(peer_ni->ksnp_passive_ips[i]);
264
265                 iface = ksocknal_ip2iface(peer_ni->ksnp_ni,
266                                           (struct sockaddr *)&sa);
267                 /*
268                  * All IPs in peer_ni->ksnp_passive_ips[] come from the
269                  * interface list, therefore the call must succeed.
270                  */
271                 LASSERT(iface != NULL);
272
273                 CDEBUG(D_NET, "peer_ni=%p iface=%p ksni_nroutes=%d\n",
274                        peer_ni, iface, iface->ksni_nroutes);
275                 iface->ksni_npeers--;
276         }
277
278         LASSERT(list_empty(&peer_ni->ksnp_conns));
279         LASSERT(list_empty(&peer_ni->ksnp_routes));
280         LASSERT(!peer_ni->ksnp_closing);
281         peer_ni->ksnp_closing = 1;
282         hlist_del(&peer_ni->ksnp_list);
283         /* lose peerlist's ref */
284         ksocknal_peer_decref(peer_ni);
285 }
286
287 static int
288 ksocknal_get_peer_info(struct lnet_ni *ni, int index,
289                        struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip,
290                        int *port, int *conn_count, int *share_count)
291 {
292         struct ksock_peer_ni *peer_ni;
293         struct ksock_route *route;
294         struct list_head *rtmp;
295         int i;
296         int j;
297         int rc = -ENOENT;
298
299         read_lock(&ksocknal_data.ksnd_global_lock);
300
301         hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) {
302
303                 if (peer_ni->ksnp_ni != ni)
304                         continue;
305
306                 if (peer_ni->ksnp_n_passive_ips == 0 &&
307                     list_empty(&peer_ni->ksnp_routes)) {
308                         if (index-- > 0)
309                                 continue;
310
311                         *id = peer_ni->ksnp_id;
312                         *myip = 0;
313                         *peer_ip = 0;
314                         *port = 0;
315                         *conn_count = 0;
316                         *share_count = 0;
317                         rc = 0;
318                         goto out;
319                 }
320
321                 for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++) {
322                         if (index-- > 0)
323                                 continue;
324
325                         *id = peer_ni->ksnp_id;
326                         *myip = peer_ni->ksnp_passive_ips[j];
327                         *peer_ip = 0;
328                         *port = 0;
329                         *conn_count = 0;
330                         *share_count = 0;
331                         rc = 0;
332                         goto out;
333                 }
334
335                 list_for_each(rtmp, &peer_ni->ksnp_routes) {
336                         if (index-- > 0)
337                                 continue;
338
339                         route = list_entry(rtmp, struct ksock_route,
340                                            ksnr_list);
341
342                         *id = peer_ni->ksnp_id;
343                         if (route->ksnr_addr.ss_family == AF_INET) {
344                                 struct sockaddr_in *sa =
345                                         (void *)&route->ksnr_addr;
346                                 rc = choose_ipv4_src(
347                                         myip,
348                                         route->ksnr_myiface,
349                                         ntohl(sa->sin_addr.s_addr),
350                                         ni->ni_net_ns);
351                                 *peer_ip = ntohl(sa->sin_addr.s_addr);
352                                 *port = ntohs(sa->sin_port);
353                         } else {
354                                 *myip = 0xFFFFFFFF;
355                                 *peer_ip = 0xFFFFFFFF;
356                                 *port = 0;
357                                 rc = -ENOTSUPP;
358                         }
359                         *conn_count = route->ksnr_conn_count;
360                         *share_count = route->ksnr_share_count;
361                         goto out;
362                 }
363         }
364 out:
365         read_unlock(&ksocknal_data.ksnd_global_lock);
366         return rc;
367 }
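
/*
 * Added sketch (not part of the original file): ksocknal_get_peer_info()
 * is an enumerator; 'index' counts peer_ni/route entries across the whole
 * hash table and -ENOENT marks the end, so an ioctl-style caller can walk
 * it as:
 *
 *      for (index = 0; ; index++) {
 *              rc = ksocknal_get_peer_info(ni, index, &id, &myip, &peer_ip,
 *                                          &port, &conn_count, &share_count);
 *              if (rc != 0)
 *                      break;
 *              ... report one entry ...
 *      }
 */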
368
369 static void
370 ksocknal_associate_route_conn_locked(struct ksock_route *route,
371                                      struct ksock_conn *conn)
372 {
373         struct ksock_peer_ni *peer_ni = route->ksnr_peer;
374         int type = conn->ksnc_type;
375         struct ksock_interface *iface;
376         int conn_iface =
377                 ksocknal_ip2index((struct sockaddr *)&conn->ksnc_myaddr,
378                                   route->ksnr_peer->ksnp_ni);
379
380         conn->ksnc_route = route;
381         ksocknal_route_addref(route);
382
383         if (route->ksnr_myiface != conn_iface) {
384                 if (route->ksnr_myiface < 0) {
385                         /* route wasn't bound locally yet (the initial route) */
386                         CDEBUG(D_NET, "Binding %s %pIS to interface %d\n",
387                                libcfs_id2str(peer_ni->ksnp_id),
388                                &route->ksnr_addr,
389                                conn_iface);
390                 } else {
391                         CDEBUG(D_NET,
392                                "Rebinding %s %pIS from interface %d to %d\n",
393                                libcfs_id2str(peer_ni->ksnp_id),
394                                &route->ksnr_addr,
395                                route->ksnr_myiface,
396                                conn_iface);
397
398                         iface = ksocknal_index2iface(route->ksnr_peer->ksnp_ni,
399                                                      route->ksnr_myiface);
400                         if (iface)
401                                 iface->ksni_nroutes--;
402                 }
403                 route->ksnr_myiface = conn_iface;
404                 iface = ksocknal_index2iface(route->ksnr_peer->ksnp_ni,
405                                              route->ksnr_myiface);
406                 if (iface)
407                         iface->ksni_nroutes++;
408         }
409
410         route->ksnr_connected |= (1<<type);
411         route->ksnr_conn_count++;
412
413         /* Successful connection => further attempts can
414          * proceed immediately
415          */
416         route->ksnr_retry_interval = 0;
417 }
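
/*
 * Added note (not part of the original file): ksnr_connected is a bitmask
 * of connection types currently established over this route, and
 * ksocknal_close_conn_locked() only clears a bit once the last conn of
 * that type is gone.  For example, after a bulk-in conn is associated:
 *
 *      (route->ksnr_connected & BIT(SOCKLND_CONN_BULK_IN)) != 0
 */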
418
419 static void
420 ksocknal_add_route_locked(struct ksock_peer_ni *peer_ni, struct ksock_route *route)
421 {
422         struct list_head *tmp;
423         struct ksock_conn *conn;
424         struct ksock_route *route2;
425         struct ksock_net *net = peer_ni->ksnp_ni->ni_data;
426
427         LASSERT(!peer_ni->ksnp_closing);
428         LASSERT(route->ksnr_peer == NULL);
429         LASSERT(!route->ksnr_scheduled);
430         LASSERT(!route->ksnr_connecting);
431         LASSERT(route->ksnr_connected == 0);
432
433         /* LASSERT(unique) */
434         list_for_each(tmp, &peer_ni->ksnp_routes) {
435                 route2 = list_entry(tmp, struct ksock_route, ksnr_list);
436
437                 if (rpc_cmp_addr((struct sockaddr *)&route2->ksnr_addr,
438                                  (struct sockaddr *)&route->ksnr_addr)) {
439                         CERROR("Duplicate route %s %pI4h\n",
440                                libcfs_id2str(peer_ni->ksnp_id),
441                                &route->ksnr_addr);
442                         LBUG();
443                 }
444         }
445
446         route->ksnr_peer = peer_ni;
447         ksocknal_peer_addref(peer_ni);
448
449         /* set the route's interface to the current net's interface */
450         route->ksnr_myiface = net->ksnn_interface.ksni_index;
451         net->ksnn_interface.ksni_nroutes++;
452
453         /* peer_ni's routelist takes over my ref on 'route' */
454         list_add_tail(&route->ksnr_list, &peer_ni->ksnp_routes);
455
456         list_for_each(tmp, &peer_ni->ksnp_conns) {
457                 conn = list_entry(tmp, struct ksock_conn, ksnc_list);
458
459                 if (!rpc_cmp_addr((struct sockaddr *)&conn->ksnc_peeraddr,
460                                   (struct sockaddr *)&route->ksnr_addr))
461                         continue;
462
463                 ksocknal_associate_route_conn_locked(route, conn);
464                 /* keep going (typed routes) */
465         }
466 }
467
468 static void
469 ksocknal_del_route_locked(struct ksock_route *route)
470 {
471         struct ksock_peer_ni *peer_ni = route->ksnr_peer;
472         struct ksock_interface *iface;
473         struct ksock_conn *conn;
474         struct ksock_conn *cnxt;
475
476         LASSERT(!route->ksnr_deleted);
477
478         /* Close associated conns */
479         list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns, ksnc_list) {
480                 if (conn->ksnc_route != route)
481                         continue;
482
483                 ksocknal_close_conn_locked(conn, 0);
484         }
485
486         if (route->ksnr_myiface >= 0) {
487                 iface = ksocknal_index2iface(route->ksnr_peer->ksnp_ni,
488                                              route->ksnr_myiface);
489                 if (iface)
490                         iface->ksni_nroutes--;
491         }
492
493         route->ksnr_deleted = 1;
494         list_del(&route->ksnr_list);
495         ksocknal_route_decref(route);           /* drop peer_ni's ref */
496
497         if (list_empty(&peer_ni->ksnp_routes) &&
498             list_empty(&peer_ni->ksnp_conns)) {
499                 /* I've just removed the last route to a peer_ni with no active
500                  * connections */
501                 ksocknal_unlink_peer_locked(peer_ni);
502         }
503 }
504
505 int
506 ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id,
507                   struct sockaddr *addr)
508 {
509         struct list_head *tmp;
510         struct ksock_peer_ni *peer_ni;
511         struct ksock_peer_ni *peer2;
512         struct ksock_route *route;
513         struct ksock_route *route2;
514
515         if (id.nid == LNET_NID_ANY ||
516             id.pid == LNET_PID_ANY)
517                 return (-EINVAL);
518
519         /* Have a brand new peer_ni ready... */
520         peer_ni = ksocknal_create_peer(ni, id);
521         if (IS_ERR(peer_ni))
522                 return PTR_ERR(peer_ni);
523
524         route = ksocknal_create_route(addr);
525         if (route == NULL) {
526                 ksocknal_peer_decref(peer_ni);
527                 return (-ENOMEM);
528         }
529
530         write_lock_bh(&ksocknal_data.ksnd_global_lock);
531
532         /* always called with a ref on ni, so shutdown can't have started */
533         LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers)
534                 >= 0);
535
536         peer2 = ksocknal_find_peer_locked(ni, id);
537         if (peer2 != NULL) {
538                 ksocknal_peer_decref(peer_ni);
539                 peer_ni = peer2;
540         } else {
541                 /* peer_ni table takes my ref on peer_ni */
542                 hash_add(ksocknal_data.ksnd_peers, &peer_ni->ksnp_list, id.nid);
543         }
544
545         route2 = NULL;
546         list_for_each(tmp, &peer_ni->ksnp_routes) {
547                 route2 = list_entry(tmp, struct ksock_route, ksnr_list);
548
549                 if (rpc_cmp_addr(addr, (struct sockaddr *)&route2->ksnr_addr))
550                         break;
551
552                 route2 = NULL;
553         }
554         if (route2 == NULL) {
555                 ksocknal_add_route_locked(peer_ni, route);
556                 route->ksnr_share_count++;
557         } else {
558                 ksocknal_route_decref(route);
559                 route2->ksnr_share_count++;
560         }
561
562         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
563
564         return 0;
565 }
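
/*
 * Added sketch (not part of the original file): ksocknal_add_peer() adds
 * an explicitly configured peer.  A hypothetical caller with a host-order
 * IPv4 address and port would pass them via a sockaddr:
 *
 *      struct sockaddr_in sa = { .sin_family = AF_INET };
 *
 *      sa.sin_addr.s_addr = htonl(ipaddr);
 *      sa.sin_port = htons(port);
 *      rc = ksocknal_add_peer(ni, id, (struct sockaddr *)&sa);
 *
 * Repeating the call for the same address only bumps ksnr_share_count on
 * the existing route instead of creating a duplicate.
 */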
566
567 static void
568 ksocknal_del_peer_locked(struct ksock_peer_ni *peer_ni, __u32 ip)
569 {
570         struct ksock_conn *conn;
571         struct ksock_conn *cnxt;
572         struct ksock_route *route;
573         struct ksock_route *rnxt;
574         int nshared;
575
576         LASSERT(!peer_ni->ksnp_closing);
577
578         /* Extra ref prevents peer_ni disappearing until I'm done with it */
579         ksocknal_peer_addref(peer_ni);
580
581         list_for_each_entry_safe(route, rnxt, &peer_ni->ksnp_routes,
582                                  ksnr_list) {
583                 /* no match */
584                 if (ip) {
585                         if (route->ksnr_addr.ss_family != AF_INET)
586                                 continue;
587                         if (((struct sockaddr_in *)&route->ksnr_addr)
588                                         ->sin_addr.s_addr != htonl(ip))
589                                 continue;
590                 }
591
592                 route->ksnr_share_count = 0;
593                 /* This deletes associated conns too */
594                 ksocknal_del_route_locked(route);
595         }
596
597         nshared = 0;
598         list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list)
599                 nshared += route->ksnr_share_count;
600
601         if (nshared == 0) {
602                 /* remove everything else if there are no explicit entries
603                  * left
604                  */
605                 list_for_each_entry_safe(route, rnxt, &peer_ni->ksnp_routes,
606                                          ksnr_list) {
607                         /* we should only be removing auto-entries */
608                         LASSERT(route->ksnr_share_count == 0);
609                         ksocknal_del_route_locked(route);
610                 }
611
612                 list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns,
613                                          ksnc_list)
614                         ksocknal_close_conn_locked(conn, 0);
615         }
616
617         ksocknal_peer_decref(peer_ni);
618         /* NB peer_ni unlinks itself when last conn/route is removed */
619 }
620
621 static int
622 ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip)
623 {
624         LIST_HEAD(zombies);
625         struct hlist_node *pnxt;
626         struct ksock_peer_ni *peer_ni;
627         int lo;
628         int hi;
629         int i;
630         int rc = -ENOENT;
631
632         write_lock_bh(&ksocknal_data.ksnd_global_lock);
633
634         if (id.nid != LNET_NID_ANY) {
635                 lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers));
636                 hi = lo;
637         } else {
638                 lo = 0;
639                 hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1;
640         }
641
642         for (i = lo; i <= hi; i++) {
643                 hlist_for_each_entry_safe(peer_ni, pnxt,
644                                           &ksocknal_data.ksnd_peers[i],
645                                           ksnp_list) {
646                         if (peer_ni->ksnp_ni != ni)
647                                 continue;
648
649                         if (!((id.nid == LNET_NID_ANY ||
650                                peer_ni->ksnp_id.nid == id.nid) &&
651                               (id.pid == LNET_PID_ANY ||
652                                peer_ni->ksnp_id.pid == id.pid)))
653                                 continue;
654
655                         ksocknal_peer_addref(peer_ni);  /* a ref for me... */
656
657                         ksocknal_del_peer_locked(peer_ni, ip);
658
659                         if (peer_ni->ksnp_closing &&
660                             !list_empty(&peer_ni->ksnp_tx_queue)) {
661                                 LASSERT(list_empty(&peer_ni->ksnp_conns));
662                                 LASSERT(list_empty(&peer_ni->ksnp_routes));
663
664                                 list_splice_init(&peer_ni->ksnp_tx_queue,
665                                                  &zombies);
666                         }
667
668                         ksocknal_peer_decref(peer_ni);  /* ...till here */
669
670                         rc = 0;                         /* matched! */
671                 }
672         }
673
674         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
675
676         ksocknal_txlist_done(ni, &zombies, -ENETDOWN);
677
678         return rc;
679 }
680
681 static struct ksock_conn *
682 ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index)
683 {
684         struct ksock_peer_ni *peer_ni;
685         struct ksock_conn *conn;
686         struct list_head *ctmp;
687         int i;
688
689         read_lock(&ksocknal_data.ksnd_global_lock);
690
691         hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) {
692                 LASSERT(!peer_ni->ksnp_closing);
693
694                 if (peer_ni->ksnp_ni != ni)
695                         continue;
696
697                 list_for_each(ctmp, &peer_ni->ksnp_conns) {
698                         if (index-- > 0)
699                                 continue;
700
701                         conn = list_entry(ctmp, struct ksock_conn,
702                                           ksnc_list);
703                         ksocknal_conn_addref(conn);
704                         read_unlock(&ksocknal_data.ksnd_global_lock);
705                         return conn;
706                 }
707         }
708
709         read_unlock(&ksocknal_data.ksnd_global_lock);
710         return NULL;
711 }
712
713 static struct ksock_sched *
714 ksocknal_choose_scheduler_locked(unsigned int cpt)
715 {
716         struct ksock_sched *sched = ksocknal_data.ksnd_schedulers[cpt];
717         int i;
718
719         if (sched->kss_nthreads == 0) {
720                 cfs_percpt_for_each(sched, i, ksocknal_data.ksnd_schedulers) {
721                         if (sched->kss_nthreads > 0) {
722                                 CDEBUG(D_NET, "scheduler[%d] has no threads. selected scheduler[%d]\n",
723                                        cpt, sched->kss_cpt);
724                                 return sched;
725                         }
726                 }
727                 return NULL;
728         }
729
730         return sched;
731 }
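
/*
 * Added note (not part of the original file): the preferred scheduler is
 * the one for the connection's CPT, but if it has no threads any
 * scheduler that does have threads is returned instead.  Callers must
 * therefore re-read the CPT from the result, as ksocknal_create_conn()
 * does below:
 *
 *      sched = ksocknal_choose_scheduler_locked(cpt);
 *      if (sched != NULL)
 *              cpt = sched->kss_cpt;
 */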
732
733 int
734 ksocknal_accept(struct lnet_ni *ni, struct socket *sock)
735 {
736         struct ksock_connreq *cr;
737         int rc;
738         struct sockaddr_storage peer;
739
740         rc = lnet_sock_getaddr(sock, true, &peer);
741         LASSERT(rc == 0);               /* we succeeded before */
742
743         LIBCFS_ALLOC(cr, sizeof(*cr));
744         if (cr == NULL) {
745                 LCONSOLE_ERROR_MSG(0x12f,
746                                    "Dropping connection request from %pIS: memory exhausted\n",
747                                    &peer);
748                 return -ENOMEM;
749         }
750
751         lnet_ni_addref(ni);
752         cr->ksncr_ni   = ni;
753         cr->ksncr_sock = sock;
754
755         spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
756
757         list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
758         wake_up(&ksocknal_data.ksnd_connd_waitq);
759
760         spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
761         return 0;
762 }
763
764 static int
765 ksocknal_connecting(struct ksock_peer_ni *peer_ni, struct sockaddr *sa)
766 {
767         struct ksock_route *route;
768
769         list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list) {
770                 if (rpc_cmp_addr((struct sockaddr *)&route->ksnr_addr, sa))
771                         return route->ksnr_connecting;
772         }
773         return 0;
774 }
775
776 int
777 ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
778                      struct socket *sock, int type)
779 {
780         rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
781         LIST_HEAD(zombies);
782         struct lnet_process_id peerid;
783         struct list_head *tmp;
784         u64 incarnation;
785         struct ksock_conn *conn;
786         struct ksock_conn *conn2;
787         struct ksock_peer_ni *peer_ni = NULL;
788         struct ksock_peer_ni *peer2;
789         struct ksock_sched *sched;
790         struct ksock_hello_msg *hello;
791         int cpt;
792         struct ksock_tx *tx;
793         struct ksock_tx *txtmp;
794         int rc;
795         int rc2;
796         int active;
797         char *warn = NULL;
798
799         active = (route != NULL);
800
801         LASSERT (active == (type != SOCKLND_CONN_NONE));
802
803         LIBCFS_ALLOC(conn, sizeof(*conn));
804         if (conn == NULL) {
805                 rc = -ENOMEM;
806                 goto failed_0;
807         }
808
809         conn->ksnc_peer = NULL;
810         conn->ksnc_route = NULL;
811         conn->ksnc_sock = sock;
812         /* 2 refs: 1 for conn, plus an extra ref that prevents the socket
813          * being closed before the connection is established */
814         refcount_set(&conn->ksnc_sock_refcount, 2);
815         conn->ksnc_type = type;
816         ksocknal_lib_save_callback(sock, conn);
817         refcount_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
818
819         conn->ksnc_rx_ready = 0;
820         conn->ksnc_rx_scheduled = 0;
821
822         INIT_LIST_HEAD(&conn->ksnc_tx_queue);
823         conn->ksnc_tx_ready = 0;
824         conn->ksnc_tx_scheduled = 0;
825         conn->ksnc_tx_carrier = NULL;
826         atomic_set (&conn->ksnc_tx_nob, 0);
827
828         LIBCFS_ALLOC(hello, offsetof(struct ksock_hello_msg,
829                                      kshm_ips[LNET_INTERFACES_NUM]));
830         if (hello == NULL) {
831                 rc = -ENOMEM;
832                 goto failed_1;
833         }
834
835         /* stash conn's local and remote addrs */
836         rc = ksocknal_lib_get_conn_addrs (conn);
837         if (rc != 0)
838                 goto failed_1;
839
840         /* Find out/confirm peer_ni's NID and connection type and get the
841          * vector of interfaces she's willing to let me connect to.
842          * Passive connections use the listener timeout since the peer_ni sends
843          * eagerly */
844
845         if (active) {
846                 peer_ni = route->ksnr_peer;
847                 LASSERT(ni == peer_ni->ksnp_ni);
848
849                 /* Active connection sends HELLO eagerly */
850                 hello->kshm_nips = 0;
851                 peerid = peer_ni->ksnp_id;
852
853                 write_lock_bh(global_lock);
854                 conn->ksnc_proto = peer_ni->ksnp_proto;
855                 write_unlock_bh(global_lock);
856
857                 if (conn->ksnc_proto == NULL) {
858                          conn->ksnc_proto = &ksocknal_protocol_v3x;
859 #if SOCKNAL_VERSION_DEBUG
860                          if (*ksocknal_tunables.ksnd_protocol == 2)
861                                  conn->ksnc_proto = &ksocknal_protocol_v2x;
862                          else if (*ksocknal_tunables.ksnd_protocol == 1)
863                                  conn->ksnc_proto = &ksocknal_protocol_v1x;
864 #endif
865                 }
866
867                 rc = ksocknal_send_hello (ni, conn, peerid.nid, hello);
868                 if (rc != 0)
869                         goto failed_1;
870         } else {
871                 peerid.nid = LNET_NID_ANY;
872                 peerid.pid = LNET_PID_ANY;
873
874                 /* Passive, get protocol from peer_ni */
875                 conn->ksnc_proto = NULL;
876         }
877
878         rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation);
879         if (rc < 0)
880                 goto failed_1;
881
882         LASSERT(rc == 0 || active);
883         LASSERT(conn->ksnc_proto != NULL);
884         LASSERT(peerid.nid != LNET_NID_ANY);
885
886         cpt = lnet_cpt_of_nid(peerid.nid, ni);
887
888         if (active) {
889                 ksocknal_peer_addref(peer_ni);
890                 write_lock_bh(global_lock);
891         } else {
892                 peer_ni = ksocknal_create_peer(ni, peerid);
893                 if (IS_ERR(peer_ni)) {
894                         rc = PTR_ERR(peer_ni);
895                         goto failed_1;
896                 }
897
898                 write_lock_bh(global_lock);
899
900                 /* called with a ref on ni, so shutdown can't have started */
901                 LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) >= 0);
902
903                 peer2 = ksocknal_find_peer_locked(ni, peerid);
904                 if (peer2 == NULL) {
905                         /* NB this puts an "empty" peer_ni in the peer_ni
906                          * table (which takes my ref) */
907                         hash_add(ksocknal_data.ksnd_peers,
908                                  &peer_ni->ksnp_list, peerid.nid);
909                 } else {
910                         ksocknal_peer_decref(peer_ni);
911                         peer_ni = peer2;
912                 }
913
914                 /* +1 ref for me */
915                 ksocknal_peer_addref(peer_ni);
916                 peer_ni->ksnp_accepting++;
917
918                 /* Am I already connecting to this guy?  Resolve in
919                  * favour of higher NID...
920                  */
921                 if (peerid.nid < ni->ni_nid &&
922                     ksocknal_connecting(peer_ni, ((struct sockaddr *)
923                                                   &conn->ksnc_peeraddr))) {
924                         rc = EALREADY;
925                         warn = "connection race resolution";
926                         goto failed_2;
927                 }
928         }
929
930         if (peer_ni->ksnp_closing ||
931             (active && route->ksnr_deleted)) {
932                 /* peer_ni/route got closed under me */
933                 rc = -ESTALE;
934                 warn = "peer_ni/route removed";
935                 goto failed_2;
936         }
937
938         if (peer_ni->ksnp_proto == NULL) {
939                 /* Never connected before.
940                  * NB recv_hello may have returned EPROTO to signal my peer_ni
941                  * wants a different protocol than the one I asked for.
942                  */
943                 LASSERT(list_empty(&peer_ni->ksnp_conns));
944
945                 peer_ni->ksnp_proto = conn->ksnc_proto;
946                 peer_ni->ksnp_incarnation = incarnation;
947         }
948
949         if (peer_ni->ksnp_proto != conn->ksnc_proto ||
950             peer_ni->ksnp_incarnation != incarnation) {
951                 /* peer_ni rebooted or I've got the wrong protocol version */
952                 ksocknal_close_peer_conns_locked(peer_ni, NULL, 0);
953
954                 peer_ni->ksnp_proto = NULL;
955                 rc = ESTALE;
956                 warn = peer_ni->ksnp_incarnation != incarnation ?
957                         "peer_ni rebooted" :
958                         "wrong proto version";
959                 goto failed_2;
960         }
961
962         switch (rc) {
963         default:
964                 LBUG();
965         case 0:
966                 break;
967         case EALREADY:
968                 warn = "lost conn race";
969                 goto failed_2;
970         case EPROTO:
971                 warn = "retry with different protocol version";
972                 goto failed_2;
973         }
974
975         /* Refuse to duplicate an existing connection, unless this is a
976          * loopback connection */
977         if (!rpc_cmp_addr((struct sockaddr *)&conn->ksnc_peeraddr,
978                           (struct sockaddr *)&conn->ksnc_myaddr)) {
979                 list_for_each(tmp, &peer_ni->ksnp_conns) {
980                         conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
981
982                         if (!rpc_cmp_addr(
983                                     (struct sockaddr *)&conn2->ksnc_peeraddr,
984                                     (struct sockaddr *)&conn->ksnc_peeraddr) ||
985                             !rpc_cmp_addr(
986                                     (struct sockaddr *)&conn2->ksnc_myaddr,
987                                     (struct sockaddr *)&conn->ksnc_myaddr) ||
988                             conn2->ksnc_type != conn->ksnc_type)
989                                 continue;
990
991                         /* Reply on a passive connection attempt so the peer_ni
992                          * realises we're connected. */
993                         LASSERT (rc == 0);
994                         if (!active)
995                                 rc = EALREADY;
996
997                         warn = "duplicate";
998                         goto failed_2;
999                 }
1000         }
1001
1002         /* If the connection created by this route didn't bind to the IP
1003          * address the route connected to, the connection/route matching
1004          * code below probably isn't going to work. */
1005         if (active &&
1006             !rpc_cmp_addr((struct sockaddr *)&route->ksnr_addr,
1007                           (struct sockaddr *)&conn->ksnc_peeraddr)) {
1008                 CERROR("Route %s %pIS connected to %pIS\n",
1009                        libcfs_id2str(peer_ni->ksnp_id),
1010                        &route->ksnr_addr,
1011                        &conn->ksnc_peeraddr);
1012         }
1013
1014         /* Search for a route corresponding to the new connection and
1015          * create an association.  This allows incoming connections created
1016          * by routes in my peer_ni to match my own route entries so I don't
1017          * continually create duplicate routes. */
1018         list_for_each(tmp, &peer_ni->ksnp_routes) {
1019                 route = list_entry(tmp, struct ksock_route, ksnr_list);
1020
1021                 if (!rpc_cmp_addr((struct sockaddr *)&route->ksnr_addr,
1022                                   (struct sockaddr *)&conn->ksnc_peeraddr))
1023                         continue;
1024
1025                 ksocknal_associate_route_conn_locked(route, conn);
1026                 break;
1027         }
1028
1029         conn->ksnc_peer = peer_ni;                 /* conn takes my ref on peer_ni */
1030         peer_ni->ksnp_last_alive = ktime_get_seconds();
1031         peer_ni->ksnp_send_keepalive = 0;
1032         peer_ni->ksnp_error = 0;
1033
1034         sched = ksocknal_choose_scheduler_locked(cpt);
1035         if (!sched) {
1036                 CERROR("no schedulers available. node is unhealthy\n");
1037                 goto failed_2;
1038         }
1039         /*
1040          * The cpt might have changed if we ended up selecting a scheduler
1041          * that is not native to this cpt, so use the scheduler's cpt instead.
1042          */
1043         cpt = sched->kss_cpt;
1044         sched->kss_nconns++;
1045         conn->ksnc_scheduler = sched;
1046
1047         conn->ksnc_tx_last_post = ktime_get_seconds();
1048         /* Set the deadline for the outgoing HELLO to drain */
1049         conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
1050         conn->ksnc_tx_deadline = ktime_get_seconds() +
1051                                  ksocknal_timeout();
1052         smp_mb();   /* order with adding to peer_ni's conn list */
1053
1054         list_add(&conn->ksnc_list, &peer_ni->ksnp_conns);
1055         ksocknal_conn_addref(conn);
1056
1057         ksocknal_new_packet(conn, 0);
1058
1059         conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
1060
1061         /* Take packets blocking for this connection. */
1062         list_for_each_entry_safe(tx, txtmp, &peer_ni->ksnp_tx_queue, tx_list) {
1063                 if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) ==
1064                     SOCKNAL_MATCH_NO)
1065                         continue;
1066
1067                 list_del(&tx->tx_list);
1068                 ksocknal_queue_tx_locked(tx, conn);
1069         }
1070
1071         write_unlock_bh(global_lock);
1072
1073         /* We've now got a new connection.  Any errors from here on are just
1074          * like "normal" comms errors and we close the connection normally.
1075          * NB (a) we still have to send the reply HELLO for passive
1076          *        connections,
1077          *    (b) normal I/O on the conn is blocked until I setup and call the
1078          *        socket callbacks.
1079          */
1080
1081         CDEBUG(D_NET, "New conn %s p %d.x %pIS -> %pISp"
1082                " incarnation:%lld sched[%d]\n",
1083                libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
1084                &conn->ksnc_myaddr, &conn->ksnc_peeraddr,
1085                incarnation, cpt);
1086
1087         if (!active) {
1088                 hello->kshm_nips = 0;
1089                 rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
1090         }
1091
1092         LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg,
1093                                     kshm_ips[LNET_INTERFACES_NUM]));
1094
1095         /* setup the socket AFTER I've received hello (it disables
1096          * SO_LINGER).  I might call back to the acceptor who may want
1097          * to send a protocol version response and then close the
1098          * socket; this ensures the socket only tears down after the
1099          * response has been sent. */
1100         if (rc == 0)
1101                 rc = ksocknal_lib_setup_sock(sock);
1102
1103         write_lock_bh(global_lock);
1104
1105         /* NB my callbacks block while I hold ksnd_global_lock */
1106         ksocknal_lib_set_callback(sock, conn);
1107
1108         if (!active)
1109                 peer_ni->ksnp_accepting--;
1110
1111         write_unlock_bh(global_lock);
1112
1113         if (rc != 0) {
1114                 write_lock_bh(global_lock);
1115                 if (!conn->ksnc_closing) {
1116                         /* could be closed by another thread */
1117                         ksocknal_close_conn_locked(conn, rc);
1118                 }
1119                 write_unlock_bh(global_lock);
1120         } else if (ksocknal_connsock_addref(conn) == 0) {
1121                 /* Allow I/O to proceed. */
1122                 ksocknal_read_callback(conn);
1123                 ksocknal_write_callback(conn);
1124                 ksocknal_connsock_decref(conn);
1125         }
1126
1127         ksocknal_connsock_decref(conn);
1128         ksocknal_conn_decref(conn);
1129         return rc;
1130
1131 failed_2:
1132         if (!peer_ni->ksnp_closing &&
1133             list_empty(&peer_ni->ksnp_conns) &&
1134             list_empty(&peer_ni->ksnp_routes)) {
1135                 list_splice_init(&peer_ni->ksnp_tx_queue, &zombies);
1136                 ksocknal_unlink_peer_locked(peer_ni);
1137         }
1138
1139         write_unlock_bh(global_lock);
1140
1141         if (warn != NULL) {
1142                 if (rc < 0)
1143                         CERROR("Not creating conn %s type %d: %s\n",
1144                                libcfs_id2str(peerid), conn->ksnc_type, warn);
1145                 else
1146                         CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
1147                               libcfs_id2str(peerid), conn->ksnc_type, warn);
1148         }
1149
1150         if (!active) {
1151                 if (rc > 0) {
1152                         /* Request retry by replying with CONN_NONE;
1153                          * ksnc_proto has already been set */
1154                         conn->ksnc_type = SOCKLND_CONN_NONE;
1155                         hello->kshm_nips = 0;
1156                         ksocknal_send_hello(ni, conn, peerid.nid, hello);
1157                 }
1158
1159                 write_lock_bh(global_lock);
1160                 peer_ni->ksnp_accepting--;
1161                 write_unlock_bh(global_lock);
1162         }
1163
1164         /*
1165          * If we get here without an error code, just use -EALREADY.
1166          * Depending on how we got here, the error may be positive
1167          * or negative. Normalize the value for ksocknal_txlist_done().
1168          */
1169         rc2 = (rc == 0 ? -EALREADY : (rc > 0 ? -rc : rc));
1170         ksocknal_txlist_done(ni, &zombies, rc2);
1171         ksocknal_peer_decref(peer_ni);
1172
1173 failed_1:
1174         if (hello != NULL)
1175                 LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg,
1176                                             kshm_ips[LNET_INTERFACES_NUM]));
1177
1178         LIBCFS_FREE(conn, sizeof(*conn));
1179
1180 failed_0:
1181         sock_release(sock);
1182         return rc;
1183 }
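
/*
 * Added note (not part of the original file): ksocknal_create_conn() uses
 * a sign convention for its status: negative values are hard errors,
 * while positive EALREADY/EPROTO/ESTALE mark benign races (lost
 * connection race, protocol renegotiation, stale peer_ni/route) that, on
 * the passive side, are answered with a SOCKLND_CONN_NONE hello so the
 * peer retries.  The failure path normalizes the value before completing
 * queued TXs:
 *
 *      rc2 = (rc == 0 ? -EALREADY : (rc > 0 ? -rc : rc));
 *      ksocknal_txlist_done(ni, &zombies, rc2);
 */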
1184
1185 void
1186 ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
1187 {
1188         /* This just does the immediate housekeeping, and queues the
1189          * connection for the reaper to terminate.
1190          * Caller holds ksnd_global_lock exclusively in irq context */
1191         struct ksock_peer_ni *peer_ni = conn->ksnc_peer;
1192         struct ksock_route *route;
1193         struct ksock_conn *conn2;
1194         struct list_head *tmp;
1195
1196         LASSERT(peer_ni->ksnp_error == 0);
1197         LASSERT(!conn->ksnc_closing);
1198         conn->ksnc_closing = 1;
1199
1200         /* ksnd_deathrow_conns takes over peer_ni's ref */
1201         list_del(&conn->ksnc_list);
1202
1203         route = conn->ksnc_route;
1204         if (route != NULL) {
1205                 /* dissociate conn from route... */
1206                 LASSERT(!route->ksnr_deleted);
1207                 LASSERT((route->ksnr_connected & BIT(conn->ksnc_type)) != 0);
1208
1209                 conn2 = NULL;
1210                 list_for_each(tmp, &peer_ni->ksnp_conns) {
1211                         conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
1212
1213                         if (conn2->ksnc_route == route &&
1214                             conn2->ksnc_type == conn->ksnc_type)
1215                                 break;
1216
1217                         conn2 = NULL;
1218                 }
1219                 if (conn2 == NULL)
1220                         route->ksnr_connected &= ~BIT(conn->ksnc_type);
1221
1222                 conn->ksnc_route = NULL;
1223
1224                 ksocknal_route_decref(route);   /* drop conn's ref on route */
1225         }
1226
1227         if (list_empty(&peer_ni->ksnp_conns)) {
1228                 /* No more connections to this peer_ni */
1229
1230                 if (!list_empty(&peer_ni->ksnp_tx_queue)) {
1231                         struct ksock_tx *tx;
1232
1233                         LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
1234
1235                         /* throw them to the last connection...
1236                          * these TXs will be sent to /dev/null by the scheduler */
1237                         list_for_each_entry(tx, &peer_ni->ksnp_tx_queue,
1238                                             tx_list)
1239                                 ksocknal_tx_prep(conn, tx);
1240
1241                         spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
1242                         list_splice_init(&peer_ni->ksnp_tx_queue,
1243                                          &conn->ksnc_tx_queue);
1244                         spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
1245                 }
1246
1247                 /* renegotiate protocol version */
1248                 peer_ni->ksnp_proto = NULL;
1249                 /* stash last conn close reason */
1250                 peer_ni->ksnp_error = error;
1251
1252                 if (list_empty(&peer_ni->ksnp_routes)) {
1253                         /* I've just closed last conn belonging to a
1254                          * peer_ni with no routes to it */
1255                         ksocknal_unlink_peer_locked(peer_ni);
1256                 }
1257         }
1258
1259         spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
1260
1261         list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns);
1262         wake_up(&ksocknal_data.ksnd_reaper_waitq);
1263
1264         spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
1265 }
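
/*
 * Added note (not part of the original file): closing is a staged
 * hand-off.  ksocknal_close_conn_locked() above only flags the conn,
 * detaches it from its route and queues it on ksnd_deathrow_conns; the
 * reaper then runs ksocknal_terminate_conn() and, once the refcounts
 * drain to zero, ksocknal_destroy_conn() below.
 */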
1266
1267 void
1268 ksocknal_peer_failed(struct ksock_peer_ni *peer_ni)
1269 {
1270         bool notify = false;
1271         time64_t last_alive = 0;
1272
1273         /* There has been a connection failure or comms error; but I'll only
1274          * tell LNET I think the peer_ni is dead if it's to another kernel and
1275          * there are no connections or connection attempts in existence. */
1276
1277         read_lock(&ksocknal_data.ksnd_global_lock);
1278
1279         if ((peer_ni->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
1280              list_empty(&peer_ni->ksnp_conns) &&
1281              peer_ni->ksnp_accepting == 0 &&
1282              ksocknal_find_connecting_route_locked(peer_ni) == NULL) {
1283                 notify = true;
1284                 last_alive = peer_ni->ksnp_last_alive;
1285         }
1286
1287         read_unlock(&ksocknal_data.ksnd_global_lock);
1288
1289         if (notify)
1290                 lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid,
1291                             false, false, last_alive);
1292 }
1293
1294 void
1295 ksocknal_finalize_zcreq(struct ksock_conn *conn)
1296 {
1297         struct ksock_peer_ni *peer_ni = conn->ksnc_peer;
1298         struct ksock_tx *tx;
1299         struct ksock_tx *tmp;
1300         LIST_HEAD(zlist);
1301
1302         /* NB safe to finalize TXs because closing of socket will
1303          * abort all buffered data */
1304         LASSERT(conn->ksnc_sock == NULL);
1305
1306         spin_lock(&peer_ni->ksnp_lock);
1307
1308         list_for_each_entry_safe(tx, tmp, &peer_ni->ksnp_zc_req_list, tx_zc_list) {
1309                 if (tx->tx_conn != conn)
1310                         continue;
1311
1312                 LASSERT(tx->tx_msg.ksm_zc_cookies[0] != 0);
1313
1314                 tx->tx_msg.ksm_zc_cookies[0] = 0;
1315                 tx->tx_zc_aborted = 1;  /* mark it as not-acked */
1316                 list_move(&tx->tx_zc_list, &zlist);
1317         }
1318
1319         spin_unlock(&peer_ni->ksnp_lock);
1320
1321         while (!list_empty(&zlist)) {
1322                 tx = list_entry(zlist.next, struct ksock_tx, tx_zc_list);
1323
1324                 list_del(&tx->tx_zc_list);
1325                 ksocknal_tx_decref(tx);
1326         }
1327 }
1328
1329 void
1330 ksocknal_terminate_conn(struct ksock_conn *conn)
1331 {
1332         /* This gets called by the reaper (guaranteed thread context) to
1333          * disengage the socket from its callbacks and close it.
1334          * ksnc_refcount will eventually hit zero, and then the reaper will
1335          * destroy it.
1336          */
1337         struct ksock_peer_ni *peer_ni = conn->ksnc_peer;
1338         struct ksock_sched *sched = conn->ksnc_scheduler;
1339         bool failed = false;
1340
1341         LASSERT(conn->ksnc_closing);
1342
1343         /* wake up the scheduler to "send" all remaining packets to /dev/null */
1344         spin_lock_bh(&sched->kss_lock);
1345
1346         /* a closing conn is always ready to tx */
1347         conn->ksnc_tx_ready = 1;
1348
1349         if (!conn->ksnc_tx_scheduled &&
1350             !list_empty(&conn->ksnc_tx_queue)) {
1351                 list_add_tail(&conn->ksnc_tx_list,
1352                               &sched->kss_tx_conns);
1353                 conn->ksnc_tx_scheduled = 1;
1354                 /* extra ref for scheduler */
1355                 ksocknal_conn_addref(conn);
1356
1357                 wake_up (&sched->kss_waitq);
1358         }
1359
1360         spin_unlock_bh(&sched->kss_lock);
1361
1362         /* serialise with callbacks */
1363         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1364
1365         ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
1366
1367         /* OK, so this conn may not be completely disengaged from its
1368          * scheduler yet, but it _has_ committed to terminate...
1369          */
1370         conn->ksnc_scheduler->kss_nconns--;
1371
1372         if (peer_ni->ksnp_error != 0) {
1373                 /* peer_ni's last conn closed in error */
1374                 LASSERT(list_empty(&peer_ni->ksnp_conns));
1375                 failed = true;
1376                 peer_ni->ksnp_error = 0;     /* avoid multiple notifications */
1377         }
1378
1379         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1380
1381         if (failed)
1382                 ksocknal_peer_failed(peer_ni);
1383
1384         /* The socket is closed on the final put; either here, or in
1385          * ksocknal_{send,recv}msg().  Since we set up the linger2 option
1386          * when the connection was established, this will close the socket
1387          * immediately, aborting anything buffered in it. Any hung
1388          * zero-copy transmits will therefore complete in finite time.
1389          */
1390         ksocknal_connsock_decref(conn);
1391 }
1392
1393 void
1394 ksocknal_queue_zombie_conn(struct ksock_conn *conn)
1395 {
1396         /* Queue the conn for the reaper to destroy */
1397         LASSERT(refcount_read(&conn->ksnc_conn_refcount) == 0);
1398         spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
1399
1400         list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
1401         wake_up(&ksocknal_data.ksnd_reaper_waitq);
1402
1403         spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
1404 }
1405
1406 void
1407 ksocknal_destroy_conn(struct ksock_conn *conn)
1408 {
1409         time64_t last_rcv;
1410
1411         /* Final coup-de-grace of the reaper */
1412         CDEBUG (D_NET, "connection %p\n", conn);
1413
1414         LASSERT(refcount_read(&conn->ksnc_conn_refcount) == 0);
1415         LASSERT(refcount_read(&conn->ksnc_sock_refcount) == 0);
1416         LASSERT (conn->ksnc_sock == NULL);
1417         LASSERT (conn->ksnc_route == NULL);
1418         LASSERT (!conn->ksnc_tx_scheduled);
1419         LASSERT (!conn->ksnc_rx_scheduled);
1420         LASSERT(list_empty(&conn->ksnc_tx_queue));
1421
1422         /* complete current receive if any */
1423         switch (conn->ksnc_rx_state) {
1424         case SOCKNAL_RX_LNET_PAYLOAD:
1425                 last_rcv = conn->ksnc_rx_deadline -
1426                            ksocknal_timeout();
1427                 CERROR("Completing partial receive from %s[%d], ip %pISp, with error, wanted: %d, left: %d, last alive is %lld secs ago\n",
1428                        libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
1429                        &conn->ksnc_peeraddr,
1430                        conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
1431                        ktime_get_seconds() - last_rcv);
1432                 if (conn->ksnc_lnet_msg)
1433                         conn->ksnc_lnet_msg->msg_health_status =
1434                                 LNET_MSG_STATUS_REMOTE_ERROR;
1435                 lnet_finalize(conn->ksnc_lnet_msg, -EIO);
1436                 break;
1437         case SOCKNAL_RX_LNET_HEADER:
1438                 if (conn->ksnc_rx_started)
1439                         CERROR("Incomplete receive of lnet header from %s, ip %pISp, with error, protocol: %d.x.\n",
1440                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1441                                &conn->ksnc_peeraddr,
1442                                conn->ksnc_proto->pro_version);
1443                 break;
1444         case SOCKNAL_RX_KSM_HEADER:
1445                 if (conn->ksnc_rx_started)
1446                         CERROR("Incomplete receive of ksock message from %s, ip %pISp, with error, protocol: %d.x.\n",
1447                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1448                                &conn->ksnc_peeraddr,
1449                                conn->ksnc_proto->pro_version);
1450                 break;
1451         case SOCKNAL_RX_SLOP:
1452                 if (conn->ksnc_rx_started)
1453                         CERROR("Incomplete receive of slops from %s, ip %pISp, with error\n",
1454                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1455                                &conn->ksnc_peeraddr);
1456                 break;
1457         default:
1458                 LBUG ();
1459                 break;
1460         }
1461
1462         ksocknal_peer_decref(conn->ksnc_peer);
1463
1464         LIBCFS_FREE (conn, sizeof (*conn));
1465 }
1466
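/* Caller must hold ksnd_global_lock for writing (see the write_lock_bh()
 * callers below). */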
1467 int
1468 ksocknal_close_peer_conns_locked(struct ksock_peer_ni *peer_ni,
1469                                  struct sockaddr *addr, int why)
1470 {
1471         struct ksock_conn *conn;
1472         struct ksock_conn *cnxt;
1473         int count = 0;
1474
1475         list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns, ksnc_list) {
1476                 if (!addr ||
1477                     rpc_cmp_addr(addr,
1478                                  (struct sockaddr *)&conn->ksnc_peeraddr)) {
1479                         count++;
1480                         ksocknal_close_conn_locked(conn, why);
1481                 }
1482         }
1483
1484         return count;
1485 }
1486
1487 int
1488 ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why)
1489 {
1490         struct ksock_peer_ni *peer_ni = conn->ksnc_peer;
1491         int count;
1492
1493         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1494
1495         count = ksocknal_close_peer_conns_locked(
1496                 peer_ni, (struct sockaddr *)&conn->ksnc_peeraddr, why);
1497
1498         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1499
1500         return count;
1501 }
1502
1503 int
1504 ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr)
1505 {
1506         struct ksock_peer_ni *peer_ni;
1507         struct hlist_node *pnxt;
1508         int lo;
1509         int hi;
1510         int i;
1511         int count = 0;
1512         struct sockaddr_in sa = {.sin_family = AF_INET};
1513
1514         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1515
1516         if (id.nid != LNET_NID_ANY) {
1517                 lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers));
1518                 hi = lo;
1519         } else {
1520                 lo = 0;
1521                 hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1;
1522         }
1523
1524         sa.sin_addr.s_addr = htonl(ipaddr);
1525         for (i = lo; i <= hi; i++) {
1526                 hlist_for_each_entry_safe(peer_ni, pnxt,
1527                                           &ksocknal_data.ksnd_peers[i],
1528                                           ksnp_list) {
1529
1530                         if (!((id.nid == LNET_NID_ANY ||
1531                                id.nid == peer_ni->ksnp_id.nid) &&
1532                               (id.pid == LNET_PID_ANY ||
1533                                id.pid == peer_ni->ksnp_id.pid)))
1534                                 continue;
1535
1536                         count += ksocknal_close_peer_conns_locked(
1537                                 peer_ni,
1538                                 ipaddr ? (struct sockaddr *)&sa : NULL, 0);
1539                 }
1540         }
1541
1542         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1543
1544         /* wildcards always succeed */
1545         if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
1546                 return 0;
1547
1548         return (count == 0 ? -ENOENT : 0);
1549 }
1550
1551 void
1552 ksocknal_notify_gw_down(lnet_nid_t gw_nid)
1553 {
1554         /* The LNet router layer is notifying us of a change in
1555          * gateway state...
1556          */
1557         struct lnet_process_id id = {
1558                 .nid    = gw_nid,
1559                 .pid    = LNET_PID_ANY,
1560         };
1561
1562         CDEBUG(D_NET, "gw %s down\n", libcfs_nid2str(gw_nid));
1563
1564         /* If the gateway crashed, close all open connections... */
1565         ksocknal_close_matching_conns(id, 0);
1566         return;
1567
1568         /* We can only establish new connections
1569          * if we have autoroutes, and these connect on demand. */
1570 }
1571
1572 static void
1573 ksocknal_push_peer(struct ksock_peer_ni *peer_ni)
1574 {
1575         int index;
1576         int i;
1577         struct list_head *tmp;
1578         struct ksock_conn *conn;
1579
1580         for (index = 0; ; index++) {
1581                 read_lock(&ksocknal_data.ksnd_global_lock);
1582
1583                 i = 0;
1584                 conn = NULL;
1585
1586                 list_for_each(tmp, &peer_ni->ksnp_conns) {
1587                         if (i++ == index) {
1588                                 conn = list_entry(tmp, struct ksock_conn,
1589                                                   ksnc_list);
1590                                 ksocknal_conn_addref(conn);
1591                                 break;
1592                         }
1593                 }
1594
1595                 read_unlock(&ksocknal_data.ksnd_global_lock);
1596
1597                 if (conn == NULL)
1598                         break;
1599
1600                 ksocknal_lib_push_conn (conn);
1601                 ksocknal_conn_decref(conn);
1602         }
1603 }
1604
1605 static int
1606 ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
1607 {
1608         int lo;
1609         int hi;
1610         int bkt;
1611         int rc = -ENOENT;
1612
1613         if (id.nid != LNET_NID_ANY) {
1614                 lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers));
1615                 hi = lo;
1616         } else {
1617                 lo = 0;
1618                 hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1;
1619         }
1620
1621         for (bkt = lo; bkt <= hi; bkt++) {
1622                 int peer_off; /* search offset within this hash bucket */
1623
1624                 for (peer_off = 0; ; peer_off++) {
1625                         struct ksock_peer_ni *peer_ni;
1626                         int           i = 0;
1627
1628                         read_lock(&ksocknal_data.ksnd_global_lock);
1629                         hlist_for_each_entry(peer_ni,
1630                                              &ksocknal_data.ksnd_peers[bkt],
1631                                              ksnp_list) {
1632                                 if (!((id.nid == LNET_NID_ANY ||
1633                                        id.nid == peer_ni->ksnp_id.nid) &&
1634                                       (id.pid == LNET_PID_ANY ||
1635                                        id.pid == peer_ni->ksnp_id.pid)))
1636                                         continue;
1637
1638                                 if (i++ == peer_off) {
1639                                         ksocknal_peer_addref(peer_ni);
1640                                         break;
1641                                 }
1642                         }
1643                         read_unlock(&ksocknal_data.ksnd_global_lock);
1644
1645                         if (i <= peer_off) /* no match */
1646                                 break;
1647
1648                         rc = 0;
1649                         ksocknal_push_peer(peer_ni);
1650                         ksocknal_peer_decref(peer_ni);
1651                 }
1652         }
1653         return rc;
1654 }
1655
1656 int
1657 ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
1658 {
1659         struct lnet_process_id id = {0};
1660         struct libcfs_ioctl_data *data = arg;
1661         int rc;
1662
1663         switch(cmd) {
1664         case IOC_LIBCFS_GET_INTERFACE: {
1665                 struct ksock_net *net = ni->ni_data;
1666                 struct ksock_interface *iface;
1667                 struct sockaddr_in *sa;
1668
1669                 read_lock(&ksocknal_data.ksnd_global_lock);
1670
1671                 if (data->ioc_count >= 1) {
1672                         rc = -ENOENT;
1673                 } else {
1674                         rc = 0;
1675                         iface = &net->ksnn_interface;
1676
1677                         sa = (void *)&iface->ksni_addr;
1678                         if (sa->sin_family == AF_INET)
1679                                 data->ioc_u32[0] = ntohl(sa->sin_addr.s_addr);
1680                         else
1681                                 data->ioc_u32[0] = 0xFFFFFFFF;
1682                         data->ioc_u32[1] = iface->ksni_netmask;
1683                         data->ioc_u32[2] = iface->ksni_npeers;
1684                         data->ioc_u32[3] = iface->ksni_nroutes;
1685                 }
1686
1687                 read_unlock(&ksocknal_data.ksnd_global_lock);
1688                 return rc;
1689         }
1690
1691         case IOC_LIBCFS_GET_PEER: {
1692                 __u32            myip = 0;
1693                 __u32            ip = 0;
1694                 int              port = 0;
1695                 int              conn_count = 0;
1696                 int              share_count = 0;
1697
1698                 rc = ksocknal_get_peer_info(ni, data->ioc_count,
1699                                             &id, &myip, &ip, &port,
1700                                             &conn_count,  &share_count);
1701                 if (rc != 0)
1702                         return rc;
1703
1704                 data->ioc_nid    = id.nid;
1705                 data->ioc_count  = share_count;
1706                 data->ioc_u32[0] = ip;
1707                 data->ioc_u32[1] = port;
1708                 data->ioc_u32[2] = myip;
1709                 data->ioc_u32[3] = conn_count;
1710                 data->ioc_u32[4] = id.pid;
1711                 return 0;
1712         }
1713
1714         case IOC_LIBCFS_ADD_PEER: {
1715                 struct sockaddr_in sa = {.sin_family = AF_INET};
1716
1717                 id.nid = data->ioc_nid;
1718                 id.pid = LNET_PID_LUSTRE;
1719                 sa.sin_addr.s_addr = htonl(data->ioc_u32[0]);
1720                 sa.sin_port = htons(data->ioc_u32[1]);
1721                 return ksocknal_add_peer(ni, id, (struct sockaddr *)&sa);
1722         }
1723         case IOC_LIBCFS_DEL_PEER:
1724                 id.nid = data->ioc_nid;
1725                 id.pid = LNET_PID_ANY;
1726                 return ksocknal_del_peer (ni, id,
1727                                           data->ioc_u32[0]); /* IP */
1728
1729         case IOC_LIBCFS_GET_CONN: {
1730                 int           txmem;
1731                 int           rxmem;
1732                 int           nagle;
1733                 struct ksock_conn *conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
1734                 struct sockaddr_in *psa, *mysa;
1735
1736                 if (conn == NULL)
1737                         return -ENOENT;
1738                 psa = (void *)&conn->ksnc_peeraddr;
1739                 mysa = (void *)&conn->ksnc_myaddr;
1740                 ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
1741
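                /* Layout of the reply filled in below: ioc_nid is the peer
                 * NID, ioc_count and ioc_u32[5] are the socket tx/rx buffer
                 * sizes, ioc_flags is the Nagle setting, ioc_u32[0..2] are
                 * the peer IP, peer port and local IP (host byte order, or
                 * 0xFFFFFFFF if not IPv4), ioc_u32[3] is the connection
                 * type, ioc_u32[4] the scheduler CPT and ioc_u32[6] the
                 * peer pid.
                 */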
1742                 data->ioc_count  = txmem;
1743                 data->ioc_nid    = conn->ksnc_peer->ksnp_id.nid;
1744                 data->ioc_flags  = nagle;
1745                 if (psa->sin_family == AF_INET)
1746                         data->ioc_u32[0] = ntohl(psa->sin_addr.s_addr);
1747                 else
1748                         data->ioc_u32[0] = 0xFFFFFFFF;
1749                 data->ioc_u32[1] = rpc_get_port((struct sockaddr *)
1750                                                 &conn->ksnc_peeraddr);
1751                 if (mysa->sin_family == AF_INET)
1752                         data->ioc_u32[2] = ntohl(mysa->sin_addr.s_addr);
1753                 else
1754                         data->ioc_u32[2] = 0xFFFFFFFF;
1755                 data->ioc_u32[3] = conn->ksnc_type;
1756                 data->ioc_u32[4] = conn->ksnc_scheduler->kss_cpt;
1757                 data->ioc_u32[5] = rxmem;
1758                 data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
1759                 ksocknal_conn_decref(conn);
1760                 return 0;
1761         }
1762
1763         case IOC_LIBCFS_CLOSE_CONNECTION:
1764                 id.nid = data->ioc_nid;
1765                 id.pid = LNET_PID_ANY;
1766                 return ksocknal_close_matching_conns (id,
1767                                                       data->ioc_u32[0]);
1768
1769         case IOC_LIBCFS_REGISTER_MYNID:
1770                 /* Ignore if this is a noop */
1771                 if (data->ioc_nid == ni->ni_nid)
1772                         return 0;
1773
1774                 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
1775                        libcfs_nid2str(data->ioc_nid),
1776                        libcfs_nid2str(ni->ni_nid));
1777                 return -EINVAL;
1778
1779         case IOC_LIBCFS_PUSH_CONNECTION:
1780                 id.nid = data->ioc_nid;
1781                 id.pid = LNET_PID_ANY;
1782                 return ksocknal_push(ni, id);
1783
1784         default:
1785                 return -EINVAL;
1786         }
1787         /* not reached */
1788 }
1789
1790 static void
1791 ksocknal_free_buffers (void)
1792 {
1793         LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
1794
1795         if (ksocknal_data.ksnd_schedulers != NULL)
1796                 cfs_percpt_free(ksocknal_data.ksnd_schedulers);
1797
1798         spin_lock(&ksocknal_data.ksnd_tx_lock);
1799
1800         if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
1801                 LIST_HEAD(zlist);
1802                 struct ksock_tx *tx;
1803
1804                 list_splice_init(&ksocknal_data.ksnd_idle_noop_txs, &zlist);
1805                 spin_unlock(&ksocknal_data.ksnd_tx_lock);
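                /* zlist now owns the idle noop txs, so they can be freed
                 * below without holding ksnd_tx_lock */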
1806
1807                 while (!list_empty(&zlist)) {
1808                         tx = list_entry(zlist.next, struct ksock_tx, tx_list);
1809                         list_del(&tx->tx_list);
1810                         LIBCFS_FREE(tx, tx->tx_desc_size);
1811                 }
1812         } else {
1813                 spin_unlock(&ksocknal_data.ksnd_tx_lock);
1814         }
1815 }
1816
1817 static void
1818 ksocknal_base_shutdown(void)
1819 {
1820         struct ksock_sched *sched;
1821         struct ksock_peer_ni *peer_ni;
1822         int i;
1823
1824         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %lld\n",
1825                libcfs_kmem_read());
1826         LASSERT (ksocknal_data.ksnd_nnets == 0);
1827
1828         switch (ksocknal_data.ksnd_init) {
1829         default:
1830                 LASSERT(0);
1831                 /* fallthrough */
1832
1833         case SOCKNAL_INIT_ALL:
1834         case SOCKNAL_INIT_DATA:
1835                 hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list)
1836                         LASSERT(0);
1837
1838                 LASSERT(list_empty(&ksocknal_data.ksnd_nets));
1839                 LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
1840                 LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
1841                 LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
1842                 LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));
1843
1844                 if (ksocknal_data.ksnd_schedulers != NULL) {
1845                         cfs_percpt_for_each(sched, i,
1846                                             ksocknal_data.ksnd_schedulers) {
1847
1848                                 LASSERT(list_empty(&sched->kss_tx_conns));
1849                                 LASSERT(list_empty(&sched->kss_rx_conns));
1850                                 LASSERT(list_empty(&sched->kss_zombie_noop_txs));
1851                                 LASSERT(sched->kss_nconns == 0);
1852                         }
1853                 }
1854
1855                 /* flag threads to terminate; wake and wait for them to die */
1856                 ksocknal_data.ksnd_shuttingdown = 1;
1857                 wake_up_all(&ksocknal_data.ksnd_connd_waitq);
1858                 wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
1859
1860                 if (ksocknal_data.ksnd_schedulers != NULL) {
1861                         cfs_percpt_for_each(sched, i,
1862                                             ksocknal_data.ksnd_schedulers)
1863                                         wake_up_all(&sched->kss_waitq);
1864                 }
1865
1866                 wait_var_event_warning(&ksocknal_data.ksnd_nthreads,
1867                                        atomic_read(&ksocknal_data.ksnd_nthreads) == 0,
1868                                        "waiting for %d threads to terminate\n",
1869                                        atomic_read(&ksocknal_data.ksnd_nthreads));
1870
1871                 ksocknal_free_buffers();
1872
1873                 ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
1874                 break;
1875         }
1876
1877         CDEBUG(D_MALLOC, "after NAL cleanup: kmem %lld\n",
1878                libcfs_kmem_read());
1879
1880         module_put(THIS_MODULE);
1881 }
1882
1883 static int
1884 ksocknal_base_startup(void)
1885 {
1886         struct ksock_sched *sched;
1887         int rc;
1888         int i;
1889
1890         LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
1891         LASSERT(ksocknal_data.ksnd_nnets == 0);
1892
1893         memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */
1894
1895         hash_init(ksocknal_data.ksnd_peers);
1896
1897         rwlock_init(&ksocknal_data.ksnd_global_lock);
1898         INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
1899
1900         spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
1901         INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
1902         INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
1903         INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
1904         init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
1905
1906         spin_lock_init(&ksocknal_data.ksnd_connd_lock);
1907         INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
1908         INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
1909         init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
1910
1911         spin_lock_init(&ksocknal_data.ksnd_tx_lock);
1912         INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
1913
1914         /* NB memset above zeros whole of ksocknal_data */
1915
1916         /* flag lists/ptrs/locks initialised */
1917         ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
1918         if (!try_module_get(THIS_MODULE))
1919                 goto failed;
1920
1921         /* Create a scheduler block per available CPT */
1922         ksocknal_data.ksnd_schedulers = cfs_percpt_alloc(lnet_cpt_table(),
1923                                                          sizeof(*sched));
1924         if (ksocknal_data.ksnd_schedulers == NULL)
1925                 goto failed;
1926
1927         cfs_percpt_for_each(sched, i, ksocknal_data.ksnd_schedulers) {
1928                 int nthrs;
1929
1930                 /*
1931                  * make sure not to allocate more threads than there are
1932                  * cores/CPUs in the CPT
1933                  */
1934                 nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
1935                 if (*ksocknal_tunables.ksnd_nscheds > 0) {
1936                         nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
1937                 } else {
1938                         /*
1939                          * max to half of CPUs, assume another half should be
1940                          * reserved for upper layer modules
1941                          */
1942                         nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
1943                 }
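                /*
                 * Worked example (assuming SOCKNAL_NSCHEDS keeps its usual
                 * value of 3): on a 16-CPU CPT with ksnd_nscheds == 0 this
                 * yields nthrs = min(max(3, 16 >> 1), 16) = 8, i.e. half
                 * the CPUs; on a 2-CPU CPT it yields min(max(3, 1), 2) = 2,
                 * so the limit never exceeds the CPUs actually present.
                 */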
1944
1945                 sched->kss_nthreads_max = nthrs;
1946                 sched->kss_cpt = i;
1947
1948                 spin_lock_init(&sched->kss_lock);
1949                 INIT_LIST_HEAD(&sched->kss_rx_conns);
1950                 INIT_LIST_HEAD(&sched->kss_tx_conns);
1951                 INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
1952                 init_waitqueue_head(&sched->kss_waitq);
1953         }
1954
1955         ksocknal_data.ksnd_connd_starting         = 0;
1956         ksocknal_data.ksnd_connd_failed_stamp     = 0;
1957         ksocknal_data.ksnd_connd_starting_stamp   = ktime_get_real_seconds();
1958         /* must have at least 2 connds to remain responsive to accepts while
1959          * connecting */
1960         if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
1961                 *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
1962
1963         if (*ksocknal_tunables.ksnd_nconnds_max <
1964             *ksocknal_tunables.ksnd_nconnds) {
1965                 ksocknal_tunables.ksnd_nconnds_max =
1966                         ksocknal_tunables.ksnd_nconnds;
1967         }
1968
1969         for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
1970                 char name[16];
1971
1972                 spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
1973                 ksocknal_data.ksnd_connd_starting++;
1974                 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
1975
1976                 snprintf(name, sizeof(name), "socknal_cd%02d", i);
1977                 rc = ksocknal_thread_start(ksocknal_connd,
1978                                            (void *)((uintptr_t)i), name);
1979                 if (rc != 0) {
1980                         spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
1981                         ksocknal_data.ksnd_connd_starting--;
1982                         spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
1983                         CERROR("Can't spawn socknal connd: %d\n", rc);
1984                         goto failed;
1985                 }
1986         }
1987
1988         rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
1989         if (rc != 0) {
1990                 CERROR ("Can't spawn socknal reaper: %d\n", rc);
1991                 goto failed;
1992         }
1993
1994         /* flag everything initialised */
1995         ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
1996
1997         return 0;
1998
1999  failed:
2000         ksocknal_base_shutdown();
2001         return -ENETDOWN;
2002 }
2003
2004 static int
2005 ksocknal_debug_peerhash(struct lnet_ni *ni)
2006 {
2007         struct ksock_peer_ni *peer_ni;
2008         int i;
2009
2010         read_lock(&ksocknal_data.ksnd_global_lock);
2011
2012         hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) {
2013                 struct ksock_route *route;
2014                 struct ksock_conn *conn;
2015
2016                 if (peer_ni->ksnp_ni != ni)
2017                         continue;
2018
2019                 CWARN("Active peer_ni on shutdown: %s, ref %d, "
2020                       "closing %d, accepting %d, err %d, zcookie %llu, "
2021                       "txq %d, zc_req %d\n", libcfs_id2str(peer_ni->ksnp_id),
2022                       refcount_read(&peer_ni->ksnp_refcount),
2023                       peer_ni->ksnp_closing,
2024                       peer_ni->ksnp_accepting, peer_ni->ksnp_error,
2025                       peer_ni->ksnp_zc_next_cookie,
2026                       !list_empty(&peer_ni->ksnp_tx_queue),
2027                       !list_empty(&peer_ni->ksnp_zc_req_list));
2028
2029                 list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list) {
2030                         CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
2031                               refcount_read(&route->ksnr_refcount),
2032                               route->ksnr_scheduled, route->ksnr_connecting,
2033                               route->ksnr_connected, route->ksnr_deleted);
2034                 }
2035
2036                 list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) {
2037                         CWARN("Conn: ref %d, sref %d, t %d, c %d\n",
2038                               refcount_read(&conn->ksnc_conn_refcount),
2039                               refcount_read(&conn->ksnc_sock_refcount),
2040                               conn->ksnc_type, conn->ksnc_closing);
2041                 }
2042                 break;
2043         }
2044
2045         read_unlock(&ksocknal_data.ksnd_global_lock);
2046         return 0;
2047 }
2048
2049 void
2050 ksocknal_shutdown(struct lnet_ni *ni)
2051 {
2052         struct ksock_net *net = ni->ni_data;
2053         struct lnet_process_id anyid = {
2054                 .nid = LNET_NID_ANY,
2055                 .pid = LNET_PID_ANY,
2056         };
2057
2058         LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
2059         LASSERT(ksocknal_data.ksnd_nnets > 0);
2060
2061         /* prevent new peers */
2062         atomic_add(SOCKNAL_SHUTDOWN_BIAS, &net->ksnn_npeers);
2063
2064         /* Delete all peers */
2065         ksocknal_del_peer(ni, anyid, 0);
2066
2067         /* Wait for all peer_ni state to clean up */
2068         wait_var_event_warning(&net->ksnn_npeers,
2069                                atomic_read(&net->ksnn_npeers) ==
2070                                SOCKNAL_SHUTDOWN_BIAS,
2071                                "waiting for %d peers to disconnect\n",
2072                                ksocknal_debug_peerhash(ni) +
2073                                atomic_read(&net->ksnn_npeers) -
2074                                SOCKNAL_SHUTDOWN_BIAS);
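        /* NB ksocknal_debug_peerhash() always returns 0 (see above); it is
         * folded into the count only so the remaining peers get dumped each
         * time the progress warning is printed.
         */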
2075
2076         LASSERT(net->ksnn_interface.ksni_npeers == 0);
2077         LASSERT(net->ksnn_interface.ksni_nroutes == 0);
2078
2079         list_del(&net->ksnn_list);
2080         LIBCFS_FREE(net, sizeof(*net));
2081
2082         ksocknal_data.ksnd_nnets--;
2083         if (ksocknal_data.ksnd_nnets == 0)
2084                 ksocknal_base_shutdown();
2085 }
2086
2087 static int
2088 ksocknal_search_new_ipif(struct ksock_net *net)
2089 {
2090         int new_ipif = 0;
2091         char *ifnam = &net->ksnn_interface.ksni_name[0];
2092         char *colon = strchr(ifnam, ':');
2093         bool found = false;
2094         struct ksock_net *tmp;
2095
2096         if (colon != NULL)
2097                 *colon = 0;
2098
2099         list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) {
2100                 char *ifnam2 = &tmp->ksnn_interface.ksni_name[0];
2101                 char *colon2 = strchr(ifnam2, ':');
2102
2103                 if (colon2 != NULL)
2104                         *colon2 = 0;
2105
2106                 found = strcmp(ifnam, ifnam2) == 0;
2107                 if (colon2 != NULL)
2108                         *colon2 = ':';
2109         }
2110
2111         new_ipif += !found;
2112         if (colon != NULL)
2113                 *colon = ':';
2114
2115         return new_ipif;
2116 }
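/*
 * Editor's illustration (hypothetical helper, not used by the LND): the
 * check above amounts to comparing interface names with any ":alias"
 * suffix stripped, so "eth0" and "eth0:1" denote the same physical
 * interface while "eth1" does not.
 */
#if 0
static bool
ksocknal_example_same_phys_if(const char *a, const char *b)
{
        size_t la = strcspn(a, ":");    /* length up to any alias suffix */
        size_t lb = strcspn(b, ":");

        return la == lb && strncmp(a, b, la) == 0;
}
#endif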
2117
2118 static int
2119 ksocknal_start_schedulers(struct ksock_sched *sched)
2120 {
2121         int     nthrs;
2122         int     rc = 0;
2123         int     i;
2124
2125         if (sched->kss_nthreads == 0) {
2126                 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2127                         nthrs = sched->kss_nthreads_max;
2128                 } else {
2129                         nthrs = cfs_cpt_weight(lnet_cpt_table(),
2130                                                sched->kss_cpt);
2131                         nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2132                         nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
2133                 }
2134                 nthrs = min(nthrs, sched->kss_nthreads_max);
2135         } else {
2136                 LASSERT(sched->kss_nthreads <= sched->kss_nthreads_max);
2137                 /* start up to two more threads when a new interface is added */
2138                 nthrs = min(2, sched->kss_nthreads_max - sched->kss_nthreads);
2139         }
2140
2141         for (i = 0; i < nthrs; i++) {
2142                 long id;
2143                 char name[20];
2144
2145                 id = KSOCK_THREAD_ID(sched->kss_cpt, sched->kss_nthreads + i);
2146                 snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
2147                          sched->kss_cpt, (int)KSOCK_THREAD_SID(id));
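                /* e.g. the second scheduler thread started on CPT 3 is
                 * named "socknal_sd03_01" */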
2148
2149                 rc = ksocknal_thread_start(ksocknal_scheduler,
2150                                            (void *)id, name);
2151                 if (rc == 0)
2152                         continue;
2153
2154                 CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
2155                        sched->kss_cpt, (int) KSOCK_THREAD_SID(id), rc);
2156                 break;
2157         }
2158
2159         sched->kss_nthreads += i;
2160         return rc;
2161 }
2162
2163 static int
2164 ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts)
2165 {
2166         int newif = ksocknal_search_new_ipif(net);
2167         int rc;
2168         int i;
2169
2170         if (ncpts > 0 && ncpts > cfs_cpt_number(lnet_cpt_table()))
2171                 return -EINVAL;
2172
2173         for (i = 0; i < ncpts; i++) {
2174                 struct ksock_sched *sched;
2175                 int cpt = (cpts == NULL) ? i : cpts[i];
2176
2177                 LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
2178                 sched = ksocknal_data.ksnd_schedulers[cpt];
2179
2180                 if (!newif && sched->kss_nthreads > 0)
2181                         continue;
2182
2183                 rc = ksocknal_start_schedulers(sched);
2184                 if (rc != 0)
2185                         return rc;
2186         }
2187         return 0;
2188 }
2189
2190 int
2191 ksocknal_startup(struct lnet_ni *ni)
2192 {
2193         struct ksock_net *net;
2194         struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables;
2195         struct ksock_interface *ksi = NULL;
2196         struct lnet_inetdev *ifaces = NULL;
2197         struct sockaddr_in *sa;
2198         int i = 0;
2199         int rc;
2200
2201         LASSERT (ni->ni_net->net_lnd == &the_ksocklnd);
2202         if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
2203                 rc = ksocknal_base_startup();
2204                 if (rc != 0)
2205                         return rc;
2206         }
2207         LIBCFS_ALLOC(net, sizeof(*net));
2208         if (net == NULL)
2209                 goto fail_0;
2210         net->ksnn_incarnation = ktime_get_real_ns();
2211         ni->ni_data = net;
2212         net_tunables = &ni->ni_net->net_tunables;
2213         if (net_tunables->lct_peer_timeout == -1)
2214                 net_tunables->lct_peer_timeout =
2215                         *ksocknal_tunables.ksnd_peertimeout;
2216
2217         if (net_tunables->lct_max_tx_credits == -1)
2218                 net_tunables->lct_max_tx_credits =
2219                         *ksocknal_tunables.ksnd_credits;
2220
2221         if (net_tunables->lct_peer_tx_credits == -1)
2222                 net_tunables->lct_peer_tx_credits =
2223                         *ksocknal_tunables.ksnd_peertxcredits;
2224
2225         if (net_tunables->lct_peer_tx_credits >
2226             net_tunables->lct_max_tx_credits)
2227                 net_tunables->lct_peer_tx_credits =
2228                         net_tunables->lct_max_tx_credits;
2229
2230         if (net_tunables->lct_peer_rtr_credits == -1)
2231                 net_tunables->lct_peer_rtr_credits =
2232                         *ksocknal_tunables.ksnd_peerrtrcredits;
2233
2234         rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns);
2235         if (rc < 0)
2236                 goto fail_1;
2237
2238         ksi = &net->ksnn_interface;
2239
2240         /* Use the first discovered interface or look in the list */
2241         if (ni->ni_interface) {
2242                 for (i = 0; i < rc; i++)
2243                         if (strcmp(ifaces[i].li_name, ni->ni_interface) == 0)
2244                                 break;
2245
2246                 /* ni_interface doesn't match any discovered interface */
2247                 if (i == rc) {
2248                         CERROR("ksocklnd: failed to find interface %s\n",
2249                                ni->ni_interface);
2250                         goto fail_1;
2251                 }
2252         }
2253
2254         ni->ni_dev_cpt = ifaces[i].li_cpt;
2255         sa = (void *)&ksi->ksni_addr;
2256         memset(sa, 0, sizeof(*sa));
2257         sa->sin_family = AF_INET;
2258         sa->sin_addr.s_addr = htonl(ifaces[i].li_ipaddr);
2259         ksi->ksni_index = ksocknal_ip2index((struct sockaddr *)sa, ni);
2260         ksi->ksni_netmask = ifaces[i].li_netmask;
2261         strlcpy(ksi->ksni_name, ifaces[i].li_name, sizeof(ksi->ksni_name));
2262
2263         /* call this before adding net to ksocknal_data.ksnd_nets */
2264         rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
2265         if (rc != 0)
2266                 goto fail_1;
2267
2268         LASSERT(ksi);
2269         LASSERT(ksi->ksni_addr.ss_family == AF_INET);
2270         ni->ni_nid = LNET_MKNID(
2271                 LNET_NIDNET(ni->ni_nid),
2272                 ntohl(((struct sockaddr_in *)
2273                        &ksi->ksni_addr)->sin_addr.s_addr));
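        /* LNET_MKNID() keeps the network number in the upper 32 bits of the
         * NID and the interface's IPv4 address (host byte order) in the
         * lower 32, so e.g. 192.168.1.10 on an @tcp net appears as the NID
         * "192.168.1.10@tcp".
         */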
2274         list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
2275         ksocknal_data.ksnd_nnets++;
2276
2277         return 0;
2278
2279 fail_1:
2280         LIBCFS_FREE(net, sizeof(*net));
2281 fail_0:
2282         if (ksocknal_data.ksnd_nnets == 0)
2283                 ksocknal_base_shutdown();
2284
2285         return -ENETDOWN;
2286 }
2287
2289 static void __exit ksocklnd_exit(void)
2290 {
2291         lnet_unregister_lnd(&the_ksocklnd);
2292 }
2293
2294 static const struct lnet_lnd the_ksocklnd = {
2295         .lnd_type               = SOCKLND,
2296         .lnd_startup            = ksocknal_startup,
2297         .lnd_shutdown           = ksocknal_shutdown,
2298         .lnd_ctl                = ksocknal_ctl,
2299         .lnd_send               = ksocknal_send,
2300         .lnd_recv               = ksocknal_recv,
2301         .lnd_notify_peer_down   = ksocknal_notify_gw_down,
2302         .lnd_accept             = ksocknal_accept,
2303 };
2304
2305 static int __init ksocklnd_init(void)
2306 {
2307         int rc;
2308
2309         /* check that the ksnr_connected/connecting bitfields are wide enough */
2310         BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4);
2311         BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN);
2312
2313         rc = ksocknal_tunables_init();
2314         if (rc != 0)
2315                 return rc;
2316
2317         lnet_register_lnd(&the_ksocklnd);
2318
2319         return 0;
2320 }
2321
2322 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
2323 MODULE_DESCRIPTION("TCP Socket LNet Network Driver");
2324 MODULE_VERSION("2.8.0");
2325 MODULE_LICENSE("GPL");
2326
2327 module_init(ksocklnd_init);
2328 module_exit(ksocklnd_exit);