Whamcloud - gitweb
LU-17744 ldiskfs: mballoc stats fixes
[fs/lustre-release.git] / lnet / lnet / acceptor.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
4  * Use is subject to license terms.
5  *
6  * Copyright (c) 2011, 2017, Intel Corporation.
7  */
8
9 /* This file is part of Lustre, http://www.lustre.org/ */
10
11 #define DEBUG_SUBSYSTEM S_LNET
12
13 #include <linux/completion.h>
14 #include <net/sock.h>
15 #include <lnet/lib-lnet.h>
16 #include <linux/sunrpc/addr.h>
17
/* Acceptor tunables; each one is registered as a module parameter below. */
static int accept_port = 988;           /* port the acceptor listens on */
static int accept_backlog = 127;        /* listen backlog */
static int accept_timeout = 5;          /* socket I/O timeout, in seconds */
21
22 static struct {
23         int                     pta_shutdown;
24         struct socket           *pta_sock;
25         struct completion       pta_signal;
26         struct net              *pta_ns;
27         wait_queue_head_t       pta_waitq;
28         atomic_t                pta_ready;
29 #ifdef HAVE_SK_DATA_READY_ONE_ARG
30         void                    (*pta_odata)(struct sock *);
31 #else
32         void                    (*pta_odata)(struct sock *, int);
33 #endif
34 } lnet_acceptor_state = {
35         .pta_shutdown = 1
36 };
37
38 int
39 lnet_acceptor_port(void)
40 {
41         return accept_port;
42 }
43
/*
 * Test whether @magic is @constant in either byte order.
 *
 * Returns 1 when @magic equals @constant as-is or byte-swapped (32-bit),
 * and 0 otherwise.
 */
static inline int
lnet_accept_magic(__u32 magic, __u32 constant)
{
        if (magic == constant)
                return 1;

        return magic == __swab32(constant);
}
50
51 EXPORT_SYMBOL(lnet_acceptor_port);
52
53 static char *accept_type = "secure";
54
55 module_param_named(accept, accept_type, charp, 0444);
56 MODULE_PARM_DESC(accept, "Accept connections (secure|all|none)");
57 module_param(accept_port, int, 0444);
58 MODULE_PARM_DESC(accept_port, "Acceptor's port (same on all nodes)");
59 module_param(accept_backlog, int, 0444);
60 MODULE_PARM_DESC(accept_backlog, "Acceptor's listen backlog");
61 module_param(accept_timeout, int, 0644);
62 MODULE_PARM_DESC(accept_timeout, "Acceptor's timeout (seconds)");
63
64 int
65 lnet_acceptor_timeout(void)
66 {
67         return accept_timeout;
68 }
69 EXPORT_SYMBOL(lnet_acceptor_timeout);
70
/*
 * Log a human-readable explanation for a failed outgoing connection.
 *
 * @rc        negative errno describing the failure
 * @peer_nid  NID of the peer we were trying to reach
 * @sa        socket address of that peer
 *
 * Refused, unreachable and timed-out connections are reported through
 * CNETERR(); everything else goes to the console via LCONSOLE_ERROR_MSG().
 */
void
lnet_connect_console_error(int rc, struct lnet_nid *peer_nid,
                           struct sockaddr *sa)
{
        if (rc == -ECONNREFUSED) {
                /* "normal" errors */
                CNETERR("Connection to %s at host %pIScp was refused: check that Lustre is running on that node.\n",
                        libcfs_nidstr(peer_nid), sa);
        } else if (rc == -EHOSTUNREACH || rc == -ENETUNREACH) {
                CNETERR("Connection to %s at host %pISc was unreachable: the network or that node may be down, or Lustre may be misconfigured.\n",
                        libcfs_nidstr(peer_nid), sa);
        } else if (rc == -ETIMEDOUT) {
                CNETERR("Connection to %s at host %pIScp took too long: that node may be hung or experiencing high load.\n",
                        libcfs_nidstr(peer_nid), sa);
        } else if (rc == -ECONNRESET) {
                LCONSOLE_ERROR_MSG(0x11b,
                                   "Connection to %s at host %pIScp was reset: is it running a compatible version of Lustre and is %s one of its NIDs?\n",
                                   libcfs_nidstr(peer_nid), sa,
                                   libcfs_nidstr(peer_nid));
        } else if (rc == -EPROTO) {
                LCONSOLE_ERROR_MSG(0x11c,
                                   "Protocol error connecting to %s at host %pIScp: is it running a compatible version of Lustre?\n",
                                   libcfs_nidstr(peer_nid), sa);
        } else if (rc == -EADDRINUSE) {
                LCONSOLE_ERROR_MSG(0x11d,
                                   "No privileged ports available to connect to %s at host %pIScp\n",
                                   libcfs_nidstr(peer_nid), sa);
        } else {
                LCONSOLE_ERROR_MSG(0x11e,
                                   "Unexpected error %d connecting to %s at host %pIScp\n",
                                   rc, libcfs_nidstr(peer_nid), sa);
        }
}
EXPORT_SYMBOL(lnet_connect_console_error);
114
115 struct socket *
116 lnet_connect(struct lnet_nid *peer_nid, int interface,
117              struct sockaddr *peeraddr,
118              struct net *ns)
119 {
120         struct lnet_acceptor_connreq cr1;
121         struct lnet_acceptor_connreq_v2 cr2;
122         void *cr;
123         int crsize;
124         struct socket *sock;
125         int rc;
126         int port;
127
128         BUILD_BUG_ON(sizeof(cr) > 16); /* not too big to be on the stack */
129
130         LASSERT(peeraddr->sa_family == AF_INET ||
131                 peeraddr->sa_family == AF_INET6);
132
133         for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
134              port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
135              --port) {
136                 /* Iterate through reserved ports. */
137                 sock = lnet_sock_connect(interface, port, peeraddr, ns);
138                 if (IS_ERR(sock)) {
139                         rc = PTR_ERR(sock);
140                         if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL)
141                                 continue;
142                         goto failed;
143                 }
144
145                 BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
146
147                 if (nid_is_nid4(peer_nid)) {
148                         cr1.acr_magic   = LNET_PROTO_ACCEPTOR_MAGIC;
149                         cr1.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
150                         cr1.acr_nid     = lnet_nid_to_nid4(peer_nid);
151                         cr = &cr1;
152                         crsize = sizeof(cr1);
153
154                         if (the_lnet.ln_testprotocompat) {
155                                 /* single-shot proto check */
156                                 if (test_and_clear_bit(
157                                             2, &the_lnet.ln_testprotocompat))
158                                         cr1.acr_version++;
159                                 if (test_and_clear_bit(
160                                             3, &the_lnet.ln_testprotocompat))
161                                         cr1.acr_magic = LNET_PROTO_MAGIC;
162                         }
163
164                 } else {
165                         cr2.acr_magic   = LNET_PROTO_ACCEPTOR_MAGIC;
166                         cr2.acr_version = LNET_PROTO_ACCEPTOR_VERSION_16;
167                         cr2.acr_nid     = *peer_nid;
168                         cr = &cr2;
169                         crsize = sizeof(cr2);
170                 }
171
172                 rc = lnet_sock_write(sock, cr, crsize, accept_timeout);
173                 if (rc != 0)
174                         goto failed_sock;
175
176                 return sock;
177         }
178
179         rc = -EADDRINUSE;
180         goto failed;
181
182 failed_sock:
183         sock_release(sock);
184 failed:
185         lnet_connect_console_error(rc, peer_nid, peeraddr);
186         return ERR_PTR(rc);
187 }
188 EXPORT_SYMBOL(lnet_connect);
189
190 static int
191 lnet_accept(struct socket *sock, __u32 magic)
192 {
193         struct lnet_acceptor_connreq cr;
194         struct lnet_acceptor_connreq_v2 cr2;
195         struct lnet_nid nid;
196         struct sockaddr_storage peer;
197         int peer_version;
198         int rc;
199         int flip;
200         struct lnet_ni *ni;
201         char *str;
202
203         LASSERT(sizeof(cr) <= 16);              /* not too big for the stack */
204
205         rc = lnet_sock_getaddr(sock, true, &peer);
206         if (rc != 0) {
207                 CERROR("Can't determine new connection's address\n");
208                 return rc;
209         }
210
211         if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
212
213                 if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
214                         /* future version compatibility!
215                          * When LNET unifies protocols over all LNDs, the first
216                          * thing sent will be a version query.  I send back
217                          * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" */
218
219                         memset(&cr, 0, sizeof(cr));
220                         cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
221                         cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
222                         rc = lnet_sock_write(sock, &cr, sizeof(cr),
223                                                accept_timeout);
224
225                         if (rc != 0)
226                                 CERROR("Error sending magic+version in response to LNET magic from %pISc: %d\n",
227                                        &peer, rc);
228                         return -EPROTO;
229                 }
230
231                 if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC))
232                         str = "'old' socknal/tcpnal";
233                 else
234                         str = "unrecognised";
235
236                 LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pISc"
237                                    " magic %08x: %s acceptor protocol\n",
238                                    &peer, magic, str);
239                 return -EPROTO;
240         }
241
242         flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
243
244         rc = lnet_sock_read(sock, &cr.acr_version,
245                               sizeof(cr.acr_version),
246                               accept_timeout);
247         if (rc != 0) {
248                 CERROR("Error %d reading connection request version from %pISc\n",
249                        rc, &peer);
250                 return -EIO;
251         }
252
253         if (flip)
254                 __swab32s(&cr.acr_version);
255
256         switch (cr.acr_version) {
257         default:
258                 /* future version compatibility!
259                  * An acceptor-specific protocol rev will first send a version
260                  * query.  I send back my current version to tell her I'm
261                  * "old". */
262                 peer_version = cr.acr_version;
263
264                 memset(&cr, 0, sizeof(cr));
265                 cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
266                 cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
267
268                 rc = lnet_sock_write(sock, &cr, sizeof(cr),
269                                        accept_timeout);
270
271                 if (rc != 0)
272                         CERROR("Error sending magic+version in response to version %d from %pISc: %d\n",
273                                peer_version, &peer, rc);
274                 return -EPROTO;
275
276         case LNET_PROTO_ACCEPTOR_VERSION:
277
278                 rc = lnet_sock_read(sock, &cr.acr_nid,
279                                     sizeof(cr) -
280                                     offsetof(struct lnet_acceptor_connreq,
281                                              acr_nid),
282                                     accept_timeout);
283                 if (rc)
284                         break;
285                 if (flip)
286                         __swab64s(&cr.acr_nid);
287
288                 lnet_nid4_to_nid(cr.acr_nid, &nid);
289                 break;
290
291         case LNET_PROTO_ACCEPTOR_VERSION_16:
292                 rc = lnet_sock_read(sock, &cr2.acr_nid,
293                                     sizeof(cr2) -
294                                     offsetof(struct lnet_acceptor_connreq_v2,
295                                              acr_nid),
296                                     accept_timeout);
297                 if (rc)
298                         break;
299                 nid = cr2.acr_nid;
300                 break;
301         }
302         if (rc != 0) {
303                 CERROR("Error %d reading connection request from %pISc\n",
304                        rc, &peer);
305                 return -EIO;
306         }
307
308         ni = lnet_nid_to_ni_addref(&nid);
309         if (ni == NULL ||               /* no matching net */
310             !nid_same(&ni->ni_nid, &nid)) {
311                 /* right NET, wrong NID! */
312                 if (ni != NULL)
313                         lnet_ni_decref(ni);
314                 LCONSOLE_ERROR_MSG(0x120,
315                                    "Refusing connection from %pISc for %s: No matching NI\n",
316                                    &peer, libcfs_nidstr(&nid));
317                 return -EPERM;
318         }
319
320         if (ni->ni_net->net_lnd->lnd_accept == NULL) {
321                 /* This catches a request for the loopback LND */
322                 lnet_ni_decref(ni);
323                 LCONSOLE_ERROR_MSG(0x121,
324                                    "Refusing connection from %pISc for %s: NI doesn not accept IP connections\n",
325                                   &peer, libcfs_nidstr(&nid));
326                 return -EPERM;
327         }
328
329         CDEBUG(D_NET, "Accept %s from %pI4h\n", libcfs_nidstr(&nid), &peer);
330
331         rc = ni->ni_net->net_lnd->lnd_accept(ni, sock);
332
333         lnet_ni_decref(ni);
334         return rc;
335 }
336
337 #ifdef HAVE_SK_DATA_READY_ONE_ARG
338 static void lnet_acceptor_ready(struct sock *sk)
339 #else
340 static void lnet_acceptor_ready(struct sock *sk, int len)
341 #endif
342 {
343         /* Ensure pta_odata has actually been set before calling it */
344         rmb();
345 #ifdef HAVE_SK_DATA_READY_ONE_ARG
346         lnet_acceptor_state.pta_odata(sk);
347 #else
348         lnet_acceptor_state.pta_odata(sk, 0);
349 #endif
350
351         atomic_set(&lnet_acceptor_state.pta_ready, 1);
352         wake_up(&lnet_acceptor_state.pta_waitq);
353 }
354
355 static int
356 lnet_acceptor(void *arg)
357 {
358         struct socket  *newsock;
359         int            rc;
360         __u32          magic;
361         struct sockaddr_storage peer;
362         int            secure = (int)((uintptr_t)arg);
363
364         LASSERT(lnet_acceptor_state.pta_sock == NULL);
365
366         lnet_acceptor_state.pta_sock =
367                 lnet_sock_listen(accept_port, accept_backlog,
368                                  lnet_acceptor_state.pta_ns);
369         if (IS_ERR(lnet_acceptor_state.pta_sock)) {
370                 rc = PTR_ERR(lnet_acceptor_state.pta_sock);
371                 if (rc == -EADDRINUSE)
372                         LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
373                                            " %d: port already in use\n",
374                                            accept_port);
375                 else
376                         LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port "
377                                            "%d: unexpected error %d\n",
378                                            accept_port, rc);
379
380                 lnet_acceptor_state.pta_sock = NULL;
381         } else {
382                 rc = 0;
383                 LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
384                 init_waitqueue_head(&lnet_acceptor_state.pta_waitq);
385                 lnet_acceptor_state.pta_odata =
386                         lnet_acceptor_state.pta_sock->sk->sk_data_ready;
387                 /* ensure pta_odata gets set before there is any chance of
388                  * lnet_accept_ready() trying to read it.
389                  */
390                 wmb();
391                 lnet_acceptor_state.pta_sock->sk->sk_data_ready =
392                         lnet_acceptor_ready;
393                 atomic_set(&lnet_acceptor_state.pta_ready, 1);
394         }
395
396         /* set init status and unblock parent */
397         lnet_acceptor_state.pta_shutdown = rc;
398         complete(&lnet_acceptor_state.pta_signal);
399
400         if (rc != 0)
401                 return rc;
402
403         while (!lnet_acceptor_state.pta_shutdown) {
404
405                 wait_event_idle(lnet_acceptor_state.pta_waitq,
406                                 lnet_acceptor_state.pta_shutdown ||
407                                 atomic_read(&lnet_acceptor_state.pta_ready));
408                 if (!atomic_read(&lnet_acceptor_state.pta_ready))
409                         continue;
410                 atomic_set(&lnet_acceptor_state.pta_ready, 0);
411                 rc = kernel_accept(lnet_acceptor_state.pta_sock, &newsock,
412                                    SOCK_NONBLOCK);
413                 if (rc != 0) {
414                         if (rc != -EAGAIN) {
415                                 CWARN("Accept error %d: pausing...\n", rc);
416                                 schedule_timeout_uninterruptible(
417                                         cfs_time_seconds(1));
418                         }
419                         continue;
420                 }
421
422                 /* make sure we call lnet_sock_accept() again, until it fails */
423                 atomic_set(&lnet_acceptor_state.pta_ready, 1);
424
425                 rc = lnet_sock_getaddr(newsock, true, &peer);
426                 if (rc != 0) {
427                         CERROR("Can't determine new connection's address\n");
428                         goto failed;
429                 }
430
431                 if (secure &&
432                     rpc_get_port((struct sockaddr *)&peer) >
433                     LNET_ACCEPTOR_MAX_RESERVED_PORT) {
434                         CERROR("Refusing connection from %pIScp: insecure port.\n",
435                                &peer);
436                         goto failed;
437                 }
438
439                 rc = lnet_sock_read(newsock, &magic, sizeof(magic),
440                                       accept_timeout);
441                 if (rc != 0) {
442                         CERROR("Error %d reading connection request from %pISc\n",
443                                rc, &peer);
444                         goto failed;
445                 }
446
447                 rc = lnet_accept(newsock, magic);
448                 if (rc != 0)
449                         goto failed;
450
451                 continue;
452
453 failed:
454                 sock_release(newsock);
455         }
456
457         lnet_acceptor_state.pta_sock->sk->sk_data_ready =
458                 lnet_acceptor_state.pta_odata;
459         sock_release(lnet_acceptor_state.pta_sock);
460         lnet_acceptor_state.pta_sock = NULL;
461
462         CDEBUG(D_NET, "Acceptor stopping\n");
463
464         /* unblock lnet_acceptor_stop() */
465         complete(&lnet_acceptor_state.pta_signal);
466         return 0;
467 }
468
/*
 * Translate the "accept" module parameter string.
 *
 * @acc  one of "secure", "all" or "none"
 * @sec  set to 1 for "secure" and to 0 for "all"; left untouched otherwise
 *
 * Returns 1 when connections are to be accepted ("secure"/"all"), 0 for
 * "none", and -EINVAL (after a console error) for any other string.
 */
static inline int
accept2secure(const char *acc, long *sec)
{
        if (strcmp(acc, "secure") == 0) {
                *sec = 1;
                return 1;
        }

        if (strcmp(acc, "all") == 0) {
                *sec = 0;
                return 1;
        }

        if (strcmp(acc, "none") == 0)
                return 0;

        LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
                           acc);
        return -EINVAL;
}
486
487 int
488 lnet_acceptor_start(void)
489 {
490         struct task_struct *task;
491         int  rc;
492         long rc2;
493         long secure;
494
495         /* if acceptor is already running return immediately */
496         if (!lnet_acceptor_state.pta_shutdown)
497                 return 0;
498
499         LASSERT(lnet_acceptor_state.pta_sock == NULL);
500
501         init_completion(&lnet_acceptor_state.pta_signal);
502         rc = accept2secure(accept_type, &secure);
503         if (rc <= 0)
504                 return rc;
505
506         if (lnet_count_acceptor_nets() == 0)  /* not required */
507                 return 0;
508         if (current->nsproxy && current->nsproxy->net_ns)
509                 lnet_acceptor_state.pta_ns = current->nsproxy->net_ns;
510         else
511                 lnet_acceptor_state.pta_ns = &init_net;
512         task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
513                            "acceptor_%03ld", secure);
514         if (IS_ERR(task)) {
515                 rc2 = PTR_ERR(task);
516                 CERROR("Can't start acceptor thread: %ld\n", rc2);
517                 return -ESRCH;
518         }
519
520         /* wait for acceptor to startup */
521         wait_for_completion(&lnet_acceptor_state.pta_signal);
522
523         if (!lnet_acceptor_state.pta_shutdown) {
524                 /* started OK */
525                 LASSERT(lnet_acceptor_state.pta_sock != NULL);
526                 return 0;
527         }
528
529         LASSERT(lnet_acceptor_state.pta_sock == NULL);
530
531         return -ENETDOWN;
532 }
533
534 void
535 lnet_acceptor_stop(void)
536 {
537         if (lnet_acceptor_state.pta_shutdown) /* not running */
538                 return;
539
540         /* If still required, return immediately */
541         if (the_lnet.ln_refcount && lnet_count_acceptor_nets() > 0)
542                 return;
543
544         lnet_acceptor_state.pta_shutdown = 1;
545         wake_up(&lnet_acceptor_state.pta_waitq);
546
547         /* block until acceptor signals exit */
548         wait_for_completion(&lnet_acceptor_state.pta_signal);
549 }