/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright 2022 Hewlett Packard Enterprise Development LP
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 */
/*
 * kfilnd main interface.
 */

#include <linux/delay.h>
#include "kfilnd.h"
#include "kfilnd_tn.h"
#include "kfilnd_dev.h"

struct workqueue_struct *kfilnd_wq;
struct dentry *kfilnd_debug_dir;

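/* LND shutdown handler: release the kfilnd device attached to this NI. */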
static void kfilnd_shutdown(struct lnet_ni *ni)
{
        struct kfilnd_dev *dev = ni->ni_data;

        kfilnd_dev_free(dev);
}

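/* Select the CPT used when sending to @nid: prefer the current CPT if the
 * device has an endpoint there, otherwise hash the NID onto one of the NI's
 * CPTs.
 */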
static int kfilnd_send_cpt(struct kfilnd_dev *dev, lnet_nid_t nid)
{
        int cpt;

        /* If the current CPT is within the LNet NI CPTs, use that CPT. */
        cpt = lnet_cpt_current();
        if (dev->cpt_to_endpoint[cpt])
                return cpt;

        /* Hash to a LNet NI CPT based on target NID. */
        return dev->kfd_endpoints[nid % dev->kfd_ni->ni_ncpts]->end_cpt;
}

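/* Allocate and start a hello handshake transaction to peer @kp. Returns 0 if
 * a hello is already pending or the request was successfully started, and a
 * negative errno if the transaction could not be allocated.
 */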
int kfilnd_send_hello_request(struct kfilnd_dev *dev, int cpt,
                              struct kfilnd_peer *kp)
{
        struct kfilnd_transaction *tn;
        int rc;

        if (kfilnd_peer_set_check_hello_pending(kp)) {
                CDEBUG(D_NET, "Hello already pending to peer %s(%px)\n",
                       libcfs_nid2str(kp->kp_nid), kp);
                return 0;
        }

        tn = kfilnd_tn_alloc_for_peer(dev, cpt, kp, true, true, false);
        if (IS_ERR(tn)) {
                rc = PTR_ERR(tn);
                CERROR("Failed to allocate transaction struct: rc=%d\n", rc);
                kfilnd_peer_clear_hello_pending(kp);
                return rc;
        }

        /* +1 for tn->tn_kp. This ref is dropped when this transaction is
         * finalized
         */
        refcount_inc(&kp->kp_cnt);

        tn->msg_type = KFILND_MSG_HELLO_REQ;

        kp->kp_hello_ts = ktime_get_seconds();

        kfilnd_tn_event_handler(tn, TN_EVENT_TX_HELLO, 0);

        return 0;
}

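/* LND send handler: map an LNet message onto either an immediate kfilnd
 * message or a bulk PUT/GET request and hand the resulting transaction to the
 * state machine.
 */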
static int kfilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *msg)
{
        int type = msg->msg_type;
        struct lnet_processid *target = &msg->msg_target;
        struct kfilnd_transaction *tn;
        int nob;
        struct kfilnd_dev *dev = ni->ni_data;
        enum kfilnd_msg_type lnd_msg_type;
        int cpt;
        enum tn_events event = TN_EVENT_INVALID;
        int rc;
        bool tn_key = false;
        lnet_nid_t tgt_nid4;

        switch (type) {
        default:
                return -EIO;

        case LNET_MSG_ACK:
                if (msg->msg_len != 0)
                        return -EINVAL;
                lnd_msg_type = KFILND_MSG_IMMEDIATE;
                break;

        case LNET_MSG_GET:
                if (msg->msg_routing || msg->msg_target_is_router) {
                        lnd_msg_type = KFILND_MSG_IMMEDIATE;
                        break;
                }

                nob = offsetof(struct kfilnd_msg,
                               proto.immed.payload[msg->msg_md->md_length]);
                if (nob <= KFILND_IMMEDIATE_MSG_SIZE) {
                        lnd_msg_type = KFILND_MSG_IMMEDIATE;
                        break;
                }

                lnd_msg_type = KFILND_MSG_BULK_GET_REQ;
                tn_key = true;
                break;

        case LNET_MSG_REPLY:
        case LNET_MSG_PUT:
                nob = offsetof(struct kfilnd_msg,
                               proto.immed.payload[msg->msg_len]);
                if (nob <= KFILND_IMMEDIATE_MSG_SIZE) {
                        lnd_msg_type = KFILND_MSG_IMMEDIATE;
                        break;
                }

                lnd_msg_type = KFILND_MSG_BULK_PUT_REQ;
                tn_key = true;
                break;
        }

        tgt_nid4 = lnet_nid_to_nid4(&target->nid);

        cpt = kfilnd_send_cpt(dev, tgt_nid4);
        tn = kfilnd_tn_alloc(dev, cpt, tgt_nid4, true, true, tn_key);
        if (IS_ERR(tn)) {
                rc = PTR_ERR(tn);
                CERROR("Failed to allocate transaction struct: rc=%d\n", rc);
                return rc;
        }

        if (kfilnd_peer_needs_hello(tn->tn_kp, true)) {
                rc = kfilnd_send_hello_request(dev, cpt, tn->tn_kp);
                if (rc && kfilnd_peer_is_new_peer(tn->tn_kp)) {
                        /* Only fail the send if this is a new peer. Otherwise
                         * attempt the send using our stale peer information
                         */
                        kfilnd_tn_free(tn);
                        return rc;
                }
        }

        switch (lnd_msg_type) {
        case KFILND_MSG_IMMEDIATE:
                rc = kfilnd_tn_set_kiov_buf(tn, msg->msg_kiov, msg->msg_niov,
                                            msg->msg_offset, msg->msg_len);
                if (rc) {
                        CERROR("Failed to setup immediate buffer rc %d\n", rc);
                        kfilnd_tn_free(tn);
                        return rc;
                }

                event = TN_EVENT_INIT_IMMEDIATE;
                break;

        case KFILND_MSG_BULK_PUT_REQ:
                tn->sink_buffer = false;
                rc = kfilnd_tn_set_kiov_buf(tn, msg->msg_kiov, msg->msg_niov,
                                            msg->msg_offset, msg->msg_len);
                if (rc) {
                        CERROR("Failed to setup PUT source buffer rc %d\n", rc);
                        kfilnd_tn_free(tn);
                        return rc;
                }

                event = TN_EVENT_INIT_BULK;
                break;

        case KFILND_MSG_BULK_GET_REQ:
                /* We need to create a reply message to inform LNet that our
                 * optimized GET is done.
                 */
                tn->tn_getreply = lnet_create_reply_msg(ni, msg);
                if (!tn->tn_getreply) {
                        CERROR("Can't create reply for GET -> %s\n",
                               libcfs_nidstr(&target->nid));
                        kfilnd_tn_free(tn);
                        return -EIO;
                }

                tn->sink_buffer = true;
                rc = kfilnd_tn_set_kiov_buf(tn, msg->msg_md->md_kiov,
                                            msg->msg_md->md_niov,
                                            msg->msg_md->md_offset,
                                            msg->msg_md->md_length);
                if (rc) {
                        CERROR("Failed to setup GET sink buffer rc %d\n", rc);
                        kfilnd_tn_free(tn);
                        return rc;
                }
                event = TN_EVENT_INIT_BULK;
                break;

        default:
                kfilnd_tn_free(tn);
                return -EIO;
        }

        tn->msg_type = lnd_msg_type;
        tn->tn_lntmsg = msg;    /* finalise msg on completion */
        tn->lnet_msg_len = tn->tn_nob;

        KFILND_TN_DEBUG(tn, "%s in %u bytes in %u frags",
                        msg_type_to_str(lnd_msg_type), tn->tn_nob,
                        tn->tn_num_iovec);

        /* Start the state machine processing this transaction */
        kfilnd_tn_event_handler(tn, event, 0);

        return 0;
}

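/* LND receive handler: consume an immediate message in place, or post the
 * caller's buffer for the tagged RMA phase of a bulk PUT/GET transaction.
 */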
static int kfilnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
                       int delayed, unsigned int niov,
                       struct bio_vec *kiov,
                       unsigned int offset, unsigned int mlen,
                       unsigned int rlen)
{
        struct kfilnd_transaction *tn = private;
        struct kfilnd_msg *rxmsg = tn->tn_rx_msg.msg;
        int nob;
        int rc = 0;
        int status = 0;
        enum tn_events event;

        if (mlen > rlen)
                return -EINVAL;

        /* Transaction must be in receive state */
        if (tn->tn_state != TN_STATE_IMM_RECV)
                return -EINVAL;

        tn->tn_lntmsg = msg;
        tn->lnet_msg_len = rlen;

        switch (rxmsg->type) {
        case KFILND_MSG_IMMEDIATE:
                nob = offsetof(struct kfilnd_msg, proto.immed.payload[rlen]);
                if (nob > tn->tn_rx_msg.length) {
                        CERROR("Immediate message from %s too big: %d(%lu)\n",
                               libcfs_nidstr(&msg->msg_hdr.src_nid),
                               nob, tn->tn_rx_msg.length);
                        return -EPROTO;
                }
                tn->tn_nob = nob;

                lnet_copy_flat2kiov(niov, kiov, offset,
                                    KFILND_IMMEDIATE_MSG_SIZE, rxmsg,
                                    offsetof(struct kfilnd_msg,
                                             proto.immed.payload),
                                    mlen);

                kfilnd_tn_event_handler(tn, TN_EVENT_RX_OK, 0);
                return 0;

        case KFILND_MSG_BULK_PUT_REQ:
                if (mlen == 0) {
                        event = TN_EVENT_SKIP_TAG_RMA;
                } else {
                        /* Post the buffer given to us as a sink */
                        tn->sink_buffer = true;
                        rc = kfilnd_tn_set_kiov_buf(tn, kiov, niov, offset,
                                                    mlen);
                        if (rc) {
                                CERROR("Failed to setup PUT sink buffer rc %d\n", rc);
                                kfilnd_tn_free(tn);
                                return rc;
                        }
                        event = TN_EVENT_INIT_TAG_RMA;
                }
                break;

        case KFILND_MSG_BULK_GET_REQ:
                if (!msg) {
                        event = TN_EVENT_SKIP_TAG_RMA;
                        status = -ENODATA;
                } else {
                        /* Post the buffer given to us as a source */
                        tn->sink_buffer = false;
                        rc = kfilnd_tn_set_kiov_buf(tn, msg->msg_kiov,
                                                    msg->msg_niov,
                                                    msg->msg_offset,
                                                    msg->msg_len);
                        if (rc) {
                                CERROR("Failed to setup GET source buffer rc %d\n", rc);
                                kfilnd_tn_free(tn);
                                return rc;
                        }
                        event = TN_EVENT_INIT_TAG_RMA;
                }
                break;

        default:
                /* TODO: TN leaks here. */
                CERROR("Invalid message type = %d\n", rxmsg->type);
                return -EINVAL;
        }

        /* Store relevant fields to generate a bulk response. */
        tn->tn_response_mr_key = rxmsg->proto.bulk_req.key;
        tn->tn_response_rx = rxmsg->proto.bulk_req.response_rx;

#if 0
        tn->tn_tx_msg.length = kfilnd_init_proto(tn->tn_tx_msg.msg,
                                                 KFILND_MSG_BULK_RSP,
                                                 sizeof(struct kfilnd_bulk_rsp),
                                                 ni);
#endif

        KFILND_TN_DEBUG(tn, "%s in %u bytes in %u frags",
                        msg_type_to_str(rxmsg->type), tn->tn_nob,
                        tn->tn_num_iovec);

        kfilnd_tn_event_handler(tn, event, status);

        return rc;
}

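/* Netlink attribute keys describing the kfilnd tunables exposed through the
 * LNet netlink interface.
 */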
static const struct ln_key_list kfilnd_tunables_keys = {
        .lkl_maxattr                    = LNET_NET_KFILND_TUNABLES_ATTR_MAX,
        .lkl_list                       = {
                [LNET_NET_KFILND_TUNABLES_ATTR_PROV_MAJOR]      = {
                        .lkp_value      = "prov_major_version",
                        .lkp_data_type  = NLA_S32
                },
                [LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR]  = {
                        .lkp_value      = "prov_minor_version",
                        .lkp_data_type  = NLA_S32
                },
                [LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY]  = {
                        .lkp_value      = "auth_key",
                        .lkp_data_type  = NLA_S32
                },
                [LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS]  = {
                        .lkp_value      = "traffic_class",
                        .lkp_data_type  = NLA_STRING,
                },
        },
};

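/* Fill a netlink message with the current kfilnd tunable values for an NI. */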
static int
kfilnd_nl_get(int cmd, struct sk_buff *msg, int type, void *data)
{
        struct lnet_lnd_tunables *tunables;
        struct lnet_ni *ni = data;

        if (!ni || !msg)
                return -EINVAL;

        if (cmd != LNET_CMD_NETS || type != LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES)
                return -EOPNOTSUPP;

        tunables = &ni->ni_lnd_tunables;
        nla_put_s32(msg, LNET_NET_KFILND_TUNABLES_ATTR_PROV_MAJOR,
                    tunables->lnd_tun_u.lnd_kfi.lnd_prov_major_version);
        nla_put_s32(msg, LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR,
                    tunables->lnd_tun_u.lnd_kfi.lnd_prov_minor_version);
        nla_put_s32(msg, LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY,
                    tunables->lnd_tun_u.lnd_kfi.lnd_auth_key);
        nla_put_string(msg, LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS,
                       tunables->lnd_tun_u.lnd_kfi.lnd_traffic_class_str);

        return 0;
}

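/* Apply a single kfilnd tunable attribute received over netlink. */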
static int
kfilnd_nl_set(int cmd, struct nlattr *attr, int type, void *data)
{
        struct lnet_lnd_tunables *tunables = data;
        struct lnet_ioctl_config_kfilnd_tunables *lnd_kfi;
        int rc = 0;

        if (cmd != LNET_CMD_NETS)
                return -EOPNOTSUPP;

        lnd_kfi = &tunables->lnd_tun_u.lnd_kfi;

        switch (type) {
        case LNET_NET_KFILND_TUNABLES_ATTR_PROV_MAJOR:
                lnd_kfi->lnd_prov_major_version = nla_get_s64(attr);
                break;
        case LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR:
                lnd_kfi->lnd_prov_minor_version = nla_get_s64(attr);
                break;
        case LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY:
                lnd_kfi->lnd_auth_key = nla_get_s64(attr);
                break;
        case LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS:
                rc = nla_strscpy(lnd_kfi->lnd_traffic_class_str, attr,
                                 sizeof(lnd_kfi->lnd_traffic_class_str));
                break;
        default:
                rc = -EINVAL;
                break;
        }

        return rc;
}

static int kfilnd_startup(struct lnet_ni *ni);

static const struct lnet_lnd the_kfilnd = {
        .lnd_type       = KFILND,
        .lnd_startup    = kfilnd_startup,
        .lnd_shutdown   = kfilnd_shutdown,
        .lnd_send       = kfilnd_send,
        .lnd_recv       = kfilnd_recv,
        .lnd_nl_get     = kfilnd_nl_get,
        .lnd_nl_set     = kfilnd_nl_set,
        .lnd_keys       = &kfilnd_tunables_keys,
};

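/* LND startup handler: configure tunables, allocate a kfilnd device for the
 * NI's single interface, and post the initial immediate receive buffers.
 */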
static int kfilnd_startup(struct lnet_ni *ni)
{
        const char *node;
        int rc;
        struct kfilnd_dev *kfdev;

        if (!ni)
                return -EINVAL;

        if (ni->ni_net->net_lnd != &the_kfilnd) {
                CERROR("Wrong lnd type\n");
                return -EINVAL;
        }

        rc = kfilnd_tunables_setup(ni);
        if (rc) {
                CERROR("Can't configure tunable values, rc = %d\n", rc);
                goto err;
        }

        /* Only a single interface is supported. */
        if (!ni->ni_interface) {
                rc = -ENODEV;
                CERROR("No LNet network interface address defined\n");
                goto err;
        }

        node = ni->ni_interface;

        kfdev = kfilnd_dev_alloc(ni, node);
        if (IS_ERR(kfdev)) {
                rc = PTR_ERR(kfdev);
                CERROR("Failed to allocate KFILND device for %s: rc=%d\n", node,
                       rc);
                goto err;
        }

        /* Post a series of immediate receive buffers */
        rc = kfilnd_dev_post_imm_buffers(kfdev);
        if (rc) {
                CERROR("Can't post buffers, rc = %d\n", rc);
                goto err_free_dev;
        }

        return 0;

err_free_dev:
        kfilnd_dev_free(kfdev);
err:
        return rc;
}

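/* Module unload: tear down the work queue, the transaction system, the LND
 * registration, and the debugfs entries.
 */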
static void __exit kfilnd_exit(void)
{
        destroy_workqueue(kfilnd_wq);

        kfilnd_tn_cleanup();

        lnet_unregister_lnd(&the_kfilnd);

        debugfs_remove_recursive(kfilnd_debug_dir);
}

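/* Module load: set up debugfs, tunables, libcfs, the transaction system, and
 * the kfilnd work queue, then register the LND with LNet.
 */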
static int __init kfilnd_init(void)
{
        int rc;
        unsigned int flags;

        kfilnd_debug_dir = debugfs_create_dir("kfilnd", NULL);

        rc = kfilnd_tunables_init();
        if (rc)
                goto err;

        rc = libcfs_setup();
        if (rc)
                return rc;

        /* Do any initialization of the transaction system */
        rc = kfilnd_tn_init();
        if (rc) {
                CERROR("Cannot initialize transaction system\n");
                goto err;
        }

        flags = WQ_MEM_RECLAIM | WQ_SYSFS;
        if (wq_cpu_intensive)
                flags = flags | WQ_CPU_INTENSIVE;
        if (wq_high_priority)
                flags = flags | WQ_HIGHPRI;

        kfilnd_wq = alloc_workqueue("kfilnd_wq", flags, wq_max_active);
        if (!kfilnd_wq) {
                rc = -ENOMEM;
                CERROR("Failed to allocate kfilnd work queue\n");
                goto err_tn_cleanup;
        }

        lnet_register_lnd(&the_kfilnd);

        return 0;

err_tn_cleanup:
        kfilnd_tn_cleanup();
err:
        return rc;
}

MODULE_AUTHOR("Cray Inc.");
MODULE_DESCRIPTION("Kfabric Lustre Network Driver");
MODULE_VERSION(KFILND_VERSION);
MODULE_LICENSE("GPL");

module_init(kfilnd_init);
module_exit(kfilnd_exit);