4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright 2022 Hewlett Packard Enterprise Development LP
26 * This file is part of Lustre, http://www.lustre.org/
29 * kfilnd main interface.
32 #include <linux/delay.h>
34 #include "kfilnd_tn.h"
35 #include "kfilnd_dev.h"
37 /* These are temp constants to get stuff to compile */
38 #define KFILND_DEFAULT_DEVICE "eth0"
40 /* Some constants which should be turned into tunables */
41 #define KFILND_MAX_WORKER_THREADS 4
42 #define KFILND_MAX_EVENT_QUEUE 100
/* Workqueue flags: allow forward progress under memory pressure
 * (WQ_MEM_RECLAIM), run at high priority, and expose sysfs knobs. */
44 #define KFILND_WQ_FLAGS (WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS)
/* Module-wide workqueue used by the transaction state machine
 * (allocated in kfilnd_init(), destroyed in kfilnd_exit()). */
45 struct workqueue_struct *kfilnd_wq;
/* debugfs root directory for kfilnd ("kfilnd", created at module init). */
46 struct dentry *kfilnd_debug_dir;
/* kfilnd_shutdown() - lnd_shutdown hook: tear down the kfilnd device
 * attached to @ni.
 * NOTE(review): only the ni_data fetch is visible in this extract; the
 * rest of the body (presumably freeing @dev) is truncated — confirm
 * against the full source.
 */
48 static void kfilnd_shutdown(struct lnet_ni *ni)
50 struct kfilnd_dev *dev = ni->ni_data;
/* kfilnd_send_cpt() - Select the CPT (CPU partition) to send from.
 *
 * Prefers the caller's current CPT when the device has an endpoint for
 * it; otherwise hashes the target @nid over the LNet NI's CPT count and
 * returns that endpoint's CPT.
 * NOTE(review): the return for the cpt_to_endpoint hit is not visible in
 * this extract — presumably "return cpt;" — confirm against full source.
 */
55 static int kfilnd_send_cpt(struct kfilnd_dev *dev, lnet_nid_t nid)
59 /* If the current CPT has is within the LNet NI CPTs, use that CPT. */
60 cpt = lnet_cpt_current();
61 if (dev->cpt_to_endpoint[cpt])
64 /* Hash to a LNet NI CPT based on target NID. */
65 return dev->kfd_endpoints[nid % dev->kfd_ni->ni_ncpts]->end_cpt;
/* kfilnd_send_hello_request() - Allocate a transaction targeting @nid on
 * @cpt and drive it through the state machine with TN_EVENT_TX_HELLO to
 * send a hello handshake to a peer.
 * NOTE(review): the allocation-failure return path between tn_alloc and
 * the CERROR is truncated in this extract — confirm rc derivation.
 */
68 int kfilnd_send_hello_request(struct kfilnd_dev *dev, int cpt, lnet_nid_t nid)
70 struct kfilnd_transaction *tn;
73 tn = kfilnd_tn_alloc(dev, cpt, nid, true, true, false);
76 CERROR("Failed to allocate transaction struct: rc=%d\n", rc);
/* Hand the new transaction to the state machine to transmit the hello. */
80 kfilnd_tn_event_handler(tn, TN_EVENT_TX_HELLO, 0);
/* kfilnd_send() - lnd_send hook: transmit an LNet message @msg on @ni.
 *
 * Classifies the outgoing message into a kfilnd wire type: small payloads
 * that fit in KFILND_IMMEDIATE_MSG_SIZE go as KFILND_MSG_IMMEDIATE;
 * larger GET replies become KFILND_MSG_BULK_GET_REQ and larger PUTs
 * become KFILND_MSG_BULK_PUT_REQ.  A transaction is then allocated on a
 * CPT chosen by kfilnd_send_cpt(), its buffers are attached, and the
 * transaction state machine is kicked with the matching init event.
 * NOTE(review): this extract is gapped — the msg_type switch arms
 * (LNET_MSG_ACK/GET/PUT/REPLY), error unwinds, `break`s, and the final
 * return are not visible; comments below describe only what is shown.
 */
85 static int kfilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *msg)
87 int type = msg->msg_type;
88 struct lnet_processid *target = &msg->msg_target;
89 struct kfilnd_transaction *tn;
91 struct kfilnd_dev *dev = ni->ni_data;
92 enum kfilnd_msg_type lnd_msg_type;
94 enum tn_events event = TN_EVENT_INVALID;
/* Zero-length messages are always sent as immediate. */
104 if (msg->msg_len != 0)
106 lnd_msg_type = KFILND_MSG_IMMEDIATE;
/* GET path: size the reply against the MD length; if the payload fits
 * inline, send immediate, otherwise issue a bulk GET request. */
110 nob = offsetof(struct kfilnd_msg,
111 proto.immed.payload[msg->msg_md->md_length]);
112 if (nob <= KFILND_IMMEDIATE_MSG_SIZE) {
113 lnd_msg_type = KFILND_MSG_IMMEDIATE;
117 lnd_msg_type = KFILND_MSG_BULK_GET_REQ;
/* PUT path: same fit test but against the message length itself. */
123 nob = offsetof(struct kfilnd_msg,
124 proto.immed.payload[msg->msg_len]);
125 if (nob <= KFILND_IMMEDIATE_MSG_SIZE) {
126 lnd_msg_type = KFILND_MSG_IMMEDIATE;
130 lnd_msg_type = KFILND_MSG_BULK_PUT_REQ;
/* Transactions are keyed by the legacy 4-byte NID form of the target. */
135 tgt_nid4 = lnet_nid_to_nid4(&target->nid);
137 cpt = kfilnd_send_cpt(dev, tgt_nid4);
138 tn = kfilnd_tn_alloc(dev, cpt, tgt_nid4, true, true, tn_key);
141 CERROR("Failed to allocate transaction struct: rc=%d\n", rc);
145 /* Need to fire off special transaction if this is a new peer. */
146 if (kfilnd_peer_is_new_peer(tn->peer)) {
147 rc = kfilnd_send_hello_request(dev, cpt, tgt_nid4);
/* Attach the payload buffer and pick the state-machine init event
 * according to the wire type chosen above. */
154 switch (lnd_msg_type) {
155 case KFILND_MSG_IMMEDIATE:
156 rc = kfilnd_tn_set_kiov_buf(tn, msg->msg_kiov, msg->msg_niov,
157 msg->msg_offset, msg->msg_len);
159 CERROR("Failed to setup immediate buffer rc %d\n", rc);
164 event = TN_EVENT_INIT_IMMEDIATE;
/* Bulk PUT: this node is the data source. */
167 case KFILND_MSG_BULK_PUT_REQ:
168 tn->sink_buffer = false;
169 rc = kfilnd_tn_set_kiov_buf(tn, msg->msg_kiov, msg->msg_niov,
170 msg->msg_offset, msg->msg_len);
172 CERROR("Failed to setup PUT source buffer rc %d\n", rc);
177 event = TN_EVENT_INIT_BULK;
/* Bulk GET: this node is the data sink; LNet needs a reply msg. */
180 case KFILND_MSG_BULK_GET_REQ:
181 /* We need to create a reply message to inform LNet our
182 * optimized GET is done.
184 tn->tn_getreply = lnet_create_reply_msg(ni, msg);
185 if (!tn->tn_getreply) {
186 CERROR("Can't create reply for GET -> %s\n",
187 libcfs_nidstr(&target->nid));
192 tn->sink_buffer = true;
/* Sink buffer comes from the GET's memory descriptor, not the msg. */
193 rc = kfilnd_tn_set_kiov_buf(tn, msg->msg_md->md_kiov,
194 msg->msg_md->md_niov,
195 msg->msg_md->md_offset,
196 msg->msg_md->md_length);
198 CERROR("Failed to setup GET sink buffer rc %d\n", rc);
202 event = TN_EVENT_INIT_BULK;
210 tn->msg_type = lnd_msg_type;
211 tn->tn_lntmsg = msg; /* finalise msg on completion */
212 tn->lnet_msg_len = tn->tn_nob;
214 KFILND_TN_DEBUG(tn, "%s in %u bytes in %u frags",
215 msg_type_to_str(lnd_msg_type), tn->tn_nob,
218 /* Start the state machine processing this transaction */
219 kfilnd_tn_event_handler(tn, event, 0);
/* kfilnd_recv() - lnd_recv hook: deliver a received message to LNet.
 *
 * @private carries the transaction created at RX time; the raw wire
 * message is in tn->tn_rx_msg.  Immediate payloads are copied straight
 * into the caller's kiov; bulk PUT/GET requests post the caller's buffer
 * as sink/source and initiate (or skip) a tagged RMA, after which a bulk
 * response is generated from the fields stashed below.
 * NOTE(review): this extract is gapped — return statements, `break`s,
 * the zero-length-RMA conditions guarding the SKIP_TAG_RMA assignments,
 * and the origin of `status` are not visible; comments describe only
 * what is shown.
 */
224 static int kfilnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
225 int delayed, unsigned int niov,
226 struct bio_vec *kiov,
227 unsigned int offset, unsigned int mlen,
230 struct kfilnd_transaction *tn = private;
231 struct kfilnd_msg *rxmsg = tn->tn_rx_msg.msg;
235 enum tn_events event;
240 /* Transaction must be in receive state */
241 if (tn->tn_state != TN_STATE_IMM_RECV)
245 tn->lnet_msg_len = rlen;
247 switch (rxmsg->type) {
248 case KFILND_MSG_IMMEDIATE:
/* Reject frames whose advertised payload exceeds what was received. */
249 nob = offsetof(struct kfilnd_msg, proto.immed.payload[rlen]);
250 if (nob > tn->tn_rx_msg.length) {
251 CERROR("Immediate message from %s too big: %d(%lu)\n",
252 libcfs_nidstr(&msg->msg_hdr.src_nid),
253 nob, tn->tn_rx_msg.length);
/* Copy the inline payload out of the flat RX buffer into the kiov. */
258 lnet_copy_flat2kiov(niov, kiov, offset,
259 KFILND_IMMEDIATE_MSG_SIZE, rxmsg,
260 offsetof(struct kfilnd_msg,
261 proto.immed.payload),
264 kfilnd_tn_event_handler(tn, TN_EVENT_RX_OK, 0);
267 case KFILND_MSG_BULK_PUT_REQ:
/* Presumably taken when there is no data to move — TODO confirm the
 * truncated condition above this assignment. */
269 event = TN_EVENT_SKIP_TAG_RMA;
271 /* Post the buffer given us as a sink */
272 tn->sink_buffer = true;
273 rc = kfilnd_tn_set_kiov_buf(tn, kiov, niov, offset,
276 CERROR("Failed to setup PUT sink buffer rc %d\n", rc);
280 event = TN_EVENT_INIT_TAG_RMA;
284 case KFILND_MSG_BULK_GET_REQ:
/* Presumably the no-data case as well — TODO confirm. */
286 event = TN_EVENT_SKIP_TAG_RMA;
289 /* Post the buffer given to us as a source */
290 tn->sink_buffer = false;
291 rc = kfilnd_tn_set_kiov_buf(tn, msg->msg_kiov,
296 CERROR("Failed to setup GET source buffer rc %d\n", rc);
300 event = TN_EVENT_INIT_TAG_RMA;
305 /* TODO: TN leaks here. */
306 CERROR("Invalid message type = %d\n", rxmsg->type);
310 /* Store relevant fields to generate a bulk response. */
311 tn->tn_response_mr_key = rxmsg->proto.bulk_req.key;
312 tn->tn_response_rx = rxmsg->proto.bulk_req.response_rx;
/* Pre-build the bulk response header in the TX message buffer. */
315 tn->tn_tx_msg.length = kfilnd_init_proto(tn->tn_tx_msg.msg,
317 sizeof(struct kfilnd_bulk_rsp),
321 KFILND_TN_DEBUG(tn, "%s in %u bytes in %u frags",
322 msg_type_to_str(rxmsg->type), tn->tn_nob,
325 kfilnd_tn_event_handler(tn, event, status);
/* Netlink key list describing kfilnd's per-NI tunables so userspace
 * (lnetctl) can get/set them by attribute id. */
330 static const struct ln_key_list kfilnd_tunables_keys = {
331 .lkl_maxattr = LNET_NET_KFILND_TUNABLES_ATTR_MAX,
333 [LNET_NET_KFILND_TUNABLES_ATTR_PROV_MAJOR] = {
334 .lkp_value = "prov_major_version",
335 .lkp_data_type = NLA_S32
337 [LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR] = {
338 .lkp_value = "prov_minor_version",
339 .lkp_data_type = NLA_S32
341 [LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY] = {
342 .lkp_value = "auth_key",
343 .lkp_data_type = NLA_S32
/* kfilnd_nl_set() - lnd_nl_set hook: apply one netlink tunable attribute
 * @attr of key @type to the LND tunables in @data.  Only LNET_CMD_NETS
 * is accepted.
 * NOTE(review): the keys above are declared NLA_S32 but values are read
 * with nla_get_s64() — confirm this widening is intentional and matches
 * how userspace packs the attributes.
 */
349 kfilnd_nl_set(int cmd, struct nlattr *attr, int type, void *data)
351 struct lnet_lnd_tunables *tunables = data;
354 if (cmd != LNET_CMD_NETS)
358 case LNET_NET_KFILND_TUNABLES_ATTR_PROV_MAJOR:
359 tunables->lnd_tun_u.lnd_kfi.lnd_prov_major_version = nla_get_s64(attr);
361 case LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR:
362 tunables->lnd_tun_u.lnd_kfi.lnd_prov_minor_version = nla_get_s64(attr);
364 case LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY:
365 tunables->lnd_tun_u.lnd_kfi.lnd_auth_key = nla_get_s64(attr);
/* Forward declaration: kfilnd_startup references the_kfilnd, which in
 * turn references kfilnd_startup. */
375 static int kfilnd_startup(struct lnet_ni *ni);
/* LND operations table registered with LNet in kfilnd_init(). */
377 static const struct lnet_lnd the_kfilnd = {
379 .lnd_startup = kfilnd_startup,
380 .lnd_shutdown = kfilnd_shutdown,
381 .lnd_send = kfilnd_send,
382 .lnd_recv = kfilnd_recv,
383 .lnd_nl_set = kfilnd_nl_set,
384 .lnd_keys = &kfilnd_tunables_keys,
/* kfilnd_startup() - lnd_startup hook: bring up a kfilnd NI.
 *
 * Validates the LND type, applies tunables, requires exactly one
 * configured interface, allocates the kfilnd device for it, publishes
 * the fabric NIC address as the NID, and posts the initial immediate RX
 * buffers.  On buffer-post failure the device is freed.
 * NOTE(review): return statements and the error-label structure are
 * truncated in this extract — confirm the unwind order in full source.
 */
387 static int kfilnd_startup(struct lnet_ni *ni)
391 struct kfilnd_dev *kfdev;
396 if (ni->ni_net->net_lnd != &the_kfilnd) {
397 CERROR("Wrong lnd type\n");
401 kfilnd_tunables_setup(ni);
403 /* Only a single interface is supported. */
404 if (!ni->ni_interface) {
406 CERROR("No LNet network interface address defined\n");
410 node = ni->ni_interface;
412 kfdev = kfilnd_dev_alloc(ni, node);
415 CERROR("Failed to allocate KFILND device for %s: rc=%d\n", node,
/* Expose the device's fabric address as this NI's NID address. */
421 ni->ni_nid.nid_addr[0] = cpu_to_be32(LNET_NIDADDR(kfdev->nic_addr));
423 /* Post a series of immediate receive buffers */
424 rc = kfilnd_dev_post_imm_buffers(kfdev);
426 CERROR("Can't post buffers, rc = %d\n", rc);
/* Error path: release the device allocated above. */
433 kfilnd_dev_free(kfdev);
/* kfilnd_exit() - Module unload: tear down in reverse of kfilnd_init():
 * drain/destroy the workqueue, unregister the LND from LNet, then remove
 * the debugfs tree. */
438 static void __exit kfilnd_exit(void)
440 destroy_workqueue(kfilnd_wq);
444 lnet_unregister_lnd(&the_kfilnd);
446 debugfs_remove_recursive(kfilnd_debug_dir);
/* kfilnd_init() - Module load: create the debugfs dir, initialize
 * tunables and the transaction subsystem, allocate the module workqueue,
 * and finally register the LND with LNet.
 * NOTE(review): error-unwind labels/returns between steps are truncated
 * in this extract — confirm each failure path undoes prior steps.
 */
449 static int __init kfilnd_init(void)
453 kfilnd_debug_dir = debugfs_create_dir("kfilnd", NULL);
455 rc = kfilnd_tunables_init();
459 /* Do any initialization of the transaction system */
460 rc = kfilnd_tn_init();
462 CERROR("Cannot initialize transaction system\n");
466 kfilnd_wq = alloc_workqueue("kfilnd_wq", KFILND_WQ_FLAGS,
470 CERROR("Failed to allocated kfilnd work queue\n");
/* Registration last: LNet may call into the ops table immediately. */
474 lnet_register_lnd(&the_kfilnd);
/* Kernel module metadata and entry/exit points. */
484 MODULE_AUTHOR("Cray Inc.");
485 MODULE_DESCRIPTION("Kfabric Lustre Network Driver");
486 MODULE_VERSION(KFILND_VERSION);
487 MODULE_LICENSE("GPL");
489 module_init(kfilnd_init);
490 module_exit(kfilnd_exit);