4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/klnds/socklnd/socklnd_lib-winnt.c
38 * windows socknal library
43 # if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
/* Leaf sysctl entries for the socknal tunables.  The slots are filled in at
 * run time by ksocknal_lib_tunables_init().
 * NOTE(review): that initializer contains 22 distinct procname assignments
 * (some of them under build-time conditionals); confirm that 21 slots,
 * including any empty terminating entry the sysctl code expects, is always
 * enough. */
44 static struct ctl_table ksocknal_ctl_table[21];
/* Top-level sysctl table: a "socknal" entry whose child is the leaf table
 * declared above. */
46 struct ctl_table ksocknal_top_ctl_table[] = {
49 /* procname */ "socknal",
53 /* child */ ksocknal_ctl_table
/*
 * Populate ksocknal_ctl_table and register it with the sysctl layer.
 *
 * Each tunable gets one entry: a ctl_name taken from the running counter j,
 * the name it is exported under, the backing pointer taken from
 * ksocknal_tunables, an int-sized maxlen, an access mode (0644 or 0444) and
 * proc_dointvec as the handler.  The handle returned by
 * register_sysctl_table() is stored in ksocknal_tunables.ksnd_sysctl; a NULL
 * handle only produces a warning.
 */
59 ksocknal_lib_tunables_init ()
64 ksocknal_ctl_table[i].ctl_name = j++;
65 ksocknal_ctl_table[i].procname = "timeout";
66 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_timeout;
67 ksocknal_ctl_table[i].maxlen = sizeof (int);
68 ksocknal_ctl_table[i].mode = 0644;
69 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
72 ksocknal_ctl_table[i].ctl_name = j++;
73 ksocknal_ctl_table[i].procname = "credits";
74 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_credits;
75 ksocknal_ctl_table[i].maxlen = sizeof (int);
76 ksocknal_ctl_table[i].mode = 0444;
77 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
80 ksocknal_ctl_table[i].ctl_name = j++;
81 ksocknal_ctl_table[i].procname = "peer_credits";
82 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_peertxcredits;
83 ksocknal_ctl_table[i].maxlen = sizeof (int);
84 ksocknal_ctl_table[i].mode = 0444;
85 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
88 ksocknal_ctl_table[i].ctl_name = j++;
89 ksocknal_ctl_table[i].procname = "peer_buffer_credits";
90 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_peerrtrcredits;
91 ksocknal_ctl_table[i].maxlen = sizeof (int);
92 ksocknal_ctl_table[i].mode = 0444;
93 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
96 ksocknal_ctl_table[i].ctl_name = j++;
97 ksocknal_ctl_table[i].procname = "nconnds";
98 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_nconnds;
99 ksocknal_ctl_table[i].maxlen = sizeof (int);
100 ksocknal_ctl_table[i].mode = 0444;
101 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
105 ksocknal_ctl_table[i].ctl_name = j++;
106 ksocknal_ctl_table[i].procname = "min_reconnectms";
107 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_min_reconnectms;
108 ksocknal_ctl_table[i].maxlen = sizeof (int);
109 ksocknal_ctl_table[i].mode = 0444;
110 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
113 ksocknal_ctl_table[i].ctl_name = j++;
114 ksocknal_ctl_table[i].procname = "max_reconnectms";
115 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_max_reconnectms;
116 ksocknal_ctl_table[i].maxlen = sizeof (int);
117 ksocknal_ctl_table[i].mode = 0444;
118 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
121 ksocknal_ctl_table[i].ctl_name = j++;
122 ksocknal_ctl_table[i].procname = "eager_ack";
123 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_eager_ack;
124 ksocknal_ctl_table[i].maxlen = sizeof (int);
125 ksocknal_ctl_table[i].mode = 0644;
126 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* The "zero_copy" entry is backed by the ksnd_zc_min_payload tunable. */
129 ksocknal_ctl_table[i].ctl_name = j++;
130 ksocknal_ctl_table[i].procname = "zero_copy";
131 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_zc_min_payload;
132 ksocknal_ctl_table[i].maxlen = sizeof (int);
133 ksocknal_ctl_table[i].mode = 0644;
134 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* The "typed" entry is backed by the ksnd_typed_conns tunable. */
137 ksocknal_ctl_table[i].ctl_name = j++;
138 ksocknal_ctl_table[i].procname = "typed";
139 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_typed_conns;
140 ksocknal_ctl_table[i].maxlen = sizeof (int);
141 ksocknal_ctl_table[i].mode = 0444;
142 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
145 ksocknal_ctl_table[i].ctl_name = j++;
146 ksocknal_ctl_table[i].procname = "min_bulk";
147 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_min_bulk;
148 ksocknal_ctl_table[i].maxlen = sizeof (int);
149 ksocknal_ctl_table[i].mode = 0644;
150 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
153 ksocknal_ctl_table[i].ctl_name = j++;
154 ksocknal_ctl_table[i].procname = "rx_buffer_size";
155 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_rx_buffer_size;
156 ksocknal_ctl_table[i].maxlen = sizeof(int);
157 ksocknal_ctl_table[i].mode = 0644;
158 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
161 ksocknal_ctl_table[i].ctl_name = j++;
162 ksocknal_ctl_table[i].procname = "tx_buffer_size";
163 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_tx_buffer_size;
164 ksocknal_ctl_table[i].maxlen = sizeof(int);
165 ksocknal_ctl_table[i].mode = 0644;
166 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
169 ksocknal_ctl_table[i].ctl_name = j++;
170 ksocknal_ctl_table[i].procname = "nagle";
171 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_nagle;
172 ksocknal_ctl_table[i].maxlen = sizeof(int);
173 ksocknal_ctl_table[i].mode = 0644;
174 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
177 ksocknal_ctl_table[i].ctl_name = j++;
178 ksocknal_ctl_table[i].procname = "round_robin";
179 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_round_robin;
180 ksocknal_ctl_table[i].maxlen = sizeof(int);
181 ksocknal_ctl_table[i].mode = 0644;
182 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
186 ksocknal_ctl_table[i].ctl_name = j++;
187 ksocknal_ctl_table[i].procname = "irq_affinity";
188 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_irq_affinity;
189 ksocknal_ctl_table[i].maxlen = sizeof(int);
190 ksocknal_ctl_table[i].mode = 0644;
191 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
195 ksocknal_ctl_table[i].ctl_name = j++;
196 ksocknal_ctl_table[i].procname = "keepalive_idle";
197 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_idle;
198 ksocknal_ctl_table[i].maxlen = sizeof(int);
199 ksocknal_ctl_table[i].mode = 0644;
200 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
203 ksocknal_ctl_table[i].ctl_name = j++;
204 ksocknal_ctl_table[i].procname = "keepalive_count";
205 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_count;
206 ksocknal_ctl_table[i].maxlen = sizeof(int);
207 ksocknal_ctl_table[i].mode = 0644;
208 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
211 ksocknal_ctl_table[i].ctl_name = j++;
212 ksocknal_ctl_table[i].procname = "keepalive_intvl";
213 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_intvl;
214 ksocknal_ctl_table[i].maxlen = sizeof(int);
215 ksocknal_ctl_table[i].mode = 0644;
216 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* Back-off tunables are only exported when SOCKNAL_BACKOFF is defined. */
219 #ifdef SOCKNAL_BACKOFF
220 ksocknal_ctl_table[i].ctl_name = j++;
221 ksocknal_ctl_table[i].procname = "backoff_init";
222 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_backoff_init;
223 ksocknal_ctl_table[i].maxlen = sizeof(int);
224 ksocknal_ctl_table[i].mode = 0644;
225 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
228 ksocknal_ctl_table[i].ctl_name = j++;
229 ksocknal_ctl_table[i].procname = "backoff_max";
230 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_backoff_max;
231 ksocknal_ctl_table[i].maxlen = sizeof(int);
232 ksocknal_ctl_table[i].mode = 0644;
233 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* The protocol selector is only exported when SOCKNAL_VERSION_DEBUG is set. */
237 #if SOCKNAL_VERSION_DEBUG
238 ksocknal_ctl_table[i].ctl_name = j++;
239 ksocknal_ctl_table[i].procname = "protocol";
240 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_protocol;
241 ksocknal_ctl_table[i].maxlen = sizeof(int);
242 ksocknal_ctl_table[i].mode = 0644;
243 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* Sanity checks: the ctl_name counter must stay one ahead of the slot index,
 * and the slot index must not exceed the number of table slots.
 * NOTE(review): the bounds assertion runs only after all slots have been
 * written, so it reports an overrun rather than preventing one. */
247 LASSERT (j == i + 1);
248 LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
/* Register the table; the handle is kept for ksocknal_lib_tunables_fini(). */
250 ksocknal_tunables.ksnd_sysctl =
251 register_sysctl_table(ksocknal_top_ctl_table);
253 if (ksocknal_tunables.ksnd_sysctl == NULL)
254 CWARN("Can't setup /proc tunables\n");
/* Unregister the sysctl table, but only if a registration handle was stored
 * in ksocknal_tunables.ksnd_sysctl. */
260 ksocknal_lib_tunables_fini ()
262 if (ksocknal_tunables.ksnd_sysctl != NULL)
263 unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
267 ksocknal_lib_tunables_init ()
273 ksocknal_lib_tunables_fini ()
276 #endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
279 ksocknal_lib_bind_irq (unsigned int irq)
/*
 * Query the connection's socket for its addresses through
 * libcfs_sock_getaddr(): first with a second argument of 1 (the failure
 * message calls this the peer IP), then with 0 to fill conn->ksnc_myipaddr
 * (the local IP).  Each failure is reported through CERROR with the error
 * code.
 */
284 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
286 int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
290 /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
291 LASSERT (!conn->ksnc_closing);
294 CERROR ("Error %d getting sock peer IP\n", rc);
298 rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
299 &conn->ksnc_myipaddr, NULL);
301 CERROR ("Error %d getting sock local IP\n", rc);
309 ksocknal_lib_sock_irq (struct socket *sock)
/*
 * Send the iovec fragments of tx on conn's socket without blocking.
 *
 * Before the first send of a message the header checksum is computed, when
 * checksumming is enabled and the connection speaks the V2.x protocol.
 * MSG_MORE is added to MSG_DONTWAIT when more transmits are queued on the
 * connection or when the iovs cover less than the bytes still outstanding
 * for this tx.
 */
315 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
317 struct socket *sock = conn->ksnc_sock;
324 if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
325 conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
326 tx->tx_nob == tx->tx_resid && /* first sending */
327 tx->tx_msg.ksm_csum == 0) /* not checksummed */
328 ksocknal_lib_csum_tx(tx);
/* nob: presumably the total byte count described by the iovs -- confirm
 * against ks_query_iovs_length(). */
330 nob = ks_query_iovs_length(tx->tx_iov, tx->tx_niov);
331 flags = (!cfs_list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
332 (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT;
333 rc = ks_send_iovs(sock, tx->tx_iov, tx->tx_niov, flags, 0);
335 KsPrint((4, "ksocknal_lib_send_iov: conn %p sock %p rc %d\n",
/*
 * Send the page (kiov) fragments of tx on conn's socket without blocking.
 * MSG_MORE is added to MSG_DONTWAIT when more transmits are queued on the
 * connection or when the kiovs cover less than the bytes still outstanding
 * for this tx.
 */
341 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
343 struct socket *sock = conn->ksnc_sock;
344 lnet_kiov_t *kiov = tx->tx_kiov;
350 nkiov = tx->tx_nkiov;
/* nob: presumably the total byte count described by the kiovs -- confirm
 * against ks_query_kiovs_length(). */
351 nob = ks_query_kiovs_length(tx->tx_kiov, nkiov);
352 flags = (!cfs_list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
353 (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT;
354 rc = ks_send_kiovs(sock, tx->tx_kiov, nkiov, flags, 0);
356 KsPrint((4, "ksocknal_lib_send_kiov: conn %p sock %p rc %d\n",
/*
 * Receive into the connection's iovec fragments (conn->ksnc_rx_iov) and fold
 * the received bytes into the running receive checksum conn->ksnc_rx_csum.
 *
 * For V2.x connections the checksum field of the message header is saved and
 * zeroed while the data is summed, then restored afterwards.
 */
362 ksocknal_lib_recv_iov (ksock_conn_t *conn)
364 struct iovec *iov = conn->ksnc_rx_iov;
368 /* receive payload from tsdu queue */
369 rc = ks_recv_iovs (conn->ksnc_sock, iov, conn->ksnc_rx_niov,
372 /* calculate the packet checksum */
378 __u32 saved_csum = 0;
380 if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
381 saved_csum = conn->ksnc_msg.ksm_csum;
382 conn->ksnc_msg.ksm_csum = 0;
385 if (saved_csum != 0) {
387 /* accumulate checksum */
/* Walk the fragments until all rc received bytes are accounted for. */
388 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
389 LASSERT (i < conn->ksnc_rx_niov);
391 fragnob = iov[i].iov_len;
395 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
396 iov[i].iov_base, fragnob);
398 conn->ksnc_msg.ksm_csum = saved_csum;
402 KsPrint((4, "ksocknal_lib_recv_iov: conn %p sock %p rc %d.\n",
403 conn, conn->ksnc_sock, rc));
/*
 * Receive into the connection's page (kiov) fragments and, when data was
 * received and the message header carries a non-zero checksum, fold the
 * received bytes into the running receive checksum conn->ksnc_rx_csum.
 */
408 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
410 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
413 /* NB we can't trust socket ops to either consume our iovs
414 * or leave them alone, so we only receive 1 frag at a time. */
415 LASSERT (conn->ksnc_rx_nkiov > 0);
417 /* receive payload from tsdu queue */
418 rc = ks_recv_kiovs (conn->ksnc_sock, kiov, conn->ksnc_rx_nkiov,
421 if (rc > 0 && conn->ksnc_msg.ksm_csum != 0) {
/* Walk the fragments until all rc received bytes are accounted for. */
428 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
430 LASSERT (i < conn->ksnc_rx_nkiov);
/* Fragment data starts kiov_offset bytes into the page's address. */
432 base = (char *)(kiov[i].kiov_page->addr) + kiov[i].kiov_offset;
433 fragnob = kiov[i].kiov_len;
437 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
442 KsPrint((4, "ksocknal_lib_recv_kiov: conn %p sock %p rc %d.\n",
443 conn, conn->ksnc_sock, rc));
/* Set the TCP_SOCKET_NODELAY option on the connection's socket through
 * ks_set_tcp_option(); a failure is reported as "Can't disable nagle". */
448 ksocknal_lib_eager_ack (ksock_conn_t *conn)
453 rc = ks_set_tcp_option(
454 conn->ksnc_sock, TCP_SOCKET_NODELAY,
455 &option, sizeof(option) );
457 CERROR("Can't disable nagle: %d\n", rc);
/*
 * Report socket tunables for a connection through the txmem, rxmem and nagle
 * out-parameters.
 *
 * The TCP_SOCKET_NODELAY setting is read into *nagle while a reference on the
 * tconn is held.  All three outputs can also be reset to 0; presumably that
 * is the failure path -- confirm.
 */
462 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
464 ks_tconn_t * tconn = conn->ksnc_sock;
/* Hold a reference on the tconn for the duration of the option query. */
468 ks_get_tconn (tconn);
470 len = sizeof(*nagle);
471 rc = ks_get_tcp_option(tconn, TCP_SOCKET_NODELAY, (__u32 *)nagle, &len);
472 ks_put_tconn (tconn);
474 KsPrint((2, "ksocknal_get_conn_tunables: nodelay = %d rc = %d\n", *nagle, rc));
479 *txmem = *rxmem = *nagle = 0;
/*
 * Apply the socknal tunables to a newly created socket: optionally set
 * TCP_SOCKET_NODELAY, then switch TCP keepalive on or off.  Failures of
 * ks_set_tcp_option() are reported through CERROR.
 */
485 ksocknal_lib_setup_sock (struct socket *sock)
497 /* set the window size */
498 tconn->kstc_snd_wnd = ksocknal_tunables.ksnd_buffer_size;
499 tconn->kstc_rcv_wnd = ksocknal_tunables.ksnd_buffer_size;
/* NOTE(review): this tests the ksnd_nagle tunable itself, whereas the
 * keepalive tunables below are dereferenced before use.  If ksnd_nagle is a
 * pointer like those, the condition checks the pointer rather than the
 * configured value -- confirm whether "!*ksocknal_tunables.ksnd_nagle" was
 * intended. */
503 if (!ksocknal_tunables.ksnd_nagle) {
506 rc = ks_set_tcp_option(
507 sock, TCP_SOCKET_NODELAY,
508 &option, sizeof (option));
510 CERROR ("Can't disable nagle: %d\n", rc);
515 /* snapshot tunables */
516 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
517 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
518 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
/* Keepalive is requested only when all three keepalive tunables are
 * positive. */
520 keep_alive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
522 option = (__u32)(keep_alive ? 1 : 0);
524 rc = ks_set_tcp_option(
525 sock, TCP_SOCKET_KEEPALIVE,
526 &option, sizeof (option));
/* NOTE(review): this message follows the TCP_SOCKET_KEEPALIVE call but still
 * says "nagle"; it looks copied from the NODELAY path above -- confirm and
 * correct the text. */
528 CERROR ("Can't disable nagle: %d\n", rc);
/*
 * Temporarily clear the nagle flag recorded in the connection's tconn, issue
 * a TCP option update, then put the saved flag back.
 *
 * The flag lives in a different member depending on the tconn type (sender
 * or child); any other type trips an assertion.  Both the save/clear step and
 * the restore step run under tconn->kstc_lock, but the lock is dropped around
 * the ks_set_tcp_option() call.
 */
536 ksocknal_lib_push_conn (ksock_conn_t *conn)
543 tconn = conn->ksnc_sock;
/* Save the current nagle flag and clear it. */
547 spin_lock(&tconn->kstc_lock);
548 if (tconn->kstc_type == kstt_sender) {
549 nagle = tconn->sender.kstc_info.nagle;
550 tconn->sender.kstc_info.nagle = 0;
552 LASSERT(tconn->kstc_type == kstt_child);
553 nagle = tconn->child.kstc_info.nagle;
554 tconn->child.kstc_info.nagle = 0;
557 spin_unlock(&tconn->kstc_lock);
560 rc = ks_set_tcp_option(
/* Restore the nagle flag that was saved above. */
568 spin_lock(&tconn->kstc_lock);
570 if (tconn->kstc_type == kstt_sender) {
571 tconn->sender.kstc_info.nagle = nagle;
573 LASSERT(tconn->kstc_type == kstt_child);
574 tconn->child.kstc_info.nagle = nagle;
576 spin_unlock(&tconn->kstc_lock);
/*
 * Compute the checksum of an outgoing V2.x message and store it in the
 * message header (tx->tx_msg.ksm_csum).
 *
 * The first iov must be the message header itself.  The header is summed
 * with its checksum field zeroed, starting from a seed of ~0, and the
 * payload is then folded in: from the page fragments when tx_kiov is set,
 * and from iov[1] onwards in the other visible loop.
 */
581 ksocknal_lib_csum_tx(ksock_tx_t *tx)
587 LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg);
588 LASSERT(tx->tx_conn != NULL);
589 LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
/* The checksum field must not contribute to its own value. */
591 tx->tx_msg.ksm_csum = 0;
593 csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base,
594 tx->tx_iov[0].iov_len);
596 if (tx->tx_kiov != NULL) {
597 for (i = 0; i < tx->tx_nkiov; i++) {
/* Fragment data starts kiov_offset bytes into the page's address. */
598 base = (PUCHAR)(tx->tx_kiov[i].kiov_page->addr) +
599 tx->tx_kiov[i].kiov_offset;
601 csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
/* iov[0] (the header) has already been summed, so start at index 1. */
604 for (i = 1; i < tx->tx_niov; i++)
605 csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base,
606 tx->tx_iov[i].iov_len);
/* The error-injection tunable is reset once it has been observed. */
609 if (*ksocknal_tunables.ksnd_inject_csum_error) {
611 *ksocknal_tunables.ksnd_inject_csum_error = 0;
614 tx->tx_msg.ksm_csum = csum;
/*
 * Socket scheduling hook: look up the connection attached to the socket
 * (sock->kstc_conn) and invoke the socknal write or read callback on it
 * (presumably chosen by mode -- confirm) while holding the global lock for
 * reading.
 */
617 void ksocknal_schedule_callback(struct socket*sock, int mode)
619 ksock_conn_t * conn = (ksock_conn_t *) sock->kstc_conn;
621 read_lock(&ksocknal_data.ksnd_global_lock);
623 ksocknal_write_callback(conn);
625 ksocknal_read_callback(conn);
627 read_unlock(&ksocknal_data.ksnd_global_lock);
/*
 * Completion hook for a transmit: unlink tx from its list and, if that
 * leaves the connection's tx queue empty, unlink the connection's
 * ksnc_tx_list entry as well.  Both unlinks happen under the scheduler's
 * kss_lock.  The tx reference is dropped after the lock is released.
 */
631 ksocknal_tx_fini_callback(ksock_conn_t * conn, ksock_tx_t * tx)
633 /* remove tx/conn from conn's outgoing queue */
634 spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
635 cfs_list_del(&tx->tx_list);
636 if (cfs_list_empty(&conn->ksnc_tx_queue))
637 cfs_list_del(&conn->ksnc_tx_list);
639 spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
641 /* complete send; tx -ref */
642 ksocknal_tx_decref(tx);
646 ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
/* Attach conn to the socket and install ksocknal_schedule_callback as the
 * socket's scheduling callback. */
651 ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
653 sock->kstc_conn = conn;
654 sock->kstc_sched_cb = ksocknal_schedule_callback;
/* Detach the socket from its connection: clear both the connection pointer
 * and the scheduling callback.  The conn argument is not used in the visible
 * statements. */
658 ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
660 sock->kstc_conn = NULL;
661 sock->kstc_sched_cb = NULL;
665 ksocknal_lib_zc_capable(ksock_conn_t *conn)
671 ksocknal_lib_memory_pressure(ksock_conn_t *conn)
677 ksocknal_lib_bind_thread_to_cpu(int id)