1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/klnds/socklnd/socklnd_lib-winnt.c
38 * windows socknal library
43 # if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
/*
 * Sysctl tables for the socknal tunables.  ksocknal_ctl_table has room for
 * 21 entries and is filled in at run time by ksocknal_lib_tunables_init();
 * ksocknal_top_ctl_table is the parent table whose "socknal" entry names
 * ksocknal_ctl_table as its child.
 */
44 static cfs_sysctl_table_t ksocknal_ctl_table[21];
46 cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
49 /* procname */ "socknal",
53 /* child */ ksocknal_ctl_table
/*
 * Sysctl-enabled variant: populate ksocknal_ctl_table with one integer
 * entry per socknal tunable, then register the table tree through
 * cfs_register_sysctl_table().
 *
 * Every entry takes the next ctl_name id from j, a procname, a data pointer
 * copied from the matching ksocknal_tunables member, maxlen sizeof(int), a
 * mode (0644 or 0444) and proc_dointvec as its handler.
 *
 * A failed registration is only reported with CWARN.
 *
 * NOTE(review): the declarations of i and j, the statements that advance i
 * between entries, the closing #endif lines and the return statement are
 * not visible here -- confirm against the complete file.
 */
59 ksocknal_lib_tunables_init ()
64 ksocknal_ctl_table[i].ctl_name = j++;
65 ksocknal_ctl_table[i].procname = "timeout";
66 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_timeout;
67 ksocknal_ctl_table[i].maxlen = sizeof (int);
68 ksocknal_ctl_table[i].mode = 0644;
69 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
72 ksocknal_ctl_table[i].ctl_name = j++;
73 ksocknal_ctl_table[i].procname = "credits";
74 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_credits;
75 ksocknal_ctl_table[i].maxlen = sizeof (int);
76 ksocknal_ctl_table[i].mode = 0444;
77 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
80 ksocknal_ctl_table[i].ctl_name = j++;
81 ksocknal_ctl_table[i].procname = "peer_credits";
82 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_peercredits;
83 ksocknal_ctl_table[i].maxlen = sizeof (int);
84 ksocknal_ctl_table[i].mode = 0444;
85 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
88 ksocknal_ctl_table[i].ctl_name = j++;
89 ksocknal_ctl_table[i].procname = "nconnds";
90 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_nconnds;
91 ksocknal_ctl_table[i].maxlen = sizeof (int);
92 ksocknal_ctl_table[i].mode = 0444;
93 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
97 ksocknal_ctl_table[i].ctl_name = j++;
98 ksocknal_ctl_table[i].procname = "min_reconnectms";
99 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_min_reconnectms;
100 ksocknal_ctl_table[i].maxlen = sizeof (int);
101 ksocknal_ctl_table[i].mode = 0444;
102 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
105 ksocknal_ctl_table[i].ctl_name = j++;
106 ksocknal_ctl_table[i].procname = "max_reconnectms";
107 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_max_reconnectms;
108 ksocknal_ctl_table[i].maxlen = sizeof (int);
109 ksocknal_ctl_table[i].mode = 0444;
110 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
113 ksocknal_ctl_table[i].ctl_name = j++;
114 ksocknal_ctl_table[i].procname = "eager_ack";
115 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_eager_ack;
116 ksocknal_ctl_table[i].maxlen = sizeof (int);
117 ksocknal_ctl_table[i].mode = 0644;
118 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* the "zero_copy" entry is backed by the ksnd_zc_min_frag tunable */
121 ksocknal_ctl_table[i].ctl_name = j++;
122 ksocknal_ctl_table[i].procname = "zero_copy";
123 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_zc_min_frag;
124 ksocknal_ctl_table[i].maxlen = sizeof (int);
125 ksocknal_ctl_table[i].mode = 0644;
126 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* the "typed" entry is backed by the ksnd_typed_conns tunable */
129 ksocknal_ctl_table[i].ctl_name = j++;
130 ksocknal_ctl_table[i].procname = "typed";
131 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_typed_conns;
132 ksocknal_ctl_table[i].maxlen = sizeof (int);
133 ksocknal_ctl_table[i].mode = 0444;
134 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
137 ksocknal_ctl_table[i].ctl_name = j++;
138 ksocknal_ctl_table[i].procname = "min_bulk";
139 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_min_bulk;
140 ksocknal_ctl_table[i].maxlen = sizeof (int);
141 ksocknal_ctl_table[i].mode = 0644;
142 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
145 ksocknal_ctl_table[i].ctl_name = j++;
146 ksocknal_ctl_table[i].procname = "rx_buffer_size";
147 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_rx_buffer_size;
148 ksocknal_ctl_table[i].maxlen = sizeof(int);
149 ksocknal_ctl_table[i].mode = 0644;
150 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
153 ksocknal_ctl_table[i].ctl_name = j++;
154 ksocknal_ctl_table[i].procname = "tx_buffer_size";
155 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_tx_buffer_size;
156 ksocknal_ctl_table[i].maxlen = sizeof(int);
157 ksocknal_ctl_table[i].mode = 0644;
158 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
161 ksocknal_ctl_table[i].ctl_name = j++;
162 ksocknal_ctl_table[i].procname = "nagle";
163 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_nagle;
164 ksocknal_ctl_table[i].maxlen = sizeof(int);
165 ksocknal_ctl_table[i].mode = 0644;
166 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
170 ksocknal_ctl_table[i].ctl_name = j++;
171 ksocknal_ctl_table[i].procname = "irq_affinity";
172 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_irq_affinity;
173 ksocknal_ctl_table[i].maxlen = sizeof(int);
174 ksocknal_ctl_table[i].mode = 0644;
175 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
179 ksocknal_ctl_table[i].ctl_name = j++;
180 ksocknal_ctl_table[i].procname = "keepalive_idle";
181 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_idle;
182 ksocknal_ctl_table[i].maxlen = sizeof(int);
183 ksocknal_ctl_table[i].mode = 0644;
184 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
187 ksocknal_ctl_table[i].ctl_name = j++;
188 ksocknal_ctl_table[i].procname = "keepalive_count";
189 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_count;
190 ksocknal_ctl_table[i].maxlen = sizeof(int);
191 ksocknal_ctl_table[i].mode = 0644;
192 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
195 ksocknal_ctl_table[i].ctl_name = j++;
196 ksocknal_ctl_table[i].procname = "keepalive_intvl";
197 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_intvl;
198 ksocknal_ctl_table[i].maxlen = sizeof(int);
199 ksocknal_ctl_table[i].mode = 0644;
200 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* the two backoff entries exist only when SOCKNAL_BACKOFF is defined */
203 #ifdef SOCKNAL_BACKOFF
204 ksocknal_ctl_table[i].ctl_name = j++;
205 ksocknal_ctl_table[i].procname = "backoff_init";
206 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_backoff_init;
207 ksocknal_ctl_table[i].maxlen = sizeof(int);
208 ksocknal_ctl_table[i].mode = 0644;
209 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
212 ksocknal_ctl_table[i].ctl_name = j++;
213 ksocknal_ctl_table[i].procname = "backoff_max";
214 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_backoff_max;
215 ksocknal_ctl_table[i].maxlen = sizeof(int);
216 ksocknal_ctl_table[i].mode = 0644;
217 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* the "protocol" entry exists only when SOCKNAL_VERSION_DEBUG is non-zero */
221 #if SOCKNAL_VERSION_DEBUG
222 ksocknal_ctl_table[i].ctl_name = j++;
223 ksocknal_ctl_table[i].procname = "protocol";
224 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_protocol;
225 ksocknal_ctl_table[i].maxlen = sizeof(int);
226 ksocknal_ctl_table[i].mode = 0644;
227 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/*
 * Sanity checks after filling the table: the id counter j must be exactly
 * one ahead of the entry index i, and i must not exceed the number of
 * elements in ksocknal_ctl_table.
 * NOTE(review): "<=" admits i equal to the element count, which would leave
 * no unused trailing entry -- confirm whether an empty terminator entry is
 * required and whether "<" was intended.
 */
231 LASSERT (j == i + 1);
232 LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
/* keep the registration handle so ksocknal_lib_tunables_fini() can undo it */
234 ksocknal_tunables.ksnd_sysctl =
235 cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
237 if (ksocknal_tunables.ksnd_sysctl == NULL)
238 CWARN("Can't setup /proc tunables\n");
/*
 * Sysctl-enabled variant: unregister the sysctl table, but only when a
 * registration handle was stored in ksocknal_tunables.ksnd_sysctl.
 */
244 ksocknal_lib_tunables_fini ()
246 if (ksocknal_tunables.ksnd_sysctl != NULL)
247 cfs_unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
/*
 * Second definition of ksocknal_lib_tunables_init(), placed before the
 * #endif of the CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM conditional.
 * NOTE(review): presumably the variant built when sysctl tunables are not
 * available; its body is not visible here -- confirm.
 */
251 ksocknal_lib_tunables_init ()
/*
 * Second definition of ksocknal_lib_tunables_fini(), placed before the
 * #endif of the CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM conditional.
 * NOTE(review): presumably the counterpart used when no sysctl table was
 * registered; its body is not visible here -- confirm.
 */
257 ksocknal_lib_tunables_fini ()
260 #endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
/*
 * IRQ binding hook; takes the IRQ number.
 * NOTE(review): return type and body are not visible here -- confirm what,
 * if anything, this does on Windows.
 */
263 ksocknal_lib_bind_irq (unsigned int irq)
/*
 * Query both endpoints of conn's socket with libcfs_sock_getaddr(): the
 * first call (second argument 1) asks for the peer side, the second call
 * (second argument 0) stores the local IP in conn->ksnc_myipaddr and does
 * not request a port.  Each failure is logged with CERROR.
 * NOTE(review): the output arguments of the first call, the error checks
 * and the return statements are not visible here.
 */
268 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
270 int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
274 /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
275 LASSERT (!conn->ksnc_closing);
278 CERROR ("Error %d getting sock peer IP\n", rc);
282 rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
283 &conn->ksnc_myipaddr, NULL);
285 CERROR ("Error %d getting sock local IP\n", rc);
/*
 * IRQ lookup hook for a socket.
 * NOTE(review): return type and body are not visible here -- confirm the
 * value returned on Windows.
 */
293 ksocknal_lib_sock_irq (struct socket *sock)
/*
 * Send the iovec fragments of tx (tx->tx_iov, tx->tx_niov) on conn's socket
 * with ks_send_iovs().
 *
 * The message is checksummed first via ksocknal_lib_csum_tx() when all of
 * the following hold: checksumming is enabled, the connection speaks the
 * V2.x protocol, nothing has been sent yet (tx_nob == tx_resid) and the
 * checksum field is still 0.
 *
 * The send is always non-blocking (MSG_DONTWAIT).  MSG_MORE is added when
 * further transmits are queued on the connection or when the iovecs cover
 * fewer bytes than tx->tx_resid.
 * NOTE(review): local declarations and the return statement are not visible
 * here; presumably rc from ks_send_iovs() is returned -- confirm.
 */
299 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
301 struct socket *sock = conn->ksnc_sock;
308 if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
309 conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
310 tx->tx_nob == tx->tx_resid && /* first send */
311 tx->tx_msg.ksm_csum == 0) /* not checksummed */
312 ksocknal_lib_csum_tx(tx);
/* nob: total number of bytes described by the iovec array */
314 nob = ks_query_iovs_length(tx->tx_iov, tx->tx_niov);
315 flags = (!list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
316 (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT;
317 rc = ks_send_iovs(sock, tx->tx_iov, tx->tx_niov, flags, 0);
319 KsPrint((4, "ksocknal_lib_send_iov: conn %p sock %p rc %d\n",
/*
 * Send the page (kiov) fragments of tx on conn's socket with
 * ks_send_kiovs().
 *
 * The send is always non-blocking (MSG_DONTWAIT).  MSG_MORE is added when
 * further transmits are queued on the connection or when the kiovs cover
 * fewer bytes than tx->tx_resid.
 * NOTE(review): local declarations and the return statement are not visible
 * here; presumably rc from ks_send_kiovs() is returned -- confirm.
 */
325 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
327 struct socket *sock = conn->ksnc_sock;
328 lnet_kiov_t *kiov = tx->tx_kiov;
334 nkiov = tx->tx_nkiov;
/* nob: total number of bytes described by the kiov array */
335 nob = ks_query_kiovs_length(tx->tx_kiov, nkiov);
336 flags = (!list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
337 (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT;
338 rc = ks_send_kiovs(sock, tx->tx_kiov, nkiov, flags, 0);
340 KsPrint((4, "ksocknal_lib_send_kiov: conn %p sock %p rc %d\n",
/*
 * Receive into the connection's iovec fragments (conn->ksnc_rx_iov,
 * conn->ksnc_rx_niov) with ks_recv_iovs() and fold the received bytes into
 * the running receive checksum conn->ksnc_rx_csum.
 *
 * For a V2.x connection the checksum field of the message header is saved
 * and zeroed before accumulation and restored afterwards.  Accumulation
 * runs only when the saved checksum is non-zero.
 * NOTE(review): the remaining ks_recv_iovs() arguments, the condition
 * guarding the checksum section, any clamping of fragnob to the bytes left
 * and the return statement are not visible here -- confirm.
 */
346 ksocknal_lib_recv_iov (ksock_conn_t *conn)
348 struct iovec *iov = conn->ksnc_rx_iov;
352 /* receive payload from tsdu queue */
353 rc = ks_recv_iovs (conn->ksnc_sock, iov, conn->ksnc_rx_niov,
356 /* calculate packet checksum */
362 __u32 saved_csum = 0;
364 if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
365 saved_csum = conn->ksnc_msg.ksm_csum;
366 conn->ksnc_msg.ksm_csum = 0;
369 if (saved_csum != 0) {
371 /* accumulate checksum */
/* walk the fragments until the rc received bytes are accounted for */
372 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
373 LASSERT (i < conn->ksnc_rx_niov);
375 fragnob = iov[i].iov_len;
379 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
380 iov[i].iov_base, fragnob);
/* put the header checksum back now that accumulation is done */
382 conn->ksnc_msg.ksm_csum = saved_csum;
386 KsPrint((4, "ksocknal_lib_recv_iov: conn %p sock %p rc %d.\n",
387 conn, conn->ksnc_sock, rc));
/*
 * Receive into the connection's page fragments (conn->ksnc_rx_kiov,
 * conn->ksnc_rx_nkiov) with ks_recv_kiovs().  When data arrived (rc > 0)
 * and the message carries a non-zero checksum, fold the received bytes into
 * conn->ksnc_rx_csum fragment by fragment.
 * NOTE(review): the remaining ks_recv_kiovs() arguments, any clamping of
 * fragnob to the bytes left, the trailing ksocknal_csum() arguments and the
 * return statement are not visible here -- confirm.
 */
392 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
394 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
/* NOTE(review): the call below is handed all ksnc_rx_nkiov fragments, so
 * the "1 frag at a time" remark that follows appears stale -- confirm. */
397 /* NB we can't trust socket ops to either consume our iovs
398 * or leave them alone, so we only receive 1 frag at a time. */
399 LASSERT (conn->ksnc_rx_nkiov > 0);
401 /* receive payload from tsdu queue */
402 rc = ks_recv_kiovs (conn->ksnc_sock, kiov, conn->ksnc_rx_nkiov,
405 if (rc > 0 && conn->ksnc_msg.ksm_csum != 0) {
/* walk the fragments until the rc received bytes are accounted for */
412 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
414 LASSERT (i < conn->ksnc_rx_nkiov);
/* fragment data starts kiov_offset bytes into the page's addr */
416 base = (char *)(kiov[i].kiov_page->addr) + kiov[i].kiov_offset;
417 fragnob = kiov[i].kiov_len;
421 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
426 KsPrint((4, "ksocknal_lib_recv_kiov: conn %p sock %p rc %d.\n",
427 conn, conn->ksnc_sock, rc));
/*
 * Set the TCP_SOCKET_NODELAY option on conn's socket through
 * ks_set_tcp_option() and log a CERROR when that fails.
 * NOTE(review): the value assigned to option and the check guarding the
 * CERROR are not visible here -- confirm.
 */
432 ksocknal_lib_eager_ack (ksock_conn_t *conn)
437 rc = ks_set_tcp_option(
438 conn->ksnc_sock, TCP_SOCKET_NODELAY,
439 &option, sizeof(option) );
441 CERROR("Can't disable nagle: %d\n", rc);
/*
 * Read the TCP_SOCKET_NODELAY option of conn's socket into *nagle.  A
 * reference on the tconn is held around the ks_get_tcp_option() call.
 * The last visible statement zeroes *txmem, *rxmem and *nagle.
 * NOTE(review): the conditions around the final assignment, how *txmem and
 * *rxmem are otherwise filled and the return statement are not visible
 * here -- presumably the zeroing is the failure path; confirm.
 */
446 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
448 ks_tconn_t * tconn = conn->ksnc_sock;
/* pin the tconn while its TCP option is queried */
452 ks_get_tconn (tconn);
454 len = sizeof(*nagle);
455 rc = ks_get_tcp_option(tconn, TCP_SOCKET_NODELAY, (__u32 *)nagle, &len);
456 ks_put_tconn (tconn);
458 KsPrint((2, "ksocknal_get_conn_tunables: nodelay = %d rc = %d\n", *nagle, rc));
463 *txmem = *rxmem = *nagle = 0;
/*
 * Apply the socknal tunables to a socket:
 *   - when the nagle tunable is 0, set TCP_SOCKET_NODELAY;
 *   - set TCP_SOCKET_KEEPALIVE to 1 when keepalive_idle, keepalive_count
 *     and keepalive_intvl are all positive, otherwise to 0.
 * Each failing ks_set_tcp_option() call is logged with CERROR.
 * NOTE(review): local declarations, the rc checks and the return statements
 * are not visible here -- confirm against the complete file.
 */
469 ksocknal_lib_setup_sock (struct socket *sock)
481 /* set the window size */
482 tconn->kstc_snd_wnd = ksocknal_tunables.ksnd_buffer_size;
483 tconn->kstc_rcv_wnd = ksocknal_tunables.ksnd_buffer_size;
/* ksnd_nagle points at the tunable (it is handed to sysctl as .data, and
 * the sibling tunables below are dereferenced), so test the value it
 * points to rather than the pointer itself */
487 if (!*ksocknal_tunables.ksnd_nagle) {
490 rc = ks_set_tcp_option(
491 sock, TCP_SOCKET_NODELAY,
492 &option, sizeof (option));
494 CERROR ("Can't disable nagle: %d\n", rc);
499 /* snapshot tunables */
500 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
501 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
502 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
/* keepalive is wanted only when all three tunables are positive */
504 keep_alive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
506 option = (__u32)(keep_alive ? 1 : 0);
508 rc = ks_set_tcp_option(
509 sock, TCP_SOCKET_KEEPALIVE,
510 &option, sizeof (option));
/* report the option that actually failed (this used to repeat the nagle
 * message) */
512 CERROR ("Can't set keepalive: %d\n", rc);
/*
 * Temporarily clear the cached nagle setting of conn's tconn around a
 * ks_set_tcp_option() call, then put the saved value back.
 *
 * The setting lives in tconn->sender.kstc_info for a kstt_sender tconn and
 * in tconn->child.kstc_info otherwise (asserted to be kstt_child).  Both
 * the save/clear and the restore are done under tconn->kstc_lock; the lock
 * is released around the ks_set_tcp_option() call.
 * NOTE(review): the arguments of ks_set_tcp_option(), any reference
 * counting on the tconn and the else keywords of the type checks are not
 * visible here -- confirm.
 */
520 ksocknal_lib_push_conn (ksock_conn_t *conn)
527 tconn = conn->ksnc_sock;
/* save the current nagle value and clear it */
531 spin_lock(&tconn->kstc_lock);
532 if (tconn->kstc_type == kstt_sender) {
533 nagle = tconn->sender.kstc_info.nagle;
534 tconn->sender.kstc_info.nagle = 0;
536 LASSERT(tconn->kstc_type == kstt_child);
537 nagle = tconn->child.kstc_info.nagle;
538 tconn->child.kstc_info.nagle = 0;
541 spin_unlock(&tconn->kstc_lock);
544 rc = ks_set_tcp_option(
/* restore the nagle value saved above */
552 spin_lock(&tconn->kstc_lock);
554 if (tconn->kstc_type == kstt_sender) {
555 tconn->sender.kstc_info.nagle = nagle;
557 LASSERT(tconn->kstc_type == kstt_child);
558 tconn->child.kstc_info.nagle = nagle;
560 spin_unlock(&tconn->kstc_lock);
/*
 * Compute the checksum of an outgoing V2.x message and store it in
 * tx->tx_msg.ksm_csum.
 *
 * The first iovec must be the message header itself (asserted).  The
 * checksum field is zeroed, then the checksum is seeded with ~0 and run
 * over the header.  It is then extended over every kiov page fragment when
 * tx->tx_kiov is set; the other visible loop covers the iovecs from index
 * 1 onwards.
 *
 * When the ksnd_inject_csum_error tunable is set it is cleared again after
 * one use.
 * NOTE(review): the else keyword between the two loops and the statement
 * that corrupts the checksum for error injection are not visible here --
 * confirm.
 */
565 ksocknal_lib_csum_tx(ksock_tx_t *tx)
571 LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg);
572 LASSERT(tx->tx_conn != NULL);
573 LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
/* the checksum field itself is summed as zero */
575 tx->tx_msg.ksm_csum = 0;
577 csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base,
578 tx->tx_iov[0].iov_len);
580 if (tx->tx_kiov != NULL) {
581 for (i = 0; i < tx->tx_nkiov; i++) {
/* fragment data starts kiov_offset bytes into the page's addr */
582 base = (PUCHAR)(tx->tx_kiov[i].kiov_page->addr) +
583 tx->tx_kiov[i].kiov_offset;
585 csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
/* iovec 0 (the header) has already been summed, so start at 1 */
588 for (i = 1; i < tx->tx_niov; i++)
589 csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base,
590 tx->tx_iov[i].iov_len);
593 if (*ksocknal_tunables.ksnd_inject_csum_error) {
595 *ksocknal_tunables.ksnd_inject_csum_error = 0;
598 tx->tx_msg.ksm_csum = csum;
/*
 * Scheduling callback installed on a socket by ksocknal_lib_set_callback().
 * Looks up the connection stored in sock->kstc_conn and, while holding
 * ksocknal_data.ksnd_global_lock for reading, invokes either
 * ksocknal_write_callback() or ksocknal_read_callback() on it.
 * NOTE(review): the test on mode that selects between the two callbacks is
 * not visible here -- confirm which mode value means "write".
 */
601 void ksocknal_schedule_callback(struct socket*sock, int mode)
603 ksock_conn_t * conn = (ksock_conn_t *) sock->kstc_conn;
605 read_lock (&ksocknal_data.ksnd_global_lock);
607 ksocknal_write_callback(conn);
609 ksocknal_read_callback(conn);
611 read_unlock (&ksocknal_data.ksnd_global_lock);
/*
 * Finish a transmit: under the scheduler's kss_lock, unlink tx from the
 * list it is on and, if that leaves conn->ksnc_tx_queue empty, unlink the
 * connection via conn->ksnc_tx_list as well.  The tx reference is dropped
 * after the lock is released.
 */
615 ksocknal_tx_fini_callback(ksock_conn_t * conn, ksock_tx_t * tx)
617 /* remove tx/conn from conn's outgoing queue */
618 spin_lock_bh (&conn->ksnc_scheduler->kss_lock);
619 list_del(&tx->tx_list);
620 if (list_empty(&conn->ksnc_tx_queue)) {
621 list_del (&conn->ksnc_tx_list);
623 spin_unlock_bh (&conn->ksnc_scheduler->kss_lock);
625 /* complete send; tx -ref */
626 ksocknal_tx_decref (tx);
/*
 * Callback-saving hook for a socket/connection pair.
 * NOTE(review): return type and body are not visible here -- confirm
 * whether anything is saved on Windows.
 */
630 ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
/*
 * Attach conn to the socket and install ksocknal_schedule_callback() as the
 * socket's scheduling callback.
 */
635 ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
637 sock->kstc_conn = conn;
638 sock->kstc_sched_cb = ksocknal_schedule_callback;
/*
 * Detach the socket from its connection: clear both the stored connection
 * pointer and the scheduling callback.  The conn argument is not used by
 * the visible statements.
 */
642 ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
644 sock->kstc_conn = NULL;
645 sock->kstc_sched_cb = NULL;
649 ksocknal_lib_zc_capable(struct socket *sock)