4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
31 * This file is part of Lustre, http://www.lustre.org/
32 * Lustre is a trademark of Sun Microsystems, Inc.
34 * lnet/klnds/socklnd/socklnd_lib-winnt.c
36 * windows socknal library
41 # if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
/* Leaf sysctl entries for socknal: 21 slots, filled in at run time by
 * ksocknal_lib_tunables_init(). */
42 static cfs_sysctl_table_t ksocknal_ctl_table[21];
/* Parent sysctl table: a "socknal" entry whose child is ksocknal_ctl_table. */
44 cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
47 /* procname */ "socknal",
51 /* child */ ksocknal_ctl_table
/*
 * Populate ksocknal_ctl_table with one integer sysctl entry per socknal
 * tunable and register the "socknal" tree through
 * cfs_register_sysctl_table().
 *
 * Every entry points .data at the matching ksocknal_tunables field, is
 * sizeof(int) wide and is served by proc_dointvec.  Entries with mode 0644
 * are writable, entries with mode 0444 are read-only.
 *
 * A registration failure is logged with CWARN().
 *
 * NOTE(review): 22 entries are listed below against the 21 slots of
 * ksocknal_ctl_table; whether all of them can be compiled in together
 * depends on the preprocessor conditions - confirm the array size.
 */
57 ksocknal_lib_tunables_init ()
62 ksocknal_ctl_table[i].ctl_name = j++;
63 ksocknal_ctl_table[i].procname = "timeout";
64 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_timeout;
65 ksocknal_ctl_table[i].maxlen = sizeof (int);
66 ksocknal_ctl_table[i].mode = 0644;
67 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
70 ksocknal_ctl_table[i].ctl_name = j++;
71 ksocknal_ctl_table[i].procname = "credits";
72 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_credits;
73 ksocknal_ctl_table[i].maxlen = sizeof (int);
74 ksocknal_ctl_table[i].mode = 0444;
75 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
78 ksocknal_ctl_table[i].ctl_name = j++;
79 ksocknal_ctl_table[i].procname = "peer_credits";
80 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_peertxcredits;
81 ksocknal_ctl_table[i].maxlen = sizeof (int);
82 ksocknal_ctl_table[i].mode = 0444;
83 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
86 ksocknal_ctl_table[i].ctl_name = j++;
87 ksocknal_ctl_table[i].procname = "peer_buffer_credits";
88 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_peerrtrcredits;
89 ksocknal_ctl_table[i].maxlen = sizeof (int);
90 ksocknal_ctl_table[i].mode = 0444;
91 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
94 ksocknal_ctl_table[i].ctl_name = j++;
95 ksocknal_ctl_table[i].procname = "nconnds";
96 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_nconnds;
97 ksocknal_ctl_table[i].maxlen = sizeof (int);
98 ksocknal_ctl_table[i].mode = 0444;
99 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
103 ksocknal_ctl_table[i].ctl_name = j++;
104 ksocknal_ctl_table[i].procname = "min_reconnectms";
105 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_min_reconnectms;
106 ksocknal_ctl_table[i].maxlen = sizeof (int);
107 ksocknal_ctl_table[i].mode = 0444;
108 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
111 ksocknal_ctl_table[i].ctl_name = j++;
112 ksocknal_ctl_table[i].procname = "max_reconnectms";
113 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_max_reconnectms;
114 ksocknal_ctl_table[i].maxlen = sizeof (int);
115 ksocknal_ctl_table[i].mode = 0444;
116 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
119 ksocknal_ctl_table[i].ctl_name = j++;
120 ksocknal_ctl_table[i].procname = "eager_ack";
121 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_eager_ack;
122 ksocknal_ctl_table[i].maxlen = sizeof (int);
123 ksocknal_ctl_table[i].mode = 0644;
124 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
127 ksocknal_ctl_table[i].ctl_name = j++;
128 ksocknal_ctl_table[i].procname = "zero_copy";
129 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_zc_min_payload;
130 ksocknal_ctl_table[i].maxlen = sizeof (int);
131 ksocknal_ctl_table[i].mode = 0644;
132 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
135 ksocknal_ctl_table[i].ctl_name = j++;
136 ksocknal_ctl_table[i].procname = "typed";
137 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_typed_conns;
138 ksocknal_ctl_table[i].maxlen = sizeof (int);
139 ksocknal_ctl_table[i].mode = 0444;
140 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
143 ksocknal_ctl_table[i].ctl_name = j++;
144 ksocknal_ctl_table[i].procname = "min_bulk";
145 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_min_bulk;
146 ksocknal_ctl_table[i].maxlen = sizeof (int);
147 ksocknal_ctl_table[i].mode = 0644;
148 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
151 ksocknal_ctl_table[i].ctl_name = j++;
152 ksocknal_ctl_table[i].procname = "rx_buffer_size";
153 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_rx_buffer_size;
154 ksocknal_ctl_table[i].maxlen = sizeof(int);
155 ksocknal_ctl_table[i].mode = 0644;
156 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
159 ksocknal_ctl_table[i].ctl_name = j++;
160 ksocknal_ctl_table[i].procname = "tx_buffer_size";
161 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_tx_buffer_size;
162 ksocknal_ctl_table[i].maxlen = sizeof(int);
163 ksocknal_ctl_table[i].mode = 0644;
164 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
167 ksocknal_ctl_table[i].ctl_name = j++;
168 ksocknal_ctl_table[i].procname = "nagle";
169 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_nagle;
170 ksocknal_ctl_table[i].maxlen = sizeof(int);
171 ksocknal_ctl_table[i].mode = 0644;
172 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
175 ksocknal_ctl_table[i].ctl_name = j++;
176 ksocknal_ctl_table[i].procname = "round_robin";
177 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_round_robin;
178 ksocknal_ctl_table[i].maxlen = sizeof(int);
179 ksocknal_ctl_table[i].mode = 0644;
180 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
184 ksocknal_ctl_table[i].ctl_name = j++;
185 ksocknal_ctl_table[i].procname = "irq_affinity";
186 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_irq_affinity;
187 ksocknal_ctl_table[i].maxlen = sizeof(int);
188 ksocknal_ctl_table[i].mode = 0644;
189 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
193 ksocknal_ctl_table[i].ctl_name = j++;
194 ksocknal_ctl_table[i].procname = "keepalive_idle";
195 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_idle;
196 ksocknal_ctl_table[i].maxlen = sizeof(int);
197 ksocknal_ctl_table[i].mode = 0644;
198 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
201 ksocknal_ctl_table[i].ctl_name = j++;
202 ksocknal_ctl_table[i].procname = "keepalive_count";
203 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_count;
204 ksocknal_ctl_table[i].maxlen = sizeof(int);
205 ksocknal_ctl_table[i].mode = 0644;
206 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
209 ksocknal_ctl_table[i].ctl_name = j++;
210 ksocknal_ctl_table[i].procname = "keepalive_intvl";
211 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_intvl;
212 ksocknal_ctl_table[i].maxlen = sizeof(int);
213 ksocknal_ctl_table[i].mode = 0644;
214 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
217 #ifdef SOCKNAL_BACKOFF
218 ksocknal_ctl_table[i].ctl_name = j++;
219 ksocknal_ctl_table[i].procname = "backoff_init";
220 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_backoff_init;
221 ksocknal_ctl_table[i].maxlen = sizeof(int);
222 ksocknal_ctl_table[i].mode = 0644;
223 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
226 ksocknal_ctl_table[i].ctl_name = j++;
227 ksocknal_ctl_table[i].procname = "backoff_max";
228 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_backoff_max;
229 ksocknal_ctl_table[i].maxlen = sizeof(int);
230 ksocknal_ctl_table[i].mode = 0644;
231 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
235 #if SOCKNAL_VERSION_DEBUG
236 ksocknal_ctl_table[i].ctl_name = j++;
237 ksocknal_ctl_table[i].procname = "protocol";
238 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_protocol;
239 ksocknal_ctl_table[i].maxlen = sizeof(int);
240 ksocknal_ctl_table[i].mode = 0644;
241 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* ctl_name values are handed out from j in step with the slot index i */
245 LASSERT (j == i + 1);
/* Slot i has to stay zeroed so the table remains terminated, hence the
 * strict '<': an i equal to the array size would leave no terminator.
 * NOTE(review): assumes i counts the filled entries and that
 * cfs_register_sysctl_table() walks the table up to a zeroed entry, as
 * Linux sysctl tables do - confirm. */
246 LASSERT (i < sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
248 ksocknal_tunables.ksnd_sysctl =
249 cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
/* a missing sysctl tree is only worth a warning here */
251 if (ksocknal_tunables.ksnd_sysctl == NULL)
252 CWARN("Can't setup /proc tunables\n");
/* Unregister the sysctl table recorded in ksocknal_tunables.ksnd_sysctl,
 * if one was registered. */
258 ksocknal_lib_tunables_fini ()
260 if (ksocknal_tunables.ksnd_sysctl != NULL)
261 cfs_unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
265 ksocknal_lib_tunables_init ()
271 ksocknal_lib_tunables_fini ()
274 #endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
277 ksocknal_lib_bind_irq (unsigned int irq)
/*
 * Query the peer and the local address of conn's socket with
 * libcfs_sock_getaddr(); the local IP is stored in conn->ksnc_myipaddr.
 * A failure of either query is logged with CERROR().
 */
282 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
284 int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
288 /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
289 LASSERT (!conn->ksnc_closing);
292 CERROR ("Error %d getting sock peer IP\n", rc);
/* second query: same call with 0 instead of 1, result is the local IP */
296 rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
297 &conn->ksnc_myipaddr, NULL);
299 CERROR ("Error %d getting sock local IP\n", rc);
307 ksocknal_lib_sock_irq (struct socket *sock)
/*
 * Send the iovec fragments of tx on conn's socket through ks_send_iovs().
 *
 * The message is checksummed first when checksumming is enabled, the
 * connection uses the V2.x protocol, nothing of tx has been sent yet and
 * no checksum is present.  MSG_MORE is added to MSG_DONTWAIT when the
 * connection's tx queue is not empty or the fragments cover less than
 * the bytes still outstanding for tx.
 */
313 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
315 struct socket *sock = conn->ksnc_sock;
322 if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
323 conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
324 tx->tx_nob == tx->tx_resid && /* first send */
325 tx->tx_msg.ksm_csum == 0) /* not checksummed */
326 ksocknal_lib_csum_tx(tx);
/* total length of the iovec fragments about to be sent */
328 nob = ks_query_iovs_length(tx->tx_iov, tx->tx_niov);
329 flags = (!cfs_list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
330 (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT;
331 rc = ks_send_iovs(sock, tx->tx_iov, tx->tx_niov, flags, 0);
333 KsPrint((4, "ksocknal_lib_send_iov: conn %p sock %p rc %d\n",
/*
 * Send the kiov (page) fragments of tx on conn's socket through
 * ks_send_kiovs().  MSG_MORE is added to MSG_DONTWAIT when the
 * connection's tx queue is not empty or the fragments cover less than
 * the bytes still outstanding for tx.
 */
339 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
341 struct socket *sock = conn->ksnc_sock;
342 lnet_kiov_t *kiov = tx->tx_kiov;
348 nkiov = tx->tx_nkiov;
/* total length of the kiov fragments about to be sent */
349 nob = ks_query_kiovs_length(tx->tx_kiov, nkiov);
350 flags = (!cfs_list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
351 (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT;
352 rc = ks_send_kiovs(sock, tx->tx_kiov, nkiov, flags, 0);
354 KsPrint((4, "ksocknal_lib_send_kiov: conn %p sock %p rc %d\n",
/*
 * Receive into conn's rx iovec array with ks_recv_iovs() and fold the
 * received bytes into conn->ksnc_rx_csum.
 *
 * For V2.x connections the checksum field of the incoming message is
 * saved and zeroed while checksumming and written back afterwards;
 * accumulation only happens when the saved checksum is non-zero.
 */
360 ksocknal_lib_recv_iov (ksock_conn_t *conn)
362 struct iovec *iov = conn->ksnc_rx_iov;
366 /* receive payload from tsdu queue */
367 rc = ks_recv_iovs (conn->ksnc_sock, iov, conn->ksnc_rx_niov,
370 /* calculate package checksum */
376 __u32 saved_csum = 0;
378 if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
379 saved_csum = conn->ksnc_msg.ksm_csum;
380 conn->ksnc_msg.ksm_csum = 0;
383 if (saved_csum != 0) {
385 /* accumulate checksum */
/* sum starts at the byte count returned by the receive and shrinks by
 * fragnob per fragment */
386 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
387 LASSERT (i < conn->ksnc_rx_niov);
389 fragnob = iov[i].iov_len;
393 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
394 iov[i].iov_base, fragnob);
/* put the checksum taken from the message back in place */
396 conn->ksnc_msg.ksm_csum = saved_csum;
400 KsPrint((4, "ksocknal_lib_recv_iov: conn %p sock %p rc %d.\n",
401 conn, conn->ksnc_sock, rc));
/*
 * Receive into conn's rx kiov (page) fragments with ks_recv_kiovs() and,
 * when data arrived and the message carries a non-zero checksum, fold the
 * received bytes into conn->ksnc_rx_csum fragment by fragment.
 */
406 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
408 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
411 /* NB we can't trust socket ops to either consume our iovs
412 * or leave them alone, so we only receive 1 frag at a time. */
/* NOTE(review): the comment above speaks of one fragment at a time, yet
 * the call below is given the full conn->ksnc_rx_nkiov count - confirm
 * which of the two is intended. */
413 LASSERT (conn->ksnc_rx_nkiov > 0);
415 /* receive payload from tsdu queue */
416 rc = ks_recv_kiovs (conn->ksnc_sock, kiov, conn->ksnc_rx_nkiov,
419 if (rc > 0 && conn->ksnc_msg.ksm_csum != 0) {
/* sum starts at the byte count returned by the receive and shrinks by
 * fragnob per fragment */
426 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
428 LASSERT (i < conn->ksnc_rx_nkiov);
/* address of the fragment: page address plus offset within the page */
430 base = (char *)(kiov[i].kiov_page->addr) + kiov[i].kiov_offset;
431 fragnob = kiov[i].kiov_len;
435 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
440 KsPrint((4, "ksocknal_lib_recv_kiov: conn %p sock %p rc %d.\n",
441 conn, conn->ksnc_sock, rc));
/* Set the TCP_SOCKET_NODELAY option on conn's socket through
 * ks_set_tcp_option(); a failure is logged with CERROR(). */
446 ksocknal_lib_eager_ack (ksock_conn_t *conn)
451 rc = ks_set_tcp_option(
452 conn->ksnc_sock, TCP_SOCKET_NODELAY,
453 &option, sizeof(option) );
455 CERROR("Can't disable nagle: %d\n", rc);
/*
 * Report connection tunables through txmem, rxmem and nagle.
 *
 * The TCP_SOCKET_NODELAY option of conn's socket is read into *nagle
 * while a reference on the tconn is held (ks_get_tconn/ks_put_tconn).
 */
460 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
462 ks_tconn_t * tconn = conn->ksnc_sock;
466 ks_get_tconn (tconn);
468 len = sizeof(*nagle);
469 rc = ks_get_tcp_option(tconn, TCP_SOCKET_NODELAY, (__u32 *)nagle, &len);
470 ks_put_tconn (tconn);
472 KsPrint((2, "ksocknal_get_conn_tunables: nodelay = %d rc = %d\n", *nagle, rc));
/* all three outputs are reset to 0 here - presumably the path taken when
 * the option query failed; TODO confirm */
477 *txmem = *rxmem = *nagle = 0;
/*
 * Apply the socknal tunables to sock: turn Nagle off when the nagle
 * tunable is 0, and switch TCP keepalive on only when keepalive_idle,
 * keepalive_count and keepalive_intvl are all positive (off otherwise).
 * Failures of ks_set_tcp_option() are logged with CERROR().
 */
483 ksocknal_lib_setup_sock (struct socket *sock)
495 /* set the window size */
496 tconn->kstc_snd_wnd = ksocknal_tunables.ksnd_buffer_size;
497 tconn->kstc_rcv_wnd = ksocknal_tunables.ksnd_buffer_size;
/* ksnd_nagle is a pointer to the tunable, like the keepalive tunables
 * dereferenced below, so the value it points at has to be tested;
 * testing the pointer itself skips this branch whenever it is set. */
501 if (!*ksocknal_tunables.ksnd_nagle) {
504 rc = ks_set_tcp_option(
505 sock, TCP_SOCKET_NODELAY,
506 &option, sizeof (option));
508 CERROR ("Can't disable nagle: %d\n", rc);
513 /* snapshot tunables */
514 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
515 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
516 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
/* keepalive is requested only when all three tunables are positive */
518 keep_alive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
520 option = (__u32)(keep_alive ? 1 : 0);
522 rc = ks_set_tcp_option(
523 sock, TCP_SOCKET_KEEPALIVE,
524 &option, sizeof (option));
/* this failure concerns the keepalive option, not Nagle */
526 CERROR ("Can't set keepalive: %d\n", rc);
/*
 * Temporarily clear the cached nagle flag of conn's tconn around a
 * ks_set_tcp_option() call.
 *
 * The flag lives in the sender or the child info, selected by kstc_type.
 * It is saved and zeroed under kstc_lock, the option call is made with
 * the lock released, and the saved value is written back under the lock.
 */
534 ksocknal_lib_push_conn (ksock_conn_t *conn)
541 tconn = conn->ksnc_sock;
/* save the current nagle flag and clear it */
545 spin_lock(&tconn->kstc_lock);
546 if (tconn->kstc_type == kstt_sender) {
547 nagle = tconn->sender.kstc_info.nagle;
548 tconn->sender.kstc_info.nagle = 0;
550 LASSERT(tconn->kstc_type == kstt_child);
551 nagle = tconn->child.kstc_info.nagle;
552 tconn->child.kstc_info.nagle = 0;
555 spin_unlock(&tconn->kstc_lock);
558 rc = ks_set_tcp_option(
/* put the saved nagle flag back */
566 spin_lock(&tconn->kstc_lock);
568 if (tconn->kstc_type == kstt_sender) {
569 tconn->sender.kstc_info.nagle = nagle;
571 LASSERT(tconn->kstc_type == kstt_child);
572 tconn->child.kstc_info.nagle = nagle;
574 spin_unlock(&tconn->kstc_lock);
/*
 * Compute the checksum of tx and store it in tx->tx_msg.ksm_csum.
 *
 * The checksum field is zeroed first.  ksocknal_csum() is then seeded
 * with ~0 and run over the first iovec, which is asserted to be the
 * message header &tx->tx_msg, and over the payload: the kiov pages when
 * tx_kiov is set, and the iovecs after the first one (NOTE(review):
 * presumably the else branch - confirm).
 */
579 ksocknal_lib_csum_tx(ksock_tx_t *tx)
585 LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg);
586 LASSERT(tx->tx_conn != NULL);
587 LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
/* the checksum field itself must read as 0 while it is being computed */
589 tx->tx_msg.ksm_csum = 0;
591 csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base,
592 tx->tx_iov[0].iov_len);
594 if (tx->tx_kiov != NULL) {
595 for (i = 0; i < tx->tx_nkiov; i++) {
/* address of the fragment: page address plus offset within the page */
596 base = (PUCHAR)(tx->tx_kiov[i].kiov_page->addr) +
597 tx->tx_kiov[i].kiov_offset;
599 csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
/* index 0 is the header, already covered above */
602 for (i = 1; i < tx->tx_niov; i++)
603 csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base,
604 tx->tx_iov[i].iov_len);
/* the error-injection tunable is cleared once it has been seen set;
 * presumably the checksum is deliberately corrupted here - TODO confirm */
607 if (*ksocknal_tunables.ksnd_inject_csum_error) {
609 *ksocknal_tunables.ksnd_inject_csum_error = 0;
612 tx->tx_msg.ksm_csum = csum;
/*
 * Scheduling callback for a socket (installed as kstc_sched_cb by
 * ksocknal_lib_set_callback()).  With ksnd_global_lock read-held it calls
 * ksocknal_write_callback() or ksocknal_read_callback() for the
 * connection stored in sock->kstc_conn; presumably mode selects which
 * one - TODO confirm.
 */
615 void ksocknal_schedule_callback(struct socket*sock, int mode)
617 ksock_conn_t * conn = (ksock_conn_t *) sock->kstc_conn;
619 read_lock(&ksocknal_data.ksnd_global_lock);
621 ksocknal_write_callback(conn);
623 ksocknal_read_callback(conn);
625 read_unlock(&ksocknal_data.ksnd_global_lock);
/*
 * Finish a transmit.  Under the scheduler's kss_lock, tx is unlinked from
 * the list it is on and, if conn's tx queue is empty afterwards, conn is
 * unlinked through ksnc_tx_list as well.  A reference on tx is dropped
 * after the lock has been released.
 */
629 ksocknal_tx_fini_callback(ksock_conn_t * conn, ksock_tx_t * tx)
631 /* remove tx/conn from conn's outgoing queue */
632 spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
633 cfs_list_del(&tx->tx_list);
634 if (cfs_list_empty(&conn->ksnc_tx_queue))
635 cfs_list_del(&conn->ksnc_tx_list);
637 spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
639 /* complete send; tx -ref */
640 ksocknal_tx_decref(tx);
644 ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
/* Attach conn to sock and install ksocknal_schedule_callback as the
 * socket's scheduling callback. */
649 ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
651 sock->kstc_conn = conn;
652 sock->kstc_sched_cb = ksocknal_schedule_callback;
/* Detach the connection from sock and remove its scheduling callback by
 * setting both fields to NULL. */
656 ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
658 sock->kstc_conn = NULL;
659 sock->kstc_sched_cb = NULL;
663 ksocknal_lib_zc_capable(ksock_conn_t *conn)
669 ksocknal_lib_memory_pressure(ksock_conn_t *conn)
675 ksocknal_lib_bind_thread_to_cpu(int id)