1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/klnds/socklnd/socklnd_lib-winnt.c
38 * windows socknal library
43 # if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
/* Storage for the socknal sysctl entries: 21 slots.  There is no static
 * initializer; the slots are assigned one by one at run time in
 * ksocknal_lib_tunables_init(). */
44 static cfs_sysctl_table_t ksocknal_ctl_table[21];
/* Top-level sysctl table that gets registered.  The fields visible here
 * name the entry "socknal" and hang ksocknal_ctl_table below it as the
 * child table; the remaining initializer fields are not shown in this
 * excerpt. */
46 cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
49 /* procname */ "socknal",
53 /* child */ ksocknal_ctl_table
/*
 * Populate ksocknal_ctl_table, one slot per tunable, and register the
 * tree rooted at ksocknal_top_ctl_table.
 *
 * Every slot is filled the same way: ctl_name takes the next value of the
 * running counter j, procname is the name the entry is published under,
 * data is the matching ksocknal_tunables member, maxlen is the size of one
 * int, mode is 0644 (read/write) or 0444 (read-only), and proc_dointvec is
 * the handler.
 *
 * The registration handle is kept in ksocknal_tunables.ksnd_sysctl; a NULL
 * handle is reported with CWARN.
 */
59 ksocknal_lib_tunables_init ()
/* Entries that are always assigned. */
64 ksocknal_ctl_table[i].ctl_name = j++;
65 ksocknal_ctl_table[i].procname = "timeout";
66 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_timeout;
67 ksocknal_ctl_table[i].maxlen = sizeof (int);
68 ksocknal_ctl_table[i].mode = 0644;
69 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
72 ksocknal_ctl_table[i].ctl_name = j++;
73 ksocknal_ctl_table[i].procname = "credits";
74 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_credits;
75 ksocknal_ctl_table[i].maxlen = sizeof (int);
76 ksocknal_ctl_table[i].mode = 0444;
77 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
80 ksocknal_ctl_table[i].ctl_name = j++;
81 ksocknal_ctl_table[i].procname = "peer_credits";
82 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_peercredits;
83 ksocknal_ctl_table[i].maxlen = sizeof (int);
84 ksocknal_ctl_table[i].mode = 0444;
85 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
88 ksocknal_ctl_table[i].ctl_name = j++;
89 ksocknal_ctl_table[i].procname = "nconnds";
90 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_nconnds;
91 ksocknal_ctl_table[i].maxlen = sizeof (int);
92 ksocknal_ctl_table[i].mode = 0444;
93 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
97 ksocknal_ctl_table[i].ctl_name = j++;
98 ksocknal_ctl_table[i].procname = "min_reconnectms";
99 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_min_reconnectms;
100 ksocknal_ctl_table[i].maxlen = sizeof (int);
101 ksocknal_ctl_table[i].mode = 0444;
102 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
105 ksocknal_ctl_table[i].ctl_name = j++;
106 ksocknal_ctl_table[i].procname = "max_reconnectms";
107 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_max_reconnectms;
108 ksocknal_ctl_table[i].maxlen = sizeof (int);
109 ksocknal_ctl_table[i].mode = 0444;
110 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
113 ksocknal_ctl_table[i].ctl_name = j++;
114 ksocknal_ctl_table[i].procname = "eager_ack";
115 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_eager_ack;
116 ksocknal_ctl_table[i].maxlen = sizeof (int);
117 ksocknal_ctl_table[i].mode = 0644;
118 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* Published as "zero_copy", but backed by the ksnd_zc_min_payload member. */
121 ksocknal_ctl_table[i].ctl_name = j++;
122 ksocknal_ctl_table[i].procname = "zero_copy";
123 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_zc_min_payload;
124 ksocknal_ctl_table[i].maxlen = sizeof (int);
125 ksocknal_ctl_table[i].mode = 0644;
126 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* Published as "typed", backed by the ksnd_typed_conns member. */
129 ksocknal_ctl_table[i].ctl_name = j++;
130 ksocknal_ctl_table[i].procname = "typed";
131 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_typed_conns;
132 ksocknal_ctl_table[i].maxlen = sizeof (int);
133 ksocknal_ctl_table[i].mode = 0444;
134 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
137 ksocknal_ctl_table[i].ctl_name = j++;
138 ksocknal_ctl_table[i].procname = "min_bulk";
139 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_min_bulk;
140 ksocknal_ctl_table[i].maxlen = sizeof (int);
141 ksocknal_ctl_table[i].mode = 0644;
142 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
145 ksocknal_ctl_table[i].ctl_name = j++;
146 ksocknal_ctl_table[i].procname = "rx_buffer_size";
147 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_rx_buffer_size;
148 ksocknal_ctl_table[i].maxlen = sizeof(int);
149 ksocknal_ctl_table[i].mode = 0644;
150 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
153 ksocknal_ctl_table[i].ctl_name = j++;
154 ksocknal_ctl_table[i].procname = "tx_buffer_size";
155 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_tx_buffer_size;
156 ksocknal_ctl_table[i].maxlen = sizeof(int);
157 ksocknal_ctl_table[i].mode = 0644;
158 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
161 ksocknal_ctl_table[i].ctl_name = j++;
162 ksocknal_ctl_table[i].procname = "nagle";
163 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_nagle;
164 ksocknal_ctl_table[i].maxlen = sizeof(int);
165 ksocknal_ctl_table[i].mode = 0644;
166 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
169 ksocknal_ctl_table[i].ctl_name = j++;
170 ksocknal_ctl_table[i].procname = "round_robin";
171 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_round_robin;
172 ksocknal_ctl_table[i].maxlen = sizeof(int);
173 ksocknal_ctl_table[i].mode = 0644;
174 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
178 ksocknal_ctl_table[i].ctl_name = j++;
179 ksocknal_ctl_table[i].procname = "irq_affinity";
180 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_irq_affinity;
181 ksocknal_ctl_table[i].maxlen = sizeof(int);
182 ksocknal_ctl_table[i].mode = 0644;
183 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
187 ksocknal_ctl_table[i].ctl_name = j++;
188 ksocknal_ctl_table[i].procname = "keepalive_idle";
189 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_idle;
190 ksocknal_ctl_table[i].maxlen = sizeof(int);
191 ksocknal_ctl_table[i].mode = 0644;
192 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
195 ksocknal_ctl_table[i].ctl_name = j++;
196 ksocknal_ctl_table[i].procname = "keepalive_count";
197 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_count;
198 ksocknal_ctl_table[i].maxlen = sizeof(int);
199 ksocknal_ctl_table[i].mode = 0644;
200 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
203 ksocknal_ctl_table[i].ctl_name = j++;
204 ksocknal_ctl_table[i].procname = "keepalive_intvl";
205 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_keepalive_intvl;
206 ksocknal_ctl_table[i].maxlen = sizeof(int);
207 ksocknal_ctl_table[i].mode = 0644;
208 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* Backoff entries exist only in builds that define SOCKNAL_BACKOFF. */
211 #ifdef SOCKNAL_BACKOFF
212 ksocknal_ctl_table[i].ctl_name = j++;
213 ksocknal_ctl_table[i].procname = "backoff_init";
214 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_backoff_init;
215 ksocknal_ctl_table[i].maxlen = sizeof(int);
216 ksocknal_ctl_table[i].mode = 0644;
217 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
220 ksocknal_ctl_table[i].ctl_name = j++;
221 ksocknal_ctl_table[i].procname = "backoff_max";
222 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_backoff_max;
223 ksocknal_ctl_table[i].maxlen = sizeof(int);
224 ksocknal_ctl_table[i].mode = 0644;
225 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* The "protocol" entry exists only when SOCKNAL_VERSION_DEBUG is non-zero. */
229 #if SOCKNAL_VERSION_DEBUG
230 ksocknal_ctl_table[i].ctl_name = j++;
231 ksocknal_ctl_table[i].procname = "protocol";
232 ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_protocol;
233 ksocknal_ctl_table[i].maxlen = sizeof(int);
234 ksocknal_ctl_table[i].mode = 0644;
235 ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
/* Sanity checks on the two counters: j must be exactly one ahead of i,
 * and i must not exceed the number of slots in the table.
 * NOTE(review): up to 21 procnames are assigned above, which equals the
 * array size, and `<=` accepts i == 21.  Confirm whether the code that
 * walks this table needs a zeroed terminator slot after the last entry. */
239 LASSERT (j == i + 1);
240 LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
/* Register the whole tree and keep the handle for the matching fini call. */
242 ksocknal_tunables.ksnd_sysctl =
243 cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
245 if (ksocknal_tunables.ksnd_sysctl == NULL)
246 CWARN("Can't setup /proc tunables\n");
/* Counterpart of ksocknal_lib_tunables_init(): unregisters the sysctl
 * table, but only when ksnd_sysctl holds a non-NULL registration handle. */
252 ksocknal_lib_tunables_fini ()
254 if (ksocknal_tunables.ksnd_sysctl != NULL)
255 cfs_unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
/* Second definition of ksocknal_lib_tunables_init(), placed shortly before
 * the #endif that closes the CONFIG_SYSCTL conditional.
 * NOTE(review): presumably the variant used when the sysctl table is not
 * built; neither its body nor the #else is visible here -- confirm. */
259 ksocknal_lib_tunables_init ()
/* Second definition of ksocknal_lib_tunables_fini(), directly before the
 * #endif that closes the CONFIG_SYSCTL conditional.
 * NOTE(review): presumably the variant used when the sysctl table is not
 * built; its body is not visible here -- confirm. */
265 ksocknal_lib_tunables_fini ()
268 #endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
/* Takes an IRQ number.
 * NOTE(review): the body is not visible in this excerpt; going by the
 * name this is the platform hook for IRQ binding -- confirm what, if
 * anything, it does on this platform. */
271 ksocknal_lib_bind_irq (unsigned int irq)
/*
 * Look up both endpoint addresses of conn's socket.
 *
 * libcfs_sock_getaddr() is called twice on conn->ksnc_sock: first with
 * flag 1 for the peer side, then with flag 0 for the local side, whose IP
 * is written to conn->ksnc_myipaddr.  A failure of either call is logged
 * with CERROR together with the error code.
 */
276 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
278 int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
282 /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
283 LASSERT (!conn->ksnc_closing);
286 CERROR ("Error %d getting sock peer IP\n", rc);
290 rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
291 &conn->ksnc_myipaddr, NULL);
293 CERROR ("Error %d getting sock local IP\n", rc);
/* Takes a socket.
 * NOTE(review): the body is not visible in this excerpt; going by the
 * name it reports the IRQ associated with the socket -- confirm the
 * value returned on this platform. */
301 ksocknal_lib_sock_irq (struct socket *sock)
/*
 * Send the iovec fragments of tx on conn's socket with MSG_DONTWAIT.
 *
 * Before sending, the checksum is computed if checksumming is enabled,
 * the connection speaks the V2.x protocol, nothing of tx has been sent
 * yet and no checksum is stored in the message.
 *
 * MSG_MORE is added when more data is known to follow: either further
 * transmits are queued on the connection, or the iovecs cover fewer bytes
 * than tx->tx_resid.
 */
307 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
309 struct socket *sock = conn->ksnc_sock;
316 if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
317 conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
318 tx->tx_nob == tx->tx_resid && /* first send */
319 tx->tx_msg.ksm_csum == 0) /* not checksummed */
320 ksocknal_lib_csum_tx(tx);
/* nob: total byte length described by the iovecs. */
322 nob = ks_query_iovs_length(tx->tx_iov, tx->tx_niov);
323 flags = (!list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
324 (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT;
325 rc = ks_send_iovs(sock, tx->tx_iov, tx->tx_niov, flags, 0);
327 KsPrint((4, "ksocknal_lib_send_iov: conn %p sock %p rc %d\n",
/*
 * Send the page (kiov) fragments of tx on conn's socket with
 * MSG_DONTWAIT.
 *
 * MSG_MORE is added when more data is known to follow: either further
 * transmits are queued on the connection, or the kiovs cover fewer bytes
 * than tx->tx_resid.
 */
333 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
335 struct socket *sock = conn->ksnc_sock;
336 lnet_kiov_t *kiov = tx->tx_kiov;
342 nkiov = tx->tx_nkiov;
/* nob: total byte length described by the kiovs. */
343 nob = ks_query_kiovs_length(tx->tx_kiov, nkiov);
344 flags = (!list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ?
345 (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT;
346 rc = ks_send_kiovs(sock, tx->tx_kiov, nkiov, flags, 0);
348 KsPrint((4, "ksocknal_lib_send_kiov: conn %p sock %p rc %d\n",
/*
 * Receive into the connection's rx iovecs and, when the message carries a
 * checksum, fold the received bytes into conn->ksnc_rx_csum.
 *
 * For a V2.x connection the checksum stored in the message is set aside
 * and the field zeroed; if the saved value was non-zero the received
 * fragments are summed, and the saved value is put back afterwards.
 * NOTE(review): the condition guarding the checksum section and the exact
 * nesting of these steps are on lines not visible here -- confirm.
 */
354 ksocknal_lib_recv_iov (ksock_conn_t *conn)
356 struct iovec *iov = conn->ksnc_rx_iov;
360 /* receive payload from tsdu queue */
361 rc = ks_recv_iovs (conn->ksnc_sock, iov, conn->ksnc_rx_niov,
364 /* calculate packet checksum */
370 __u32 saved_csum = 0;
372 if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
373 saved_csum = conn->ksnc_msg.ksm_csum;
374 conn->ksnc_msg.ksm_csum = 0;
377 if (saved_csum != 0) {
379 /* accumulate checksum */
/* sum starts at the byte count returned by the receive and is reduced by
 * each fragment's length until nothing is left. */
380 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
381 LASSERT (i < conn->ksnc_rx_niov);
383 fragnob = iov[i].iov_len;
387 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
388 iov[i].iov_base, fragnob);
390 conn->ksnc_msg.ksm_csum = saved_csum;
394 KsPrint((4, "ksocknal_lib_recv_iov: conn %p sock %p rc %d.\n",
395 conn, conn->ksnc_sock, rc));
/*
 * Receive into the connection's rx page (kiov) fragments and, when bytes
 * arrived and the message carries a non-zero checksum, fold the received
 * bytes into conn->ksnc_rx_csum.
 *
 * At least one rx kiov must be present (asserted).
 */
400 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
402 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
/* NOTE(review): the remark below says one fragment at a time, yet the
 * receive call further down is handed all ksnc_rx_nkiov fragments --
 * confirm whether the remark still applies to this platform. */
405 /* NB we can't trust socket ops to either consume our iovs
406 * or leave them alone, so we only receive 1 frag at a time. */
407 LASSERT (conn->ksnc_rx_nkiov > 0);
409 /* receive payload from tsdu queue */
410 rc = ks_recv_kiovs (conn->ksnc_sock, kiov, conn->ksnc_rx_nkiov,
413 if (rc > 0 && conn->ksnc_msg.ksm_csum != 0) {
/* sum starts at the byte count returned by the receive and is reduced by
 * each fragment's length until nothing is left. */
420 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
422 LASSERT (i < conn->ksnc_rx_nkiov);
/* Address of the fragment's data: the page's addr plus the kiov offset. */
424 base = (char *)(kiov[i].kiov_page->addr) + kiov[i].kiov_offset;
425 fragnob = kiov[i].kiov_len;
429 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
434 KsPrint((4, "ksocknal_lib_recv_kiov: conn %p sock %p rc %d.\n",
435 conn, conn->ksnc_sock, rc));
/* Issue a TCP_SOCKET_NODELAY option on conn's socket through
 * ks_set_tcp_option(); a failure is logged as "Can't disable nagle".
 * The option value itself is set on a line not visible here. */
440 ksocknal_lib_eager_ack (ksock_conn_t *conn)
445 rc = ks_set_tcp_option(
446 conn->ksnc_sock, TCP_SOCKET_NODELAY,
447 &option, sizeof(option) );
449 CERROR("Can't disable nagle: %d\n", rc);
/*
 * Report per-connection socket settings through the three out-parameters.
 *
 * The TCP_SOCKET_NODELAY option is read into *nagle via
 * ks_get_tcp_option(), with a tconn reference held around the query.
 * One path zeroes *txmem, *rxmem and *nagle together.
 * NOTE(review): the condition selecting that path, and any other writes
 * to *txmem / *rxmem, are on lines not visible here -- confirm.
 */
454 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
456 ks_tconn_t * tconn = conn->ksnc_sock;
/* Pin the tconn for the duration of the option query. */
460 ks_get_tconn (tconn);
462 len = sizeof(*nagle);
463 rc = ks_get_tcp_option(tconn, TCP_SOCKET_NODELAY, (__u32 *)nagle, &len);
464 ks_put_tconn (tconn);
466 KsPrint((2, "ksocknal_get_conn_tunables: nodelay = %d rc = %d\n", *nagle, rc));
471 *txmem = *rxmem = *nagle = 0;
/*
 * Apply the socknal TCP settings to sock: optionally request
 * TCP_SOCKET_NODELAY, then switch keepalive on or off.
 *
 * Keepalive is requested only when the idle, count and interval tunables
 * are all positive; the resulting 1/0 flag is what gets passed to
 * TCP_SOCKET_KEEPALIVE.  A failure of ks_set_tcp_option() is logged with
 * CERROR, using a message that names the option that failed.
 */
477 ksocknal_lib_setup_sock (struct socket *sock)
/* NOTE(review): no declaration of tconn and no ksnd_buffer_size tunable
 * appear in the lines visible here (the sysctl table only publishes
 * rx_buffer_size and tx_buffer_size) -- confirm whether these two
 * assignments are compiled at all. */
489 /* set the window size */
490 tconn->kstc_snd_wnd = ksocknal_tunables.ksnd_buffer_size;
491 tconn->kstc_rcv_wnd = ksocknal_tunables.ksnd_buffer_size;
/* ksnd_nagle points at the tunable's value, like the keepalive tunables
 * dereferenced below, so it has to be dereferenced here as well.  Testing
 * the pointer itself is false whenever the tunable exists, which meant
 * TCP_SOCKET_NODELAY was never requested. */
495 if (!*ksocknal_tunables.ksnd_nagle) {
498 rc = ks_set_tcp_option(
499 sock, TCP_SOCKET_NODELAY,
500 &option, sizeof (option));
502 CERROR ("Can't disable nagle: %d\n", rc);
507 /* snapshot tunables */
508 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
509 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
510 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
/* Keepalive is only meaningful when all three parameters are positive. */
512 keep_alive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
514 option = (__u32)(keep_alive ? 1 : 0);
516 rc = ks_set_tcp_option(
517 sock, TCP_SOCKET_KEEPALIVE,
518 &option, sizeof (option));
/* This failure concerns the keepalive option, not nagle. */
520 CERROR ("Can't set keepalive: %d\n", rc);
/*
 * Issue a TCP option on conn's socket with the per-tconn nagle flag
 * temporarily cleared.
 *
 * Phase 1, under kstc_lock: remember the current nagle flag (kept in the
 * sender or the child info, depending on kstc_type) and set it to 0.
 * Phase 2, lock dropped: call ks_set_tcp_option() (its arguments are on
 * lines not visible here).
 * Phase 3, under kstc_lock again: write the remembered flag back.
 */
528 ksocknal_lib_push_conn (ksock_conn_t *conn)
535 tconn = conn->ksnc_sock;
539 spin_lock(&tconn->kstc_lock);
540 if (tconn->kstc_type == kstt_sender) {
541 nagle = tconn->sender.kstc_info.nagle;
542 tconn->sender.kstc_info.nagle = 0;
/* Any tconn that is not a sender is required to be a child. */
544 LASSERT(tconn->kstc_type == kstt_child);
545 nagle = tconn->child.kstc_info.nagle;
546 tconn->child.kstc_info.nagle = 0;
549 spin_unlock(&tconn->kstc_lock);
552 rc = ks_set_tcp_option(
560 spin_lock(&tconn->kstc_lock);
562 if (tconn->kstc_type == kstt_sender) {
563 tconn->sender.kstc_info.nagle = nagle;
565 LASSERT(tconn->kstc_type == kstt_child);
566 tconn->child.kstc_info.nagle = nagle;
568 spin_unlock(&tconn->kstc_lock);
/*
 * Compute the checksum of an outgoing transmit and store it in
 * tx->tx_msg.ksm_csum.
 *
 * Preconditions (asserted): the first iovec is the message header
 * tx->tx_msg, tx is attached to a connection, and that connection uses
 * the V2.x protocol.
 *
 * The header is summed first, with its checksum field zeroed and ~0 as
 * the starting value.  The payload follows: the kiov pages when tx_kiov
 * is set, and the iovecs after the header (index 1 onwards).
 * NOTE(review): whether the iovec loop is the else-branch of the kiov
 * test is on a line not visible here -- confirm.
 */
573 ksocknal_lib_csum_tx(ksock_tx_t *tx)
579 LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg);
580 LASSERT(tx->tx_conn != NULL);
581 LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
/* The checksum field must not contribute to its own checksum. */
583 tx->tx_msg.ksm_csum = 0;
585 csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base,
586 tx->tx_iov[0].iov_len);
588 if (tx->tx_kiov != NULL) {
589 for (i = 0; i < tx->tx_nkiov; i++) {
/* Address of the fragment's data: the page's addr plus the kiov offset. */
590 base = (PUCHAR)(tx->tx_kiov[i].kiov_page->addr) +
591 tx->tx_kiov[i].kiov_offset;
593 csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
596 for (i = 1; i < tx->tx_niov; i++)
597 csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base,
598 tx->tx_iov[i].iov_len);
/* The inject_csum_error tunable is one-shot: it is cleared as soon as it
 * has been seen set.  NOTE(review): what is done to csum in that case is
 * on a line not visible here -- confirm. */
601 if (*ksocknal_tunables.ksnd_inject_csum_error) {
603 *ksocknal_tunables.ksnd_inject_csum_error = 0;
606 tx->tx_msg.ksm_csum = csum;
/*
 * Scheduling callback installed on a socket by ksocknal_lib_set_callback().
 *
 * Fetches the connection stored in sock->kstc_conn and, while holding
 * ksnd_global_lock for reading, hands it to ksocknal_write_callback() or
 * ksocknal_read_callback().
 * NOTE(review): the test that picks between the two is on a line not
 * visible here; presumably it depends on mode -- confirm.
 */
609 void ksocknal_schedule_callback(struct socket*sock, int mode)
611 ksock_conn_t * conn = (ksock_conn_t *) sock->kstc_conn;
613 read_lock (&ksocknal_data.ksnd_global_lock);
615 ksocknal_write_callback(conn);
617 ksocknal_read_callback(conn);
619 read_unlock (&ksocknal_data.ksnd_global_lock);
/*
 * Finish a transmit: unlink tx from its list and, if that leaves the
 * connection's tx queue empty, unlink the connection's ksnc_tx_list entry
 * too.  Both unlinks happen under the scheduler's kss_lock (bottom-half
 * safe variant).  A reference on tx is dropped after the lock is
 * released.
 */
623 ksocknal_tx_fini_callback(ksock_conn_t * conn, ksock_tx_t * tx)
625 /* remove tx/conn from conn's outgoing queue */
626 spin_lock_bh (&conn->ksnc_scheduler->kss_lock);
627 list_del(&tx->tx_list);
628 if (list_empty(&conn->ksnc_tx_queue)) {
629 list_del (&conn->ksnc_tx_list);
631 spin_unlock_bh (&conn->ksnc_scheduler->kss_lock);
633 /* complete send; tx -ref */
634 ksocknal_tx_decref (tx);
/* Takes a socket and its connection.
 * NOTE(review): the body is not visible in this excerpt; going by the
 * name it would save the socket's existing callbacks -- confirm what, if
 * anything, it does on this platform. */
638 ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
/* Attach conn to sock and install ksocknal_schedule_callback as the
 * socket's scheduling callback; that callback reads the connection back
 * from sock->kstc_conn. */
643 ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
645 sock->kstc_conn = conn;
646 sock->kstc_sched_cb = ksocknal_schedule_callback;
/* Detach the socket from its connection: clears both the stored
 * connection pointer and the scheduling callback.  The conn argument is
 * not used by the visible lines. */
650 ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
652 sock->kstc_conn = NULL;
653 sock->kstc_sched_cb = NULL;
/* Takes a connection.
 * NOTE(review): the body is not visible in this excerpt; going by the
 * name it reports whether the connection can do zero-copy sends --
 * confirm the value returned on this platform. */
657 ksocknal_lib_zc_capable(ksock_conn_t *conn)
/* Takes a connection.
 * NOTE(review): the body is not visible in this excerpt; going by the
 * name it reports a memory-pressure condition for the connection --
 * confirm the value returned on this platform. */
663 ksocknal_lib_memory_pressure(ksock_conn_t *conn)
669 ksocknal_lib_bind_thread_to_cpu(int id)