4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
31 * This file is part of Lustre, http://www.lustre.org/
32 * Lustre is a trademark of Sun Microsystems, Inc.
34 * lnet/klnds/socklnd/socklnd_lib-darwin.c
36 * Darwin porting library
37 * Make things easy to port
39 * Author: Phil Schwan <phil@clusterfs.com>
41 #include <mach/mach_types.h>
43 #include <netinet/in.h>
44 #include <netinet/tcp.h>
/*
 * ksocknal sysctl tunables (OS X): declare a "ksocknal" node under the
 * "lnet" sysctl tree plus one read/write integer OID per tunable, each
 * wired directly to a field of ksocknal_tunables.  Compiled only when
 * sysctl is available and module parameters are not exposed via sysfs.
 * NOTE(review): this listing is fragmentary — several SYSCTL_INT entries
 * are missing their trailing '0, "name");' line in the visible text.
 */
49 # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
53 SYSCTL_NODE (_lnet, OID_AUTO, ksocknal, CTLFLAG_RW,
54 0, "ksocknal_sysctl");
56 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, timeout,
57 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_timeout,
59 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, credits,
60 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_credits,
62 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, peer_credits,
63 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_peertxcredits,
65 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nconnds,
66 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nconnds,
68 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_reconnectms,
69 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_reconnectms,
70 0, "min_reconnectms");
71 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, max_reconnectms,
72 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_max_reconnectms,
73 0, "max_reconnectms");
74 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, eager_ack,
75 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_eager_ack,
77 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, typed,
78 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_typed_conns,
80 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_bulk,
81 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_bulk,
83 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, rx_buffer_size,
84 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_rx_buffer_size,
86 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, tx_buffer_size,
87 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_tx_buffer_size,
89 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nagle,
90 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nagle,
92 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_idle,
93 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_idle,
95 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_count,
96 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_count,
97 0, "keepalive_count");
98 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_intvl,
99 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_intvl,
100 0, "keepalive_intvl");
/*
 * Top-level table handed to cfs_register_sysctl_table() at init time:
 * the ksocknal node followed by every per-tunable sysctl OID declared
 * above, in declaration order.
 * NOTE(review): the terminating entry and closing '};' are not visible
 * in this fragment — presumably a NULL sentinel follows; confirm against
 * the full source.
 */
102 cfs_sysctl_table_t ksocknal_top_ctl_table [] = {
103 &sysctl__lnet_ksocknal,
104 &sysctl__lnet_ksocknal_timeout,
105 &sysctl__lnet_ksocknal_credits,
106 &sysctl__lnet_ksocknal_peer_credits,
107 &sysctl__lnet_ksocknal_nconnds,
108 &sysctl__lnet_ksocknal_min_reconnectms,
109 &sysctl__lnet_ksocknal_max_reconnectms,
110 &sysctl__lnet_ksocknal_eager_ack,
111 &sysctl__lnet_ksocknal_typed,
112 &sysctl__lnet_ksocknal_min_bulk,
113 &sysctl__lnet_ksocknal_rx_buffer_size,
114 &sysctl__lnet_ksocknal_tx_buffer_size,
115 &sysctl__lnet_ksocknal_nagle,
116 &sysctl__lnet_ksocknal_keepalive_idle,
117 &sysctl__lnet_ksocknal_keepalive_count,
118 &sysctl__lnet_ksocknal_keepalive_intvl,
/*
 * Register the ksocknal sysctl tree; the stored handle is NULL on
 * failure (the error path is not visible in this fragment).
 * NOTE(review): return types, braces and return statements are missing
 * from this listing — do not edit logic without the full source.
 */
123 ksocknal_lib_tunables_init ()
125 ksocknal_tunables.ksnd_sysctl =
126 cfs_register_sysctl_table (ksocknal_top_ctl_table, 0);
128 if (ksocknal_tunables.ksnd_sysctl == NULL)
/* Unregister the sysctl tree iff it was successfully registered. */
135 ksocknal_lib_tunables_fini ()
137 if (ksocknal_tunables.ksnd_sysctl != NULL)
138 cfs_unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl);
/* Fallback stubs when sysctl support is compiled out (bodies are
 * presumably empty/trivial — not visible here). */
142 ksocknal_lib_tunables_init ()
148 ksocknal_lib_tunables_fini ()
154 * To use bigger buffer for socket:
155 * 1. Increase nmbclusters (cannot be increased via sysctl because it is
156 * read-only, so we must patch the kernel).
157 * 2. Increase net.inet.tcp.reass.maxsegments
158 * 3. Increase net.inet.tcp.sendspace
159 * 4. Increase net.inet.tcp.recvspace
160 * 5. Increase kern.ipc.maxsockbuf
/* Upper bound for socket buffer sizes requested below: 1152 KiB. */
162 #define KSOCKNAL_MAX_BUFFER (1152*1024)
/* IRQ-affinity hooks; on Darwin these appear to be no-ops (bodies not
 * visible in this fragment). */
165 ksocknal_lib_bind_irq (unsigned int irq)
171 ksocknal_lib_sock_irq (cfs_socket_t *sock)
/*
 * Fill in the connection's peer IP (second arg 1 = remote side) and
 * local IP (second arg 0) from the underlying socket, logging a CERROR
 * on either failure.  ksnc_sock is dereferenced directly because the
 * connection is asserted not to be closing yet.
 */
177 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
179 int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
183 /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
184 LASSERT (!conn->ksnc_closing);
187 CERROR ("Error %d getting sock peer IP\n", rc);
191 rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
192 &conn->ksnc_myipaddr, NULL);
194 CERROR ("Error %d getting sock local IP\n", rc);
/*
 * (Darwin 8+ KPI) Send the tx's iovec fragments with non-blocking
 * sock_send().  With SOCKNAL_SINGLE_FRAG_TX a single on-stack iovec is
 * used; otherwise the scheduler's scratch iovec array holds all tx_niov
 * fragments.  rc is the negated sock_send() errno; the sent-byte count
 * comes back through sndlen (return path not visible in this fragment).
 */
204 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
206 socket_t sock = C2B_SOCK(conn->ksnc_sock);
211 #if SOCKNAL_SINGLE_FRAG_TX
212 struct iovec scratch;
213 struct iovec *scratchiov = &scratch;
214 unsigned int niov = 1;
216 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
217 unsigned int niov = tx->tx_niov;
219 struct msghdr msg = {
222 .msg_iov = scratchiov,
226 .msg_flags = MSG_DONTWAIT
/* Copy the fragments into scratch space and total the bytes to send. */
231 for (nob = i = 0; i < niov; i++) {
232 scratchiov[i] = tx->tx_iov[i];
233 nob += scratchiov[i].iov_len;
238 * Linux has MSG_MORE; do we have anything to
239 * reduce the number of partial TCP segments sent?
241 rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
/*
 * (Darwin 8+ KPI) Send the tx's kernel-page (kiov) fragments: each page
 * is temporarily kmapped into a scratch iovec, sent non-blocking via
 * sock_send(), then unmapped.  rc is the negated sock_send() errno.
 */
248 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
250 socket_t sock = C2B_SOCK(conn->ksnc_sock);
251 lnet_kiov_t *kiov = tx->tx_kiov;
256 #if SOCKNAL_SINGLE_FRAG_TX
257 struct iovec scratch;
258 struct iovec *scratchiov = &scratch;
259 unsigned int niov = 1;
261 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
262 unsigned int niov = tx->tx_nkiov;
264 struct msghdr msg = {
267 .msg_iov = scratchiov,
271 .msg_flags = MSG_DONTWAIT
/* Map each page and build the iovec; total the bytes to send. */
276 for (nob = i = 0; i < niov; i++) {
277 scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
279 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
284 * Linux has MSG_MORE; do we have anything to
285 * reduce the number of partial TCP segments sent?
287 rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
/* Unmap every page regardless of the send outcome. */
288 for (i = 0; i < niov; i++)
289 cfs_kunmap(kiov[i].kiov_page);
/*
 * (Darwin 8+ KPI) Receive into the connection's pending rx iovec via
 * non-blocking sock_receive().  The iovs are copied to scratch space
 * first because socket ops may consume or modify them.
 */
296 ksocknal_lib_recv_iov (ksock_conn_t *conn)
298 #if SOCKNAL_SINGLE_FRAG_RX
299 struct iovec scratch;
300 struct iovec *scratchiov = &scratch;
301 unsigned int niov = 1;
303 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
304 unsigned int niov = conn->ksnc_rx_niov;
306 struct iovec *iov = conn->ksnc_rx_iov;
307 struct msghdr msg = {
310 .msg_iov = scratchiov,
/* Copy fragments to scratch and total the bytes we expect to read. */
323 for (nob = i = 0; i < niov; i++) {
324 scratchiov[i] = iov[i];
325 nob += scratchiov[i].iov_len;
327 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
328 rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
/*
 * (Darwin 8+ KPI) Receive into the connection's pending kiov pages:
 * kmap each page into a scratch iovec, sock_receive() non-blocking,
 * then unmap all pages.
 */
336 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
338 #if SOCKNAL_SINGLE_FRAG_RX
339 struct iovec scratch;
340 struct iovec *scratchiov = &scratch;
341 unsigned int niov = 1;
343 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
344 unsigned int niov = conn->ksnc_rx_nkiov;
346 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
347 struct msghdr msg = {
350 .msg_iov = scratchiov,
361 /* NB we can't trust socket ops to either consume our iovs
362 * or leave them alone. */
363 for (nob = i = 0; i < niov; i++) {
364 scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + \
366 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
368 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
369 rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
/* Unmap every page regardless of the receive outcome. */
370 for (i = 0; i < niov; i++)
371 cfs_kunmap(kiov[i].kiov_page);
/* (Darwin 8+) Request an immediate ACK on this connection; the body is
 * not visible in this fragment — presumably a no-op or a TCP option
 * tweak, see the pre-Darwin8 variant below. */
378 ksocknal_lib_eager_ack (ksock_conn_t *conn)
/*
 * (Darwin 8+ KPI) Report the connection's socket buffer sizes and
 * Nagle state.  Takes a connsock ref first; if that fails the conn is
 * closing and all three outputs are zeroed.  Nagle state is read as
 * TCP_NODELAY via sock_getsockopt() (rc negated to Lustre convention).
 */
384 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
386 socket_t sock = C2B_SOCK(conn->ksnc_sock);
390 rc = ksocknal_connsock_addref(conn);
392 LASSERT (conn->ksnc_closing);
393 *txmem = *rxmem = *nagle = 0;
396 rc = libcfs_sock_getbuf(conn->ksnc_sock, txmem, rxmem);
398 len = sizeof(*nagle);
399 rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
402 ksocknal_connsock_decref(conn);
/* On any failure, report zeros rather than stale values. */
407 *txmem = *rxmem = *nagle = 0;
/*
 * (Darwin 8+ KPI) Configure a freshly-accepted/connected socket from
 * the ksocknal tunables: SO_LINGER (abort sends on close), optional
 * TCP_NODELAY, tx/rx buffer sizes, and keepalive.  Errors are logged;
 * the visible error-return paths are truncated in this fragment.
 */
413 ksocknal_lib_setup_sock (cfs_socket_t *sock)
421 socket_t so = C2B_SOCK(sock);
422 struct linger linger;
424 /* Ensure this socket aborts active sends immediately when we close
428 rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER, &linger, sizeof(linger));
430 CERROR ("Can't set SO_LINGER: %d\n", rc);
/* Disable Nagle when the tunable says so. */
434 if (!*ksocknal_tunables.ksnd_nagle) {
436 rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option));
438 CERROR ("Can't disable nagle: %d\n", rc);
443 rc = libcfs_sock_setbuf(sock,
444 *ksocknal_tunables.ksnd_tx_buffer_size,
445 *ksocknal_tunables.ksnd_rx_buffer_size);
447 CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
448 *ksocknal_tunables.ksnd_tx_buffer_size,
449 *ksocknal_tunables.ksnd_rx_buffer_size, rc);
453 /* snapshot tunables */
454 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
455 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
456 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
/* Keepalive is enabled only when all three tunables are positive. */
458 do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
459 option = (do_keepalive ? 1 : 0);
461 rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option));
463 CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
/* Darwin exposes only the idle time (TCP_KEEPALIVE); count/interval
 * have no per-socket knob here. */
469 rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE,
470 &keep_idle, sizeof(keep_idle));
/*
 * (Darwin 8+ KPI) "Push" any queued data on the connection by toggling
 * TCP_NODELAY (val not visible here).  A connsock ref guards against
 * the socket disappearing; a failed ref means shutdown is in progress.
 */
476 ksocknal_lib_push_conn(ksock_conn_t *conn)
482 rc = ksocknal_connsock_addref(conn);
483 if (rc != 0) /* being shut down */
485 sock = C2B_SOCK(conn->ksnc_sock);
487 rc = -sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val));
490 ksocknal_connsock_decref(conn);
494 extern void ksocknal_read_callback (ksock_conn_t *conn);
495 extern void ksocknal_write_callback (ksock_conn_t *conn);
/*
 * (Darwin 8+ KPI) Socket event upcall: under the global read lock,
 * dispatch to the scheduler's read and/or write callbacks for this
 * connection.  The conditions selecting read vs write are not visible
 * in this fragment.
 */
498 ksocknal_upcall(socket_t so, void *arg, int waitf)
500 ksock_conn_t *conn = (ksock_conn_t *)arg;
503 read_lock (&ksocknal_data.ksnd_global_lock);
507 ksocknal_read_callback (conn);
509 ksocknal_write_callback (conn);
511 read_unlock (&ksocknal_data.ksnd_global_lock);
/* (Darwin 8+) Callback management: nothing to save on OS X; set/reset
 * simply install or remove ksocknal_upcall via the libcfs socket-cb
 * helpers, passing the conn as the opaque argument. */
516 ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn)
518 /* No callback need to save in osx */
523 ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn)
525 libcfs_sock_set_cb(sock, ksocknal_upcall, (void *)conn);
530 ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn)
532 libcfs_sock_reset_cb(sock);
535 #else /* !__DARWIN8__ */
/*
 * (pre-Darwin8 BSD sockets) Send the tx's iovec fragments with
 * sosend().  sosend() returns no byte count, so progress is recovered
 * from the uio residual: partial progress with a retryable errno
 * (ERESTART/EINTR/EWOULDBLOCK) is reported as bytes sent; a clean
 * return reports nob - resid.
 */
538 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
540 #if SOCKNAL_SINGLE_FRAG_TX
541 struct iovec scratch;
542 struct iovec *scratchiov = &scratch;
543 unsigned int niov = 1;
545 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
546 unsigned int niov = tx->tx_niov;
548 struct socket *sock = conn->ksnc_sock;
553 .uio_iov = scratchiov,
556 .uio_resid = 0, /* This will be valued after a while */
557 .uio_segflg = UIO_SYSSPACE,
561 int flags = MSG_DONTWAIT;
/* Copy fragments to scratch and total the bytes to send. */
564 for (nob = i = 0; i < niov; i++) {
565 scratchiov[i] = tx->tx_iov[i];
566 nob += scratchiov[i].iov_len;
568 suio.uio_resid = nob;
571 rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
574 /* NB there is no return value can indicate how many
575 * have been sent and how many resid, we have to get
576 * sent bytes from suio. */
578 if (suio.uio_resid != nob &&\
579 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
580 /* We have sent something */
581 rc = nob - suio.uio_resid;
582 else if ( rc == EWOULDBLOCK )
583 /* Actually, EAGAIN and EWOULDBLOCK have same value in OSX */
588 rc = nob - suio.uio_resid;
/*
 * (pre-Darwin8 BSD sockets) Send the tx's kiov page fragments: kmap
 * each page into a scratch iovec, sosend(), unmap, then derive the
 * byte count from the uio residual exactly as in send_iov above.
 * Multi-frag kmap is skipped when SOCKNAL_RISK_KMAP_DEADLOCK is unset.
 */
594 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
596 #if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
597 struct iovec scratch;
598 struct iovec *scratchiov = &scratch;
599 unsigned int niov = 1;
601 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
602 unsigned int niov = tx->tx_nkiov;
604 struct socket *sock = conn->ksnc_sock;
605 lnet_kiov_t *kiov = tx->tx_kiov;
610 .uio_iov = scratchiov,
613 .uio_resid = 0, /* It should be valued after a while */
614 .uio_segflg = UIO_SYSSPACE,
618 int flags = MSG_DONTWAIT;
/* Map each page and build the iovec; total the bytes to send. */
621 for (nob = i = 0; i < niov; i++) {
622 scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
624 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
626 suio.uio_resid = nob;
629 rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
/* Unmap every page regardless of the send outcome. */
632 for (i = 0; i < niov; i++)
633 cfs_kunmap(kiov[i].kiov_page);
636 if (suio.uio_resid != nob &&\
637 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
638 /* We have sent something */
639 rc = nob - suio.uio_resid;
640 else if ( rc == EWOULDBLOCK )
641 /* EAGAIN and EWOULDBLOCK have the same value in OSX */
646 rc = nob - suio.uio_resid;
652 * liang: Hack of inpcb and tcpcb.
653 * To get tcpcb of a socket, and call tcp_output
/* Shadow copies of the kernel's private TCP/IP control-block layouts:
 * only the leading fields (up to t_flags / inp_ppcb) are mirrored so
 * that ksocknal_lib_eager_ack() below can poke t_flags directly.
 * Fragile by design — must track the running kernel's struct layout. */
664 LIST_HEAD(ks_tsegqe_head, ks_tseg_qent);
667 struct ks_tsegqe_head t_segq;
669 struct ks_tcptemp *unused;
671 struct inpcb *t_inpcb;
675 * There are more fields but we dont need
/* TCP flag bits mirrored from the kernel headers. */
680 #define TF_ACKNOW 0x00001
681 #define TF_DELACK 0x00002
684 LIST_ENTRY(ks_inpcb) inp_hash;
685 struct in_addr reserved1;
686 struct in_addr reserved2;
689 LIST_ENTRY(inpcb) inp_list;
692 * There are more fields but we dont need
697 #define ks_sotoinpcb(so) ((struct ks_inpcb *)(so)->so_pcb)
698 #define ks_intotcpcb(ip) ((struct ks_tcpcb *)(ip)->inp_ppcb)
/* NOTE(review): this macro expands to the kernel's intotcpcb/sotoinpcb,
 * not the ks_ shadows above — it appears unused in this file (callers
 * compose ks_sotoinpcb + ks_intotcpcb directly); confirm before use. */
699 #define ks_sototcpcb(so) (intotcpcb(sotoinpcb(so)))
/*
 * (pre-Darwin8) Force an immediate ACK: BSD has no TCP_QUICKACK, so
 * reach into the (shadowed) tcpcb, clear the delayed-ACK flag, set
 * ACKNOW, and call tcp_output() directly.
 */
702 ksocknal_lib_eager_ack (ksock_conn_t *conn)
704 struct socket *sock = conn->ksnc_sock;
705 struct ks_inpcb *inp = ks_sotoinpcb(sock);
706 struct ks_tcpcb *tp = ks_intotcpcb(inp);
710 extern int tcp_output(register struct ks_tcpcb *tp);
716 * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo
717 * to send immediate ACK.
719 if (tp && tp->t_flags & TF_DELACK){
720 tp->t_flags &= ~TF_DELACK;
721 tp->t_flags |= TF_ACKNOW;
722 (void) tcp_output(tp);
/*
 * (pre-Darwin8 BSD sockets) Receive into the pending rx iovec with
 * soreceive(); bytes received are recovered from the uio residual,
 * with partial progress on a retryable errno reported as a positive
 * count (same convention as the send paths above).
 */
732 ksocknal_lib_recv_iov (ksock_conn_t *conn)
734 #if SOCKNAL_SINGLE_FRAG_RX
735 struct iovec scratch;
736 struct iovec *scratchiov = &scratch;
737 unsigned int niov = 1;
739 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
740 unsigned int niov = conn->ksnc_rx_niov;
742 struct iovec *iov = conn->ksnc_rx_iov;
747 .uio_iov = scratchiov,
750 .uio_resid = 0, /* It should be valued after a while */
751 .uio_segflg = UIO_SYSSPACE,
755 int flags = MSG_DONTWAIT;
/* Copy fragments to scratch and total the bytes we expect to read. */
758 for (nob = i = 0; i < niov; i++) {
759 scratchiov[i] = iov[i];
760 nob += scratchiov[i].iov_len;
762 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
764 ruio.uio_resid = nob;
767 rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, &flags);
770 if (ruio.uio_resid != nob && \
771 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK || rc == EAGAIN))
772 /* data partially received */
773 rc = nob - ruio.uio_resid;
774 else if (rc == EWOULDBLOCK)
775 /* EAGAIN and EWOULDBLOCK have the same value in OSX */
780 rc = nob - ruio.uio_resid;
/*
 * (pre-Darwin8 BSD sockets) Receive into the pending kiov pages:
 * kmap each page into a scratch iovec, soreceive(), unmap, and derive
 * the byte count from the uio residual as in recv_iov above.
 */
786 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
788 #if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
789 struct iovec scratch;
790 struct iovec *scratchiov = &scratch;
791 unsigned int niov = 1;
793 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
794 unsigned int niov = conn->ksnc_rx_nkiov;
796 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
801 .uio_iov = scratchiov,
805 .uio_segflg = UIO_SYSSPACE,
809 int flags = MSG_DONTWAIT;
/* Map each page and build the iovec; total the bytes expected. */
812 for (nob = i = 0; i < niov; i++) {
813 scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
814 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
816 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
818 ruio.uio_resid = nob;
821 rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, NULL, &flags);
/* Unmap every page regardless of the receive outcome. */
824 for (i = 0; i < niov; i++)
825 cfs_kunmap(kiov[i].kiov_page);
828 if (ruio.uio_resid != nob && \
829 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
830 /* data partially received */
831 rc = nob - ruio.uio_resid;
832 else if (rc == EWOULDBLOCK)
833 /* receive blocked, EWOULDBLOCK == EAGAIN */
838 rc = nob - ruio.uio_resid;
/*
 * (pre-Darwin8) Report socket buffer sizes and Nagle state.  Takes a
 * connsock ref (failure => conn closing => zero all outputs), reads
 * buffer sizes via libcfs_sock_getbuf(), then TCP_NODELAY through a
 * sockopt structure and sogetopt() (rc negated to Lustre convention).
 */
844 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
846 struct socket *sock = conn->ksnc_sock;
849 rc = ksocknal_connsock_addref(conn);
851 LASSERT (conn->ksnc_closing);
852 *txmem = *rxmem = *nagle = 0;
855 rc = libcfs_sock_getbuf(sock, txmem, rxmem);
861 len = sizeof(*nagle);
862 bzero(&sopt, sizeof sopt);
863 sopt.sopt_dir = SOPT_GET;
864 sopt.sopt_level = IPPROTO_TCP;
865 sopt.sopt_name = TCP_NODELAY;
866 sopt.sopt_val = nagle;
867 sopt.sopt_valsize = len;
870 rc = -sogetopt(sock, &sopt);
874 ksocknal_connsock_decref(conn);
/* On any failure, report zeros rather than stale values. */
879 *txmem = *rxmem = *nagle = 0;
/*
 * (pre-Darwin8) Configure a new socket from the ksocknal tunables via
 * sosetopt(): buffer sizes, SO_LINGER (abort sends on close), optional
 * TCP_NODELAY, SO_KEEPALIVE, and the keepalive idle time.  Each sockopt
 * call rebuilds the sopt structure from scratch with bzero().
 */
884 ksocknal_lib_setup_sock (struct socket *so)
893 struct linger linger;
896 rc = libcfs_sock_setbuf(so,
897 *ksocknal_tunables.ksnd_tx_buffer_size,
898 *ksocknal_tunables.ksnd_rx_buffer_size);
900 CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
901 *ksocknal_tunables.ksnd_tx_buffer_size,
902 *ksocknal_tunables.ksnd_rx_buffer_size, rc);
906 /* Ensure this socket aborts active sends immediately when we close
908 bzero(&sopt, sizeof sopt);
912 sopt.sopt_dir = SOPT_SET;
913 sopt.sopt_level = SOL_SOCKET;
914 sopt.sopt_name = SO_LINGER;
915 sopt.sopt_val = &linger;
916 sopt.sopt_valsize = sizeof(linger);
919 rc = -sosetopt(so, &sopt);
921 CERROR ("Can't set SO_LINGER: %d\n", rc);
/* Disable Nagle when the tunable says so. */
925 if (!*ksocknal_tunables.ksnd_nagle) {
927 bzero(&sopt, sizeof sopt);
928 sopt.sopt_dir = SOPT_SET;
929 sopt.sopt_level = IPPROTO_TCP;
930 sopt.sopt_name = TCP_NODELAY;
931 sopt.sopt_val = &option;
932 sopt.sopt_valsize = sizeof(option);
933 rc = -sosetopt(so, &sopt);
935 CERROR ("Can't disable nagle: %d\n", rc);
940 /* snapshot tunables */
941 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
942 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
943 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
/* Keepalive is enabled only when all three tunables are positive. */
945 do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
946 option = (do_keepalive ? 1 : 0);
947 bzero(&sopt, sizeof sopt);
948 sopt.sopt_dir = SOPT_SET;
949 sopt.sopt_level = SOL_SOCKET;
950 sopt.sopt_name = SO_KEEPALIVE;
951 sopt.sopt_val = &option;
952 sopt.sopt_valsize = sizeof(option);
953 rc = -sosetopt(so, &sopt);
955 CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
960 /* no more setting, just return */
/* Only the idle time is settable per-socket on Darwin (TCP_KEEPALIVE);
 * count and interval have no per-socket knob. */
965 bzero(&sopt, sizeof sopt);
966 sopt.sopt_dir = SOPT_SET;
967 sopt.sopt_level = IPPROTO_TCP;
968 sopt.sopt_name = TCP_KEEPALIVE;
969 sopt.sopt_val = &keep_idle;
970 sopt.sopt_valsize = sizeof(keep_idle);
971 rc = -sosetopt(so, &sopt);
973 CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc);
/*
 * (pre-Darwin8) Push queued data by setting TCP_NODELAY through
 * sosetopt(), guarded by a connsock ref (failure => shutting down).
 * The sosetopt() return value is deliberately (?) ignored here —
 * a push is best-effort.
 */
982 ksocknal_lib_push_conn(ksock_conn_t *conn)
990 rc = ksocknal_connsock_addref(conn);
991 if (rc != 0) /* being shut down */
993 sock = conn->ksnc_sock;
994 bzero(&sopt, sizeof sopt);
995 sopt.sopt_dir = SOPT_SET;
996 sopt.sopt_level = IPPROTO_TCP;
997 sopt.sopt_name = TCP_NODELAY;
998 sopt.sopt_val = &val;
999 sopt.sopt_valsize = sizeof val;
1002 sosetopt(sock, &sopt);
1005 ksocknal_connsock_decref(conn);
1010 extern void ksocknal_read_callback (ksock_conn_t *conn);
1011 extern void ksocknal_write_callback (ksock_conn_t *conn);
/*
 * (pre-Darwin8) Socket upcall, installed on both send and receive
 * buffers.  Under the global read lock: if the receive buffer still
 * has SB_UPCALL set and the conn wants bytes while the socket is
 * readable, fire the read callback; likewise fire the write callback
 * when the send buffer is writable.
 */
1014 ksocknal_upcall(struct socket *so, caddr_t arg, int waitf)
1016 ksock_conn_t *conn = (ksock_conn_t *)arg;
1019 read_lock (&ksocknal_data.ksnd_global_lock);
1023 if (so->so_rcv.sb_flags & SB_UPCALL) {
1024 extern int soreadable(struct socket *so);
1025 if (conn->ksnc_rx_nob_wanted && soreadable(so))
1026 /* To verify whether the upcall is for receive */
1027 ksocknal_read_callback (conn);
1030 if (so->so_snd.sb_flags & SB_UPCALL){
1031 extern int sowriteable(struct socket *so);
1032 if (sowriteable(so))
1033 /* socket is writable */
1034 ksocknal_write_callback(conn);
1037 read_unlock (&ksocknal_data.ksnd_global_lock);
/* (pre-Darwin8) Callback management: nothing to save on OS X.  set
 * wires ksocknal_upcall into so_upcall/so_upcallarg and flags both
 * socket buffers with SB_UPCALL (with a 2s receive timeout, presumably
 * as a poll fallback — confirm); act fires the upcall by hand; reset
 * undoes the wiring. */
1043 ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
1045 /* No callback need to save in osx */
1050 ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
1055 sock->so_upcallarg = (void *)conn;
1056 sock->so_upcall = ksocknal_upcall;
1057 sock->so_snd.sb_timeo = 0;
1058 sock->so_rcv.sb_timeo = cfs_time_seconds(2);
1059 sock->so_rcv.sb_flags |= SB_UPCALL;
1060 sock->so_snd.sb_flags |= SB_UPCALL;
1066 ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn)
1071 ksocknal_upcall (sock, (void *)conn, 0);
1076 ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
1081 sock->so_rcv.sb_flags &= ~SB_UPCALL;
1082 sock->so_snd.sb_flags &= ~SB_UPCALL;
1083 sock->so_upcall = NULL;
1084 sock->so_upcallarg = NULL;
1088 #endif /* !__DARWIN8__ */