4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/klnds/socklnd/socklnd_lib-darwin.c
38 * Darwin porting library
39 * Make things easy to port
41 * Author: Phil Schwan <phil@clusterfs.com>
43 #include <mach/mach_types.h>
45 #include <netinet/in.h>
46 #include <netinet/tcp.h>
51 # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
/* Expose the ksocknal tunables as read/write integers in the BSD sysctl
 * tree under lnet.ksocknal.* when sysfs-style module parameters are not
 * available.  Each SYSCTL_INT points at a field of the global
 * ksocknal_tunables structure.
 * NOTE(review): this is a numbered listing with interior lines elided —
 * several SYSCTL_INT() invocations are visibly missing their trailing
 * "0, <name>" arguments; consult the full source before editing. */
55 SYSCTL_NODE (_lnet, OID_AUTO, ksocknal, CTLFLAG_RW,
56 0, "ksocknal_sysctl");
58 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, timeout,
59 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_timeout,
61 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, credits,
62 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_credits,
64 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, peer_credits,
65 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_peertxcredits,
67 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nconnds,
68 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nconnds,
70 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_reconnectms,
71 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_reconnectms,
72 0, "min_reconnectms");
73 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, max_reconnectms,
74 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_max_reconnectms,
75 0, "max_reconnectms");
76 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, eager_ack,
77 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_eager_ack,
79 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, typed,
80 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_typed_conns,
82 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_bulk,
83 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_bulk,
85 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, rx_buffer_size,
86 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_rx_buffer_size,
88 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, tx_buffer_size,
89 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_tx_buffer_size,
91 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nagle,
92 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nagle,
/* Keepalive knobs: idle time before probing, probe count, probe interval. */
94 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_idle,
95 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_idle,
97 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_count,
98 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_count,
99 0, "keepalive_count");
100 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_intvl,
101 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_intvl,
102 0, "keepalive_intvl");
/* Top-level sysctl registration table: one entry per tunable declared
 * above.  Registered/unregistered by the init/fini helpers below.
 * NOTE(review): the table's terminating entry and closing brace are
 * elided from this listing. */
104 struct ctl_table ksocknal_top_ctl_table [] = {
105 &sysctl__lnet_ksocknal,
106 &sysctl__lnet_ksocknal_timeout,
107 &sysctl__lnet_ksocknal_credits,
108 &sysctl__lnet_ksocknal_peer_credits,
109 &sysctl__lnet_ksocknal_nconnds,
110 &sysctl__lnet_ksocknal_min_reconnectms,
111 &sysctl__lnet_ksocknal_max_reconnectms,
112 &sysctl__lnet_ksocknal_eager_ack,
113 &sysctl__lnet_ksocknal_typed,
114 &sysctl__lnet_ksocknal_min_bulk,
115 &sysctl__lnet_ksocknal_rx_buffer_size,
116 &sysctl__lnet_ksocknal_tx_buffer_size,
117 &sysctl__lnet_ksocknal_nagle,
118 &sysctl__lnet_ksocknal_keepalive_idle,
119 &sysctl__lnet_ksocknal_keepalive_count,
120 &sysctl__lnet_ksocknal_keepalive_intvl,
/* Register the table; records the handle in ksnd_sysctl so fini can
 * unregister it.  The NULL check presumably reports registration
 * failure — the branch body is elided here. */
125 ksocknal_lib_tunables_init ()
127 ksocknal_tunables.ksnd_sysctl =
128 register_sysctl_table (ksocknal_top_ctl_table);
130 if (ksocknal_tunables.ksnd_sysctl == NULL)
/* Unregister only if init actually registered the table. */
137 ksocknal_lib_tunables_fini ()
139 if (ksocknal_tunables.ksnd_sysctl != NULL)
140 unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl);
/* Fallback stubs for builds without sysctl support (bodies elided;
 * presumably no-ops — TODO confirm against full source). */
144 ksocknal_lib_tunables_init ()
150 ksocknal_lib_tunables_fini ()
156 * To use bigger buffer for socket:
157 * 1. Increase nmbclusters (Cannot be increased by sysctl because it's read-only, so
158 * we must patch kernel).
159 * 2. Increase net.inet.tcp.reass.maxsegments
160 * 3. Increase net.inet.tcp.sendspace
161 * 4. Increase net.inet.tcp.recvspace
162 * 5. Increase kern.ipc.maxsockbuf
/* Upper bound for socket buffer sizing on Darwin (1152 KiB). */
164 #define KSOCKNAL_MAX_BUFFER (1152*1024)
/* Fill in the connection's peer and local IP/port by querying the
 * underlying socket (second arg 1 = remote end, 0 = local end).
 * Errors from libcfs_sock_getaddr are logged via CERROR; the elided
 * lines presumably store the peer address and return rc — TODO confirm. */
167 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
169 int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
173 /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
174 LASSERT (!conn->ksnc_closing);
177 CERROR ("Error %d getting sock peer IP\n", rc);
181 rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
182 &conn->ksnc_myipaddr, NULL);
184 CERROR ("Error %d getting sock local IP\n", rc);
/* Darwin 8+ path: send the tx descriptor's plain iovec fragments with the
 * non-blocking sock_send() KPI.  With SOCKNAL_SINGLE_FRAG_TX only one
 * fragment is sent per call; otherwise the scheduler's scratch iovec
 * array is used.  rc is negated so callers see negative errno. */
194 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
196 socket_t sock = C2B_SOCK(conn->ksnc_sock);
201 #if SOCKNAL_SINGLE_FRAG_TX
202 struct iovec scratch;
203 struct iovec *scratchiov = &scratch;
204 unsigned int niov = 1;
206 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
207 unsigned int niov = tx->tx_niov;
209 struct msghdr msg = {
212 .msg_iov = scratchiov,
216 .msg_flags = MSG_DONTWAIT
/* Copy iovs into scratch (socket ops may consume them) and total up nob. */
221 for (nob = i = 0; i < niov; i++) {
222 scratchiov[i] = tx->tx_iov[i];
223 nob += scratchiov[i].iov_len;
228 * Linux has MSG_MORE, do we have anything to
229 * reduce number of partial TCP segments sent?
231 rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
/* As above, but for page-based (kiov) fragments: each page must be
 * kmap()ed into the scratch iovec before sending and kunmap()ed after. */
238 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
240 socket_t sock = C2B_SOCK(conn->ksnc_sock);
241 lnet_kiov_t *kiov = tx->tx_kiov;
246 #if SOCKNAL_SINGLE_FRAG_TX
247 struct iovec scratch;
248 struct iovec *scratchiov = &scratch;
249 unsigned int niov = 1;
251 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
252 unsigned int niov = tx->tx_nkiov;
254 struct msghdr msg = {
257 .msg_iov = scratchiov,
261 .msg_flags = MSG_DONTWAIT
266 for (nob = i = 0; i < niov; i++) {
267 scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
269 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
274 * Linux has MSG_MORE, do we have anything to
275 * reduce number of partial TCP segments sent?
277 rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
/* Always unmap every page, regardless of send outcome. */
278 for (i = 0; i < niov; i++)
279 kunmap(kiov[i].kiov_page);
/* Darwin 8+ path: receive into the connection's pending plain-iovec
 * fragments via the non-blocking sock_receive() KPI.  rc is negated so
 * callers see negative errno on failure. */
286 ksocknal_lib_recv_iov (ksock_conn_t *conn)
288 #if SOCKNAL_SINGLE_FRAG_RX
289 struct iovec scratch;
290 struct iovec *scratchiov = &scratch;
291 unsigned int niov = 1;
293 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
294 unsigned int niov = conn->ksnc_rx_niov;
296 struct iovec *iov = conn->ksnc_rx_iov;
297 struct msghdr msg = {
300 .msg_iov = scratchiov,
/* Copy iovs into scratch and compute the total byte count. */
313 for (nob = i = 0; i < niov; i++) {
314 scratchiov[i] = iov[i];
315 nob += scratchiov[i].iov_len;
317 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
318 rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
/* As above, but receiving into page-based (kiov) fragments; pages are
 * kmap()ed into the scratch iovec for the call and kunmap()ed after. */
326 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
328 #if SOCKNAL_SINGLE_FRAG_RX
329 struct iovec scratch;
330 struct iovec *scratchiov = &scratch;
331 unsigned int niov = 1;
333 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
334 unsigned int niov = conn->ksnc_rx_nkiov;
336 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
337 struct msghdr msg = {
340 .msg_iov = scratchiov,
351 /* NB we can't trust socket ops to either consume our iovs
352 * or leave them alone. */
353 for (nob = i = 0; i < niov; i++) {
354 scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + \
356 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
358 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
359 rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
/* Always unmap every page, regardless of receive outcome. */
360 for (i = 0; i < niov; i++)
361 kunmap(kiov[i].kiov_page);
/* Darwin 8+ eager-ACK hook (body elided in this listing — presumably a
 * no-op since the KPI exposes no TCP_QUICKACK equivalent; TODO confirm). */
368 ksocknal_lib_eager_ack (ksock_conn_t *conn)
/* Report the socket's tx/rx buffer sizes and Nagle state.  Takes a
 * connsock ref first; on failure the conn must already be closing and
 * all outputs are zeroed. */
374 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
376 socket_t sock = C2B_SOCK(conn->ksnc_sock);
380 rc = ksocknal_connsock_addref(conn);
382 LASSERT (conn->ksnc_closing);
383 *txmem = *rxmem = *nagle = 0;
386 rc = libcfs_sock_getbuf(conn->ksnc_sock, txmem, rxmem);
388 len = sizeof(*nagle);
389 rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
392 ksocknal_connsock_decref(conn);
/* On any error all outputs are zeroed. */
397 *txmem = *rxmem = *nagle = 0;
/* Configure a freshly accepted/connected socket per the tunables:
 * SO_LINGER (abort on close), optional TCP_NODELAY, buffer sizes,
 * and keepalive (enabled only when all three keepalive tunables > 0). */
403 ksocknal_lib_setup_sock (cfs_socket_t *sock)
411 socket_t so = C2B_SOCK(sock);
412 struct linger linger;
414 /* Ensure this socket aborts active sends immediately when we close
418 rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER, &linger, sizeof(linger));
420 CERROR ("Can't set SO_LINGER: %d\n", rc);
/* Nagle tunable is inverted: nonzero ksnd_nagle means leave Nagle on. */
424 if (!*ksocknal_tunables.ksnd_nagle) {
426 rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option));
428 CERROR ("Can't disable nagle: %d\n", rc);
433 rc = libcfs_sock_setbuf(sock,
434 *ksocknal_tunables.ksnd_tx_buffer_size,
435 *ksocknal_tunables.ksnd_rx_buffer_size);
437 CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
438 *ksocknal_tunables.ksnd_tx_buffer_size,
439 *ksocknal_tunables.ksnd_rx_buffer_size, rc);
443 /* snapshot tunables */
444 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
445 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
446 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
448 do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
449 option = (do_keepalive ? 1 : 0);
451 rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option));
453 CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
/* Darwin's TCP_KEEPALIVE sets only the idle time; there is no
 * per-socket count/interval option here. */
459 rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE,
460 &keep_idle, sizeof(keep_idle));
/* "Push" any queued data by toggling TCP_NODELAY on the live socket.
 * Takes a connsock ref to keep the socket valid for the duration. */
466 ksocknal_lib_push_conn(ksock_conn_t *conn)
472 rc = ksocknal_connsock_addref(conn);
473 if (rc != 0) /* being shut down */
475 sock = C2B_SOCK(conn->ksnc_sock);
477 rc = -sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val));
480 ksocknal_connsock_decref(conn);
484 extern void ksocknal_read_callback (ksock_conn_t *conn);
485 extern void ksocknal_write_callback (ksock_conn_t *conn);
/* Socket upcall installed via libcfs_sock_set_cb: dispatches to the
 * scheduler's read/write callbacks under the global read lock.  The
 * elided lines presumably distinguish read vs write events — TODO
 * confirm against full source. */
488 ksocknal_upcall(socket_t so, void *arg, int waitf)
490 ksock_conn_t *conn = (ksock_conn_t *)arg;
493 read_lock (&ksocknal_data.ksnd_global_lock);
497 ksocknal_read_callback (conn);
499 ksocknal_write_callback (conn);
501 read_unlock (&ksocknal_data.ksnd_global_lock);
/* Darwin needs no saved callback state, unlike the Linux port. */
506 ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn)
508 /* No callback need to save in osx */
/* Install the upcall with the conn as its argument. */
513 ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn)
515 libcfs_sock_set_cb(sock, ksocknal_upcall, (void *)conn);
/* Remove the upcall when tearing the connection down. */
520 ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn)
522 libcfs_sock_reset_cb(sock);
525 #else /* !__DARWIN8__ */
/* Pre-Darwin8 path: send plain iovec fragments with the in-kernel BSD
 * sosend().  sosend() returns a positive errno and reports progress only
 * through the residual count in the uio, so the bytes actually sent are
 * recovered as nob - uio_resid. */
528 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
530 #if SOCKNAL_SINGLE_FRAG_TX
531 struct iovec scratch;
532 struct iovec *scratchiov = &scratch;
533 unsigned int niov = 1;
535 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
536 unsigned int niov = tx->tx_niov;
538 struct socket *sock = conn->ksnc_sock;
543 .uio_iov = scratchiov,
546 .uio_resid = 0, /* This will be valued after a while */
547 .uio_segflg = UIO_SYSSPACE,
551 int flags = MSG_DONTWAIT;
554 for (nob = i = 0; i < niov; i++) {
555 scratchiov[i] = tx->tx_iov[i];
556 nob += scratchiov[i].iov_len;
558 suio.uio_resid = nob;
561 rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
564 /* NB there is no return value can indicate how many
565 * have been sent and how many resid, we have to get
566 * sent bytes from suio. */
568 if (suio.uio_resid != nob &&\
569 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
570 /* We have sent something */
571 rc = nob - suio.uio_resid;
572 else if ( rc == EWOULDBLOCK )
573 /* Actually, EAGAIN and EWOULDBLOCK have same value in OSX */
/* Full success: report the byte count that went out. */
578 rc = nob - suio.uio_resid;
/* As above but for page (kiov) fragments: kmap each page into the
 * scratch iovec, sosend, then kunmap unconditionally.  The kmap-deadlock
 * guard (SOCKNAL_RISK_KMAP_DEADLOCK) forces single-fragment sends. */
584 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
586 #if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
587 struct iovec scratch;
588 struct iovec *scratchiov = &scratch;
589 unsigned int niov = 1;
591 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
592 unsigned int niov = tx->tx_nkiov;
594 struct socket *sock = conn->ksnc_sock;
595 lnet_kiov_t *kiov = tx->tx_kiov;
600 .uio_iov = scratchiov,
603 .uio_resid = 0, /* It should be valued after a while */
604 .uio_segflg = UIO_SYSSPACE,
608 int flags = MSG_DONTWAIT;
611 for (nob = i = 0; i < niov; i++) {
612 scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
614 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
616 suio.uio_resid = nob;
619 rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
622 for (i = 0; i < niov; i++)
623 kunmap(kiov[i].kiov_page);
626 if (suio.uio_resid != nob &&\
627 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
628 /* We have sent something */
629 rc = nob - suio.uio_resid;
630 else if ( rc == EWOULDBLOCK )
631 /* EAGAIN and EWOULDBLOCK have same value in OSX */
636 rc = nob - suio.uio_resid;
642 * liang: Hack of inpcb and tcpcb.
643 * To get tcpcb of a socket, and call tcp_output
/* Minimal mirrors of the kernel's private inpcb/tcpcb layouts, declared
 * here only so eager_ack below can reach t_flags and call tcp_output().
 * Fragile by construction: they must stay layout-compatible with the
 * running kernel's structures. */
654 LIST_HEAD(ks_tsegqe_head, ks_tseg_qent);
657 struct ks_tsegqe_head t_segq;
659 struct ks_tcptemp *unused;
661 struct inpcb *t_inpcb;
665 * There are more fields but we don't need
/* TCP flag bits mirrored from the kernel's tcpcb t_flags. */
670 #define TF_ACKNOW 0x00001
671 #define TF_DELACK 0x00002
674 LIST_ENTRY(ks_inpcb) inp_hash;
675 struct in_addr reserved1;
676 struct in_addr reserved2;
679 LIST_ENTRY(inpcb) inp_list;
682 * There are more fields but we don't need
/* Accessor macros mirroring the kernel's sotoinpcb/intotcpcb chain. */
687 #define ks_sotoinpcb(so) ((struct ks_inpcb *)(so)->so_pcb)
688 #define ks_intotcpcb(ip) ((struct ks_tcpcb *)(ip)->inp_ppcb)
689 #define ks_sototcpcb(so) (intotcpcb(sotoinpcb(so)))
/* Force an immediate ACK (BSD has no TCP_QUICKACK): if an ACK is being
 * delayed, flip DELACK -> ACKNOW and push it out via tcp_output(). */
692 ksocknal_lib_eager_ack (ksock_conn_t *conn)
694 struct socket *sock = conn->ksnc_sock;
695 struct ks_inpcb *inp = ks_sotoinpcb(sock);
696 struct ks_tcpcb *tp = ks_intotcpcb(inp);
700 extern int tcp_output(register struct ks_tcpcb *tp);
706 * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo
707 * to send immediate ACK.
709 if (tp && tp->t_flags & TF_DELACK){
710 tp->t_flags &= ~TF_DELACK;
711 tp->t_flags |= TF_ACKNOW;
712 (void) tcp_output(tp);
/* Pre-Darwin8 path: receive into the pending plain-iovec fragments with
 * the in-kernel BSD soreceive().  As with sosend, progress is recovered
 * from the uio residual: rc = nob - uio_resid on partial/success. */
722 ksocknal_lib_recv_iov (ksock_conn_t *conn)
724 #if SOCKNAL_SINGLE_FRAG_RX
725 struct iovec scratch;
726 struct iovec *scratchiov = &scratch;
727 unsigned int niov = 1;
729 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
730 unsigned int niov = conn->ksnc_rx_niov;
732 struct iovec *iov = conn->ksnc_rx_iov;
737 .uio_iov = scratchiov,
740 .uio_resid = 0, /* It should be valued after a while */
741 .uio_segflg = UIO_SYSSPACE,
745 int flags = MSG_DONTWAIT;
748 for (nob = i = 0; i < niov; i++) {
749 scratchiov[i] = iov[i];
750 nob += scratchiov[i].iov_len;
752 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
754 ruio.uio_resid = nob;
757 rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, &flags);
760 if (ruio.uio_resid != nob && \
761 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK || rc == EAGAIN))
762 /* data partially received */
763 rc = nob - ruio.uio_resid;
764 else if (rc == EWOULDBLOCK)
765 /* EAGAIN and EWOULDBLOCK have same value in OSX */
770 rc = nob - ruio.uio_resid;
/* As above but into page (kiov) fragments: kmap each page into the
 * scratch iovec, soreceive, then kunmap unconditionally. */
776 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
778 #if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
779 struct iovec scratch;
780 struct iovec *scratchiov = &scratch;
781 unsigned int niov = 1;
783 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
784 unsigned int niov = conn->ksnc_rx_nkiov;
786 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
791 .uio_iov = scratchiov,
795 .uio_segflg = UIO_SYSSPACE,
799 int flags = MSG_DONTWAIT;
802 for (nob = i = 0; i < niov; i++) {
803 scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
804 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
806 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
808 ruio.uio_resid = nob;
811 rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, NULL, &flags);
814 for (i = 0; i < niov; i++)
815 kunmap(kiov[i].kiov_page);
818 if (ruio.uio_resid != nob && \
819 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
820 /* data partially received */
821 rc = nob - ruio.uio_resid;
822 else if (rc == EWOULDBLOCK)
823 /* receive blocked, EWOULDBLOCK == EAGAIN */
828 rc = nob - ruio.uio_resid;
/* Report the socket's tx/rx buffer sizes and Nagle state using the
 * in-kernel sockopt struct + sogetopt() interface.  Takes a connsock
 * ref first; on failure the conn must already be closing and all
 * outputs are zeroed. */
834 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
836 struct socket *sock = conn->ksnc_sock;
839 rc = ksocknal_connsock_addref(conn);
841 LASSERT (conn->ksnc_closing);
842 *txmem = *rxmem = *nagle = 0;
845 rc = libcfs_sock_getbuf(sock, txmem, rxmem);
851 len = sizeof(*nagle);
852 bzero(&sopt, sizeof sopt);
853 sopt.sopt_dir = SOPT_GET;
854 sopt.sopt_level = IPPROTO_TCP;
855 sopt.sopt_name = TCP_NODELAY;
856 sopt.sopt_val = nagle;
857 sopt.sopt_valsize = len;
860 rc = -sogetopt(sock, &sopt);
864 ksocknal_connsock_decref(conn);
/* On any error all outputs are zeroed. */
869 *txmem = *rxmem = *nagle = 0;
/* Configure a new socket per the tunables: buffer sizes, SO_LINGER
 * (abort on close), optional TCP_NODELAY, and keepalive (enabled only
 * when all three keepalive tunables > 0).  Each option goes through a
 * freshly bzero'd struct sockopt and sosetopt(). */
874 ksocknal_lib_setup_sock (struct socket *so)
883 struct linger linger;
886 rc = libcfs_sock_setbuf(so,
887 *ksocknal_tunables.ksnd_tx_buffer_size,
888 *ksocknal_tunables.ksnd_rx_buffer_size);
890 CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
891 *ksocknal_tunables.ksnd_tx_buffer_size,
892 *ksocknal_tunables.ksnd_rx_buffer_size, rc);
896 /* Ensure this socket aborts active sends immediately when we close
898 bzero(&sopt, sizeof sopt);
902 sopt.sopt_dir = SOPT_SET;
903 sopt.sopt_level = SOL_SOCKET;
904 sopt.sopt_name = SO_LINGER;
905 sopt.sopt_val = &linger;
906 sopt.sopt_valsize = sizeof(linger);
909 rc = -sosetopt(so, &sopt);
911 CERROR ("Can't set SO_LINGER: %d\n", rc);
/* Nagle tunable is inverted: nonzero ksnd_nagle means leave Nagle on. */
915 if (!*ksocknal_tunables.ksnd_nagle) {
917 bzero(&sopt, sizeof sopt);
918 sopt.sopt_dir = SOPT_SET;
919 sopt.sopt_level = IPPROTO_TCP;
920 sopt.sopt_name = TCP_NODELAY;
921 sopt.sopt_val = &option;
922 sopt.sopt_valsize = sizeof(option);
923 rc = -sosetopt(so, &sopt);
925 CERROR ("Can't disable nagle: %d\n", rc);
930 /* snapshot tunables */
931 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
932 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
933 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
935 do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
936 option = (do_keepalive ? 1 : 0);
937 bzero(&sopt, sizeof sopt);
938 sopt.sopt_dir = SOPT_SET;
939 sopt.sopt_level = SOL_SOCKET;
940 sopt.sopt_name = SO_KEEPALIVE;
941 sopt.sopt_val = &option;
942 sopt.sopt_valsize = sizeof(option);
943 rc = -sosetopt(so, &sopt);
945 CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
950 /* no more setting, just return */
/* TCP_KEEPALIVE sets only the idle time on Darwin; there is no
 * per-socket count/interval option. */
955 bzero(&sopt, sizeof sopt);
956 sopt.sopt_dir = SOPT_SET;
957 sopt.sopt_level = IPPROTO_TCP;
958 sopt.sopt_name = TCP_KEEPALIVE;
959 sopt.sopt_val = &keep_idle;
960 sopt.sopt_valsize = sizeof(keep_idle);
961 rc = -sosetopt(so, &sopt);
963 CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc);
/* "Push" any queued data by toggling TCP_NODELAY on the live socket via
 * the in-kernel sockopt interface.  Takes a connsock ref for the
 * duration; the sosetopt return is deliberately ignored (best effort). */
972 ksocknal_lib_push_conn(ksock_conn_t *conn)
980 rc = ksocknal_connsock_addref(conn);
981 if (rc != 0) /* being shut down */
983 sock = conn->ksnc_sock;
984 bzero(&sopt, sizeof sopt);
985 sopt.sopt_dir = SOPT_SET;
986 sopt.sopt_level = IPPROTO_TCP;
987 sopt.sopt_name = TCP_NODELAY;
988 sopt.sopt_val = &val;
989 sopt.sopt_valsize = sizeof val;
992 sosetopt(sock, &sopt);
995 ksocknal_connsock_decref(conn);
1000 extern void ksocknal_read_callback (ksock_conn_t *conn);
1001 extern void ksocknal_write_callback (ksock_conn_t *conn);
/* so_upcall handler: under the global read lock, dispatch to the read
 * callback when the receive side has SB_UPCALL set, data is wanted and
 * the socket is readable, and to the write callback when the send side
 * has SB_UPCALL set and the socket is writable. */
1004 ksocknal_upcall(struct socket *so, caddr_t arg, int waitf)
1006 ksock_conn_t *conn = (ksock_conn_t *)arg;
1009 read_lock (&ksocknal_data.ksnd_global_lock);
1013 if (so->so_rcv.sb_flags & SB_UPCALL) {
1014 extern int soreadable(struct socket *so);
1015 if (conn->ksnc_rx_nob_wanted && soreadable(so))
1016 /* To verify whether the upcall is for receive */
1017 ksocknal_read_callback (conn);
1020 if (so->so_snd.sb_flags & SB_UPCALL){
1021 extern int sowriteable(struct socket *so);
1022 if (sowriteable(so))
1023 /* socket is writable */
1024 ksocknal_write_callback(conn);
1027 read_unlock (&ksocknal_data.ksnd_global_lock);
/* Darwin needs no saved callback state, unlike the Linux port. */
1033 ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
1035 /* No callback need to save in osx */
/* Install the upcall directly on the socket buffers: zero send timeout,
 * 2-second receive timeout, SB_UPCALL on both sides. */
1040 ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
1045 sock->so_upcallarg = (void *)conn;
1046 sock->so_upcall = ksocknal_upcall;
1047 sock->so_snd.sb_timeo = 0;
1048 sock->so_rcv.sb_timeo = cfs_time_seconds(2);
1049 sock->so_rcv.sb_flags |= SB_UPCALL;
1050 sock->so_snd.sb_flags |= SB_UPCALL;
/* Synthesize an upcall to kick processing of any already-pending events. */
1056 ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn)
1061 ksocknal_upcall (sock, (void *)conn, 0);
/* Tear the upcall back down: clear SB_UPCALL and the function/arg. */
1066 ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
1071 sock->so_rcv.sb_flags &= ~SB_UPCALL;
1072 sock->so_snd.sb_flags &= ~SB_UPCALL;
1073 sock->so_upcall = NULL;
1074 sock->so_upcallarg = NULL;
1078 #endif /* !__DARWIN8__ */