4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
31 * This file is part of Lustre, http://www.lustre.org/
32 * Lustre is a trademark of Sun Microsystems, Inc.
34 * lnet/klnds/socklnd/socklnd_lib-darwin.c
36 * Darwin porting library
37 * Make things easy to port
39 * Author: Phil Schwan <phil@clusterfs.com>
41 #include <mach/mach_types.h>
43 #include <netinet/in.h>
44 #include <netinet/tcp.h>
/*
 * sysctl knobs exposing the ksocknal tunables (timeout, credits, buffer
 * sizes, keepalive parameters, ...) under lnet.ksocknal.*, compiled in when
 * the kernel supports sysctl and module parameters are unavailable.
 * NOTE(review): the visible text appears truncated — several SYSCTL_INT()
 * invocations are missing their trailing "default, name" arguments; compare
 * against the complete entries (e.g. min_reconnectms) before editing.
 */
49 # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
53 SYSCTL_NODE (_lnet, OID_AUTO, ksocknal, CTLFLAG_RW,
54 0, "ksocknal_sysctl");
56 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, timeout,
57 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_timeout,
59 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, credits,
60 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_credits,
62 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, peer_credits,
63 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_peertxcredits,
65 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nconnds,
66 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nconnds,
68 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_reconnectms,
69 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_reconnectms,
70 0, "min_reconnectms");
71 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, max_reconnectms,
72 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_max_reconnectms,
73 0, "max_reconnectms");
74 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, eager_ack,
75 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_eager_ack,
77 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, typed,
78 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_typed_conns,
80 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_bulk,
81 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_bulk,
83 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, rx_buffer_size,
84 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_rx_buffer_size,
86 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, tx_buffer_size,
87 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_tx_buffer_size,
89 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nagle,
90 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nagle,
92 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_idle,
93 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_idle,
95 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_count,
96 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_count,
97 0, "keepalive_count");
98 SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_intvl,
99 CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_intvl,
100 0, "keepalive_intvl");
/*
 * Flat table collecting the sysctl entries declared above; registered as a
 * single unit by ksocknal_lib_tunables_init() below.
 * NOTE(review): the terminating NULL entry and closing brace are not visible
 * here — the chunk appears truncated.
 */
102 cfs_sysctl_table_t ksocknal_top_ctl_table [] = {
103 &sysctl__lnet_ksocknal,
104 &sysctl__lnet_ksocknal_timeout,
105 &sysctl__lnet_ksocknal_credits,
106 &sysctl__lnet_ksocknal_peer_credits,
107 &sysctl__lnet_ksocknal_nconnds,
108 &sysctl__lnet_ksocknal_min_reconnectms,
109 &sysctl__lnet_ksocknal_max_reconnectms,
110 &sysctl__lnet_ksocknal_eager_ack,
111 &sysctl__lnet_ksocknal_typed,
112 &sysctl__lnet_ksocknal_min_bulk,
113 &sysctl__lnet_ksocknal_rx_buffer_size,
114 &sysctl__lnet_ksocknal_tx_buffer_size,
115 &sysctl__lnet_ksocknal_nagle,
116 &sysctl__lnet_ksocknal_keepalive_idle,
117 &sysctl__lnet_ksocknal_keepalive_count,
118 &sysctl__lnet_ksocknal_keepalive_intvl,
/*
 * Register ksocknal_top_ctl_table with the kernel and stash the resulting
 * handle in ksocknal_tunables.ksnd_sysctl; a NULL handle indicates failure.
 * NOTE(review): the failure branch and return statements are not visible —
 * this chunk is truncated.
 */
123 ksocknal_lib_tunables_init ()
125 ksocknal_tunables.ksnd_sysctl =
126 cfs_register_sysctl_table (ksocknal_top_ctl_table, 0);
128 if (ksocknal_tunables.ksnd_sysctl == NULL)
/* Unregister the sysctl table registered by ksocknal_lib_tunables_init(),
 * if registration succeeded. */
135 ksocknal_lib_tunables_fini ()
137 if (ksocknal_tunables.ksnd_sysctl != NULL)
138 cfs_unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl);
/* Stub tunables init/fini used when sysctl support is compiled out
 * (bodies not visible — truncated). */
142 ksocknal_lib_tunables_init ()
148 ksocknal_lib_tunables_fini ()
154 * To use bigger buffer for socket:
155  * 1. Increase nmbclusters (cannot be increased by sysctl because it is
156  *    read-only, so we must patch the kernel).
157 * 2. Increase net.inet.tcp.reass.maxsegments
158 * 3. Increase net.inet.tcp.sendspace
159 * 4. Increase net.inet.tcp.recvspace
160 * 5. Increase kern.ipc.maxsockbuf
/* Upper bound (in bytes) used when sizing socket buffers on Darwin. */
162 #define KSOCKNAL_MAX_BUFFER (1152*1024)
/*
 * Query the peer address (remote side, second arg = 1) and then the local
 * address (second arg = 0) of conn's socket, storing the local IP into
 * conn->ksnc_myipaddr.  Failures are logged via CERROR.
 * NOTE(review): intermediate lines (peer-address destination, error returns,
 * braces) are missing from this chunk — the visible text is truncated.
 */
165 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
167 int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
171 /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
172 LASSERT (!conn->ksnc_closing);
175 CERROR ("Error %d getting sock peer IP\n", rc);
179 rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
180 &conn->ksnc_myipaddr, NULL);
182 CERROR ("Error %d getting sock local IP\n", rc);
/*
 * Darwin 8+ (KPI) transmit path for plain-iovec fragments of tx.
 * Copies tx->tx_iov into scratch iovecs — a single local iovec when
 * SOCKNAL_SINGLE_FRAG_TX, otherwise the scheduler's scratch array — then
 * issues a non-blocking sock_send().  sock_send()'s positive errno is
 * negated to the Linux-style negative-errno convention.
 */
192 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
194 socket_t sock = C2B_SOCK(conn->ksnc_sock);
199 #if SOCKNAL_SINGLE_FRAG_TX
200 struct iovec scratch;
201 struct iovec *scratchiov = &scratch;
202 unsigned int niov = 1;
204 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
205 unsigned int niov = tx->tx_niov;
207 struct msghdr msg = {
210 .msg_iov = scratchiov,
214 .msg_flags = MSG_DONTWAIT
/* Accumulate total bytes (nob) while copying fragments into the scratch iovs. */
219 for (nob = i = 0; i < niov; i++) {
220 scratchiov[i] = tx->tx_iov[i];
221 nob += scratchiov[i].iov_len;
226 * Linux has MSG_MORE, do we have anything to
227 * reduce number of partial TCP segments sent?
229 rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
/*
 * Darwin 8+ (KPI) transmit path for page-based (kiov) fragments of tx.
 * Each page is kmapped into a scratch iovec, a non-blocking sock_send() is
 * issued, then every page is unmapped again regardless of the send result.
 */
236 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
238 socket_t sock = C2B_SOCK(conn->ksnc_sock);
239 lnet_kiov_t *kiov = tx->tx_kiov;
244 #if SOCKNAL_SINGLE_FRAG_TX
245 struct iovec scratch;
246 struct iovec *scratchiov = &scratch;
247 unsigned int niov = 1;
249 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
250 unsigned int niov = tx->tx_nkiov;
252 struct msghdr msg = {
255 .msg_iov = scratchiov,
259 .msg_flags = MSG_DONTWAIT
/* Map each page and build the scratch iovecs, accumulating total bytes. */
264 for (nob = i = 0; i < niov; i++) {
265 scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
267 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
272 * Linux has MSG_MORE, do we have anything to
273 * reduce number of partial TCP segments sent?
275 rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
276 for (i = 0; i < niov; i++)
277 cfs_kunmap(kiov[i].kiov_page);
/*
 * Darwin 8+ (KPI) receive path for plain-iovec buffers.  Copies the
 * connection's pending rx iovecs into scratch iovs (the socket layer may
 * consume or modify them), asserts we never ask for more than
 * ksnc_rx_nob_wanted, and performs a non-blocking sock_receive().
 */
284 ksocknal_lib_recv_iov (ksock_conn_t *conn)
286 #if SOCKNAL_SINGLE_FRAG_RX
287 struct iovec scratch;
288 struct iovec *scratchiov = &scratch;
289 unsigned int niov = 1;
291 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
292 unsigned int niov = conn->ksnc_rx_niov;
294 struct iovec *iov = conn->ksnc_rx_iov;
295 struct msghdr msg = {
298 .msg_iov = scratchiov,
311 for (nob = i = 0; i < niov; i++) {
312 scratchiov[i] = iov[i];
313 nob += scratchiov[i].iov_len;
315 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
316 rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
/*
 * Darwin 8+ (KPI) receive path for page-based (kiov) buffers.  Maps each rx
 * page into a scratch iovec, performs a non-blocking sock_receive(), then
 * unmaps every page again.
 */
324 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
326 #if SOCKNAL_SINGLE_FRAG_RX
327 struct iovec scratch;
328 struct iovec *scratchiov = &scratch;
329 unsigned int niov = 1;
331 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
332 unsigned int niov = conn->ksnc_rx_nkiov;
334 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
335 struct msghdr msg = {
338 .msg_iov = scratchiov,
349 /* NB we can't trust socket ops to either consume our iovs
350 * or leave them alone. */
351 for (nob = i = 0; i < niov; i++) {
352 scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + \
354 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
356 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
357 rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
358 for (i = 0; i < niov; i++)
359 cfs_kunmap(kiov[i].kiov_page);
/* Darwin 8+ eager-ACK hook — body not visible in this chunk (truncated);
 * see the pre-Darwin8 variant below for the TF_DELACK/TF_ACKNOW approach. */
366 ksocknal_lib_eager_ack (ksock_conn_t *conn)
/*
 * Darwin 8+ (KPI): report the connection's socket buffer sizes (*txmem,
 * *rxmem) and whether Nagle is disabled (*nagle, read via TCP_NODELAY).
 * Takes a connsock reference for the duration; on failure (socket closing)
 * all three outputs are zeroed.
 * NOTE(review): error-path control flow is partly missing — chunk truncated.
 */
372 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
374 socket_t sock = C2B_SOCK(conn->ksnc_sock);
378 rc = ksocknal_connsock_addref(conn);
380 LASSERT (conn->ksnc_closing);
381 *txmem = *rxmem = *nagle = 0;
384 rc = libcfs_sock_getbuf(conn->ksnc_sock, txmem, rxmem);
386 len = sizeof(*nagle);
387 rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
390 ksocknal_connsock_decref(conn);
395 *txmem = *rxmem = *nagle = 0;
/*
 * Darwin 8+ (KPI): apply ksocknal tunables to a freshly created socket:
 * SO_LINGER (abort sends on close), optional TCP_NODELAY, tx/rx buffer
 * sizes, and SO_KEEPALIVE/TCP_KEEPALIVE from the keepalive tunables.
 * Keepalive is enabled only when idle, count and interval are all > 0.
 * Errors are logged but (as visible here) setup continues past them.
 */
401 ksocknal_lib_setup_sock (cfs_socket_t *sock)
409 socket_t so = C2B_SOCK(sock);
410 struct linger linger;
412 /* Ensure this socket aborts active sends immediately when we close
416 rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER, &linger, sizeof(linger));
418 CERROR ("Can't set SO_LINGER: %d\n", rc);
422 if (!*ksocknal_tunables.ksnd_nagle) {
424 rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option));
426 CERROR ("Can't disable nagle: %d\n", rc);
431 rc = libcfs_sock_setbuf(sock,
432 *ksocknal_tunables.ksnd_tx_buffer_size,
433 *ksocknal_tunables.ksnd_rx_buffer_size);
435 CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
436 *ksocknal_tunables.ksnd_tx_buffer_size,
437 *ksocknal_tunables.ksnd_rx_buffer_size, rc);
441 /* snapshot tunables */
442 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
443 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
444 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
446 do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
447 option = (do_keepalive ? 1 : 0);
449 rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option));
451 CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
457 rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE,
458 &keep_idle, sizeof(keep_idle));
/*
 * Darwin 8+ (KPI): "push" any queued data on the connection by toggling
 * TCP_NODELAY, holding a connsock reference across the setsockopt call.
 * If the connection is shutting down (addref fails) nothing is done.
 */
464 ksocknal_lib_push_conn(ksock_conn_t *conn)
470 rc = ksocknal_connsock_addref(conn);
471 if (rc != 0) /* being shut down */
473 sock = C2B_SOCK(conn->ksnc_sock);
475 rc = -sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val));
478 ksocknal_connsock_decref(conn);
482 extern void ksocknal_read_callback (ksock_conn_t *conn);
483 extern void ksocknal_write_callback (ksock_conn_t *conn);
/*
 * Darwin 8+ socket upcall: dispatches to ksocknal's read/write callbacks
 * under the global read lock.  'arg' is the ksock_conn_t registered via
 * ksocknal_lib_set_callback().
 * NOTE(review): the conditions selecting read vs write callback are not
 * visible here — chunk truncated.
 */
486 ksocknal_upcall(socket_t so, void *arg, int waitf)
488 ksock_conn_t *conn = (ksock_conn_t *)arg;
491 read_lock (&ksocknal_data.ksnd_global_lock);
495 ksocknal_read_callback (conn);
497 ksocknal_write_callback (conn);
499 read_unlock (&ksocknal_data.ksnd_global_lock);
/* Darwin 8+: nothing to save — upcalls are set/reset wholesale on OS X. */
504 ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn)
506 /* No callback need to save in osx */
/* Darwin 8+: install ksocknal_upcall as the socket's event callback with
 * the connection as its argument. */
511 ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn)
513 libcfs_sock_set_cb(sock, ksocknal_upcall, (void *)conn);
/* Darwin 8+: remove the socket's event callback installed above. */
518 ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn)
520 libcfs_sock_reset_cb(sock);
523 #else /* !__DARWIN8__ */
/*
 * Pre-Darwin8 (BSD-style) transmit path for plain-iovec fragments.
 * Builds a struct uio over scratch iovecs and calls sosend() non-blocking.
 * sosend() gives no sent-byte count directly, so progress is recovered from
 * uio_resid: partial progress with ERESTART/EINTR/EWOULDBLOCK is reported
 * as bytes sent; success reports nob - uio_resid.
 */
526 ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
528 #if SOCKNAL_SINGLE_FRAG_TX
529 struct iovec scratch;
530 struct iovec *scratchiov = &scratch;
531 unsigned int niov = 1;
533 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
534 unsigned int niov = tx->tx_niov;
536 struct socket *sock = conn->ksnc_sock;
541 .uio_iov = scratchiov,
544 .uio_resid = 0, /* This will be valued after a while */
545 .uio_segflg = UIO_SYSSPACE,
549 int flags = MSG_DONTWAIT;
552 for (nob = i = 0; i < niov; i++) {
553 scratchiov[i] = tx->tx_iov[i];
554 nob += scratchiov[i].iov_len;
556 suio.uio_resid = nob;
559 rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
562 /* NB there is no return value can indicate how many
563 * have been sent and how many resid, we have to get
564 * sent bytes from suio. */
566 if (suio.uio_resid != nob &&\
567 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
568 /* We have sent something */
569 rc = nob - suio.uio_resid;
570 else if ( rc == EWOULDBLOCK )
571 /* Actually, EAGAIN and EWOULDBLOCK have same value in OSX */
576 rc = nob - suio.uio_resid;
/*
 * Pre-Darwin8 (BSD-style) transmit path for page-based (kiov) fragments.
 * Pages are kmapped into scratch iovecs (single-fragment mode also guards
 * against kmap deadlock via SOCKNAL_RISK_KMAP_DEADLOCK), sent via sosend(),
 * and unmapped afterwards; progress is recovered from uio_resid as in
 * ksocknal_lib_send_iov() above.
 */
582 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
584 #if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
585 struct iovec scratch;
586 struct iovec *scratchiov = &scratch;
587 unsigned int niov = 1;
589 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
590 unsigned int niov = tx->tx_nkiov;
592 struct socket *sock = conn->ksnc_sock;
593 lnet_kiov_t *kiov = tx->tx_kiov;
598 .uio_iov = scratchiov,
601 .uio_resid = 0, /* It should be valued after a while */
602 .uio_segflg = UIO_SYSSPACE,
606 int flags = MSG_DONTWAIT;
609 for (nob = i = 0; i < niov; i++) {
610 scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
612 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
614 suio.uio_resid = nob;
617 rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
620 for (i = 0; i < niov; i++)
621 cfs_kunmap(kiov[i].kiov_page);
624 if (suio.uio_resid != nob &&\
625 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
626 /* We have sent something */
627 rc = nob - suio.uio_resid;
628 else if ( rc == EWOULDBLOCK )
629 /* EAGAIN and EWOULDBLOCK have same value in OSX */
634 rc = nob - suio.uio_resid;
/*
 * Shadow declarations of the kernel's inpcb/tcpcb layouts ("ks_" prefixed)
 * so we can reach a socket's tcpcb and call tcp_output() without the
 * private kernel headers.  Only the leading fields used below are declared;
 * these MUST stay layout-compatible with the running kernel's structs.
 */
640 * liang: Hack of inpcb and tcpcb.
641 * To get tcpcb of a socket, and call tcp_output
652 LIST_HEAD(ks_tsegqe_head, ks_tseg_qent);
655 struct ks_tsegqe_head t_segq;
657 struct ks_tcptemp *unused;
659 struct inpcb *t_inpcb;
663 * There are more fields but we dont need
668 #define TF_ACKNOW 0x00001
669 #define TF_DELACK 0x00002
672 LIST_ENTRY(ks_inpcb) inp_hash;
673 struct in_addr reserved1;
674 struct in_addr reserved2;
677 LIST_ENTRY(inpcb) inp_list;
680 * There are more fields but we dont need
685 #define ks_sotoinpcb(so) ((struct ks_inpcb *)(so)->so_pcb)
686 #define ks_intotcpcb(ip) ((struct ks_tcpcb *)(ip)->inp_ppcb)
/* NOTE(review): expands to the kernel's intotcpcb()/sotoinpcb(), not the
 * ks_* shadows defined above — verify those kernel macros are in scope here;
 * ks_intotcpcb(ks_sotoinpcb(so)) may have been intended. */
687 #define ks_sototcpcb(so) (intotcpcb(sotoinpcb(so)))
/*
 * Pre-Darwin8: force an immediate ACK on the connection.  BSD has no
 * TCP_QUICKACK, so we reach into the (shadowed) tcpcb: if a delayed ACK is
 * pending (TF_DELACK), convert it to TF_ACKNOW and invoke tcp_output()
 * directly to push the ACK out now.
 */
690 ksocknal_lib_eager_ack (ksock_conn_t *conn)
692 struct socket *sock = conn->ksnc_sock;
693 struct ks_inpcb *inp = ks_sotoinpcb(sock);
694 struct ks_tcpcb *tp = ks_intotcpcb(inp);
698 extern int tcp_output(register struct ks_tcpcb *tp);
704 * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo
705 * to send immediate ACK.
707 if (tp && tp->t_flags & TF_DELACK){
708 tp->t_flags &= ~TF_DELACK;
709 tp->t_flags |= TF_ACKNOW;
710 (void) tcp_output(tp);
/*
 * Pre-Darwin8 (BSD-style) receive path for plain-iovec buffers.  Builds a
 * struct uio over scratch copies of the rx iovecs, calls soreceive()
 * non-blocking, and recovers the byte count from uio_resid (partial
 * progress with ERESTART/EINTR/EWOULDBLOCK/EAGAIN counts as received).
 */
720 ksocknal_lib_recv_iov (ksock_conn_t *conn)
722 #if SOCKNAL_SINGLE_FRAG_RX
723 struct iovec scratch;
724 struct iovec *scratchiov = &scratch;
725 unsigned int niov = 1;
727 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
728 unsigned int niov = conn->ksnc_rx_niov;
730 struct iovec *iov = conn->ksnc_rx_iov;
735 .uio_iov = scratchiov,
738 .uio_resid = 0, /* It should be valued after a while */
739 .uio_segflg = UIO_SYSSPACE,
743 int flags = MSG_DONTWAIT;
746 for (nob = i = 0; i < niov; i++) {
747 scratchiov[i] = iov[i];
748 nob += scratchiov[i].iov_len;
750 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
752 ruio.uio_resid = nob;
755 rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, &flags);
758 if (ruio.uio_resid != nob && \
759 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK || rc == EAGAIN))
760 /* data partially received */
761 rc = nob - ruio.uio_resid;
762 else if (rc == EWOULDBLOCK)
763 /* EAGAIN and EWOULDBLOCK have same value in OSX */
768 rc = nob - ruio.uio_resid;
/*
 * Pre-Darwin8 (BSD-style) receive path for page-based (kiov) buffers.
 * Maps each rx page into a scratch iovec, calls soreceive() non-blocking,
 * unmaps the pages, and recovers the byte count from uio_resid as in
 * ksocknal_lib_recv_iov() above.
 */
774 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
776 #if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
777 struct iovec scratch;
778 struct iovec *scratchiov = &scratch;
779 unsigned int niov = 1;
781 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
782 unsigned int niov = conn->ksnc_rx_nkiov;
784 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
789 .uio_iov = scratchiov,
793 .uio_segflg = UIO_SYSSPACE,
797 int flags = MSG_DONTWAIT;
800 for (nob = i = 0; i < niov; i++) {
801 scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
802 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
804 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
806 ruio.uio_resid = nob;
809 rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, NULL, &flags);
812 for (i = 0; i < niov; i++)
813 cfs_kunmap(kiov[i].kiov_page);
816 if (ruio.uio_resid != nob && \
817 (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
818 /* data partially received */
819 rc = nob - ruio.uio_resid;
820 else if (rc == EWOULDBLOCK)
821 /* receive blocked, EWOULDBLOCK == EAGAIN */
826 rc = nob - ruio.uio_resid;
/*
 * Pre-Darwin8: report socket buffer sizes (*txmem, *rxmem) and the Nagle
 * state (*nagle) via a sockopt(SOPT_GET, TCP_NODELAY) query, holding a
 * connsock reference; outputs are zeroed when the socket is closing.
 */
832 ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
834 struct socket *sock = conn->ksnc_sock;
837 rc = ksocknal_connsock_addref(conn);
839 LASSERT (conn->ksnc_closing);
840 *txmem = *rxmem = *nagle = 0;
843 rc = libcfs_sock_getbuf(sock, txmem, rxmem);
849 len = sizeof(*nagle);
850 bzero(&sopt, sizeof sopt);
851 sopt.sopt_dir = SOPT_GET;
852 sopt.sopt_level = IPPROTO_TCP;
853 sopt.sopt_name = TCP_NODELAY;
854 sopt.sopt_val = nagle;
855 sopt.sopt_valsize = len;
858 rc = -sogetopt(sock, &sopt);
862 ksocknal_connsock_decref(conn);
867 *txmem = *rxmem = *nagle = 0;
/*
 * Pre-Darwin8: apply ksocknal tunables to a new socket using the in-kernel
 * sockopt interface (struct sockopt + sosetopt): buffer sizes, SO_LINGER
 * (abort sends on close), optional TCP_NODELAY, then SO_KEEPALIVE and
 * TCP_KEEPALIVE derived from the keepalive tunables.  Keepalive is enabled
 * only when idle, count and interval are all > 0.
 */
872 ksocknal_lib_setup_sock (struct socket *so)
881 struct linger linger;
884 rc = libcfs_sock_setbuf(so,
885 *ksocknal_tunables.ksnd_tx_buffer_size,
886 *ksocknal_tunables.ksnd_rx_buffer_size);
888 CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
889 *ksocknal_tunables.ksnd_tx_buffer_size,
890 *ksocknal_tunables.ksnd_rx_buffer_size, rc);
894 /* Ensure this socket aborts active sends immediately when we close
896 bzero(&sopt, sizeof sopt);
900 sopt.sopt_dir = SOPT_SET;
901 sopt.sopt_level = SOL_SOCKET;
902 sopt.sopt_name = SO_LINGER;
903 sopt.sopt_val = &linger;
904 sopt.sopt_valsize = sizeof(linger);
907 rc = -sosetopt(so, &sopt);
909 CERROR ("Can't set SO_LINGER: %d\n", rc);
913 if (!*ksocknal_tunables.ksnd_nagle) {
915 bzero(&sopt, sizeof sopt);
916 sopt.sopt_dir = SOPT_SET;
917 sopt.sopt_level = IPPROTO_TCP;
918 sopt.sopt_name = TCP_NODELAY;
919 sopt.sopt_val = &option;
920 sopt.sopt_valsize = sizeof(option);
921 rc = -sosetopt(so, &sopt);
923 CERROR ("Can't disable nagle: %d\n", rc);
928 /* snapshot tunables */
929 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
930 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
931 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
933 do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
934 option = (do_keepalive ? 1 : 0);
935 bzero(&sopt, sizeof sopt);
936 sopt.sopt_dir = SOPT_SET;
937 sopt.sopt_level = SOL_SOCKET;
938 sopt.sopt_name = SO_KEEPALIVE;
939 sopt.sopt_val = &option;
940 sopt.sopt_valsize = sizeof(option);
941 rc = -sosetopt(so, &sopt);
943 CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
948 /* no more setting, just return */
953 bzero(&sopt, sizeof sopt);
954 sopt.sopt_dir = SOPT_SET;
955 sopt.sopt_level = IPPROTO_TCP;
956 sopt.sopt_name = TCP_KEEPALIVE;
957 sopt.sopt_val = &keep_idle;
958 sopt.sopt_valsize = sizeof(keep_idle);
959 rc = -sosetopt(so, &sopt);
961 CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc);
/*
 * Pre-Darwin8: "push" queued data on the connection by setting TCP_NODELAY
 * through the in-kernel sockopt interface, holding a connsock reference.
 * Does nothing if the connection is already shutting down.
 */
970 ksocknal_lib_push_conn(ksock_conn_t *conn)
978 rc = ksocknal_connsock_addref(conn);
979 if (rc != 0) /* being shut down */
981 sock = conn->ksnc_sock;
982 bzero(&sopt, sizeof sopt);
983 sopt.sopt_dir = SOPT_SET;
984 sopt.sopt_level = IPPROTO_TCP;
985 sopt.sopt_name = TCP_NODELAY;
986 sopt.sopt_val = &val;
987 sopt.sopt_valsize = sizeof val;
990 sosetopt(sock, &sopt);
993 ksocknal_connsock_decref(conn);
998 extern void ksocknal_read_callback (ksock_conn_t *conn);
999 extern void ksocknal_write_callback (ksock_conn_t *conn);
/*
 * Pre-Darwin8 socket upcall.  Under the global read lock, dispatches to the
 * read callback when the receive buffer has SB_UPCALL set, data is wanted
 * and the socket is readable, and to the write callback when the send
 * buffer has SB_UPCALL set and the socket is writable.
 */
1002 ksocknal_upcall(struct socket *so, caddr_t arg, int waitf)
1004 ksock_conn_t *conn = (ksock_conn_t *)arg;
1007 read_lock (&ksocknal_data.ksnd_global_lock);
1011 if (so->so_rcv.sb_flags & SB_UPCALL) {
1012 extern int soreadable(struct socket *so);
1013 if (conn->ksnc_rx_nob_wanted && soreadable(so))
1014 /* To verify whether the upcall is for receive */
1015 ksocknal_read_callback (conn);
1018 if (so->so_snd.sb_flags & SB_UPCALL){
1019 extern int sowriteable(struct socket *so);
1020 if (sowriteable(so))
1021 /* socket is writable */
1022 ksocknal_write_callback(conn);
1025 read_unlock (&ksocknal_data.ksnd_global_lock);
/* Pre-Darwin8: nothing to save — upcalls are installed/removed wholesale. */
1031 ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
1033 /* No callback need to save in osx */
/*
 * Pre-Darwin8: install ksocknal_upcall on the socket, mark both buffers
 * SB_UPCALL, make sends fully non-blocking (sb_timeo = 0) and give receives
 * a 2-second timeout.
 */
1038 ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
1043 sock->so_upcallarg = (void *)conn;
1044 sock->so_upcall = ksocknal_upcall;
1045 sock->so_snd.sb_timeo = 0;
1046 sock->so_rcv.sb_timeo = cfs_time_seconds(2);
1047 sock->so_rcv.sb_flags |= SB_UPCALL;
1048 sock->so_snd.sb_flags |= SB_UPCALL;
/* Pre-Darwin8: fire the upcall by hand to (re)process any pending events. */
1054 ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn)
1059 ksocknal_upcall (sock, (void *)conn, 0);
/* Pre-Darwin8: undo ksocknal_lib_set_callback() — clear SB_UPCALL on both
 * buffers and detach the upcall and its argument. */
1064 ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
1069 sock->so_rcv.sb_flags &= ~SB_UPCALL;
1070 sock->so_snd.sb_flags &= ~SB_UPCALL;
1071 sock->so_upcall = NULL;
1072 sock->so_upcallarg = NULL;
1076 #endif /* !__DARWIN8__ */