+++ /dev/null
-Index: linux-2.6.16.i686/net/core/dev.c
-===================================================================
---- linux-2.6.16.i686.orig/net/core/dev.c 2006-05-30 15:47:10.000000000 +0800
-+++ linux-2.6.16.i686/net/core/dev.c 2006-05-30 21:24:07.000000000 +0800
-@@ -1181,6 +1181,9 @@
- ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
- ninfo->nr_frags = 0;
- ninfo->frag_list = NULL;
-+ ninfo->zccd = NULL; /* copied data => no user zero copy descriptor */
-+ ninfo->zccd2 = NULL;
-+
-
- /* Offset between the two in bytes */
- offset = data - skb->head;
-Index: linux-2.6.16.i686/net/core/skbuff.c
-===================================================================
---- linux-2.6.16.i686.orig/net/core/skbuff.c 2006-05-30 15:47:12.000000000 +0800
-+++ linux-2.6.16.i686/net/core/skbuff.c 2006-05-30 21:26:35.000000000 +0800
-@@ -170,7 +170,8 @@
- shinfo->ufo_size = 0;
- shinfo->ip6_frag_id = 0;
- shinfo->frag_list = NULL;
--
-+ shinfo->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */
-+ shinfo->zccd2 = NULL;
- if (fclone) {
- struct sk_buff *child = skb + 1;
- atomic_t *fclone_ref = (atomic_t *) (child + 1);
-@@ -242,7 +243,9 @@
- shinfo->ufo_size = 0;
- shinfo->ip6_frag_id = 0;
- shinfo->frag_list = NULL;
--
-+ shinfo->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */
-+ shinfo->zccd2 = NULL;
-+
- if (fclone) {
- struct sk_buff *child = skb + 1;
- atomic_t *fclone_ref = (atomic_t *) (child + 1);
-@@ -287,6 +290,10 @@
- if (!skb->cloned ||
- !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
- &skb_shinfo(skb)->dataref)) {
-+ if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
-+ zccd_put (skb_shinfo(skb)->zccd); /* release hold */
-+ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
-+ zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
- if (skb_shinfo(skb)->nr_frags) {
- int i;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-@@ -606,6 +613,14 @@
- n->data_len = skb->data_len;
- n->len = skb->len;
-
-+ if (skb_shinfo(skb)->zccd != NULL) /* user zero copy descriptor? */
-+ zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
-+ skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
-+
-+ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd user zero copy descriptor? */
-+ zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
-+ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
-+
- if (skb_shinfo(skb)->nr_frags) {
- int i;
-
-@@ -649,6 +664,9 @@
- u8 *data;
- int size = nhead + (skb->end - skb->head) + ntail;
- long off;
-+ zccd_t *zccd = skb_shinfo(skb)->zccd; /* stash user zero copy descriptor */
-+ zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
-+
-
- if (skb_shared(skb))
- BUG();
-@@ -670,6 +688,11 @@
- if (skb_shinfo(skb)->frag_list)
- skb_clone_fraglist(skb);
-
-+ if (zccd != NULL) /* user zero copy descriptor? */
-+ zccd_get (zccd); /* extra ref (pages are shared) */
-+ if (zccd2 != NULL) /* 2nd user zero copy descriptor? */
-+ zccd_get (zccd2); /* extra ref (pages are shared) */
-+
- skb_release_data(skb);
-
- off = (data + nhead) - skb->head;
-@@ -684,6 +707,8 @@
- skb->cloned = 0;
- skb->nohdr = 0;
- atomic_set(&skb_shinfo(skb)->dataref, 1);
-+ skb_shinfo(skb)->zccd = zccd;
-+ skb_shinfo(skb)->zccd2 = zccd2;
- return 0;
-
- nodata:
-Index: linux-2.6.16.i686/net/ipv4/tcp.c
-===================================================================
---- linux-2.6.16.i686.orig/net/ipv4/tcp.c 2006-05-30 15:47:12.000000000 +0800
-+++ linux-2.6.16.i686/net/ipv4/tcp.c 2006-05-30 21:24:07.000000000 +0800
-@@ -498,8 +498,10 @@
- }
- }
-
-+/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
- static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
-- size_t psize, int flags)
-+ size_t psize, int flags, zccd_t *zccd)
-+
- {
- struct tcp_sock *tp = tcp_sk(sk);
- int mss_now, size_goal;
-@@ -547,6 +549,17 @@
- copy = size;
-
- i = skb_shinfo(skb)->nr_frags;
-+
-+ if (zccd != NULL && /* this is a zcc I/O */
-+ skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
-+ skb_shinfo(skb)->zccd2 != NULL &&
-+ skb_shinfo(skb)->zccd != zccd && /* not the same one */
-+ skb_shinfo(skb)->zccd2 != zccd)
-+ {
-+ tcp_mark_push (tp, skb);
-+ goto new_segment;
-+ }
-+
- can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= MAX_SKB_FRAGS) {
- tcp_mark_push(tp, skb);
-@@ -562,6 +575,20 @@
- skb_fill_page_desc(skb, i, page, offset, copy);
- }
-
-+ if (zccd != NULL && /* this is a zcc I/O */
-+ skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
-+ skb_shinfo(skb)->zccd2 != zccd)
-+ {
-+ zccd_get (zccd); /* bump ref count */
-+
-+ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
-+
-+ if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
-+ skb_shinfo(skb)->zccd = zccd;
-+ else
-+ skb_shinfo(skb)->zccd2 = zccd;
-+ }
-+
- skb->len += copy;
- skb->data_len += copy;
- skb->truesize += copy;
-@@ -631,12 +658,37 @@
-
- lock_sock(sk);
- TCP_CHECK_TIMER(sk);
-- res = do_tcp_sendpages(sk, &page, offset, size, flags);
-+ res = do_tcp_sendpages(sk, &page, offset, size, flags,NULL);
-+ TCP_CHECK_TIMER(sk);
-+ release_sock(sk);
-+ return res;
-+}
-+
-+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+ int flags, zccd_t *zccd)
-+{ /* same locking/sending sequence as tcp_sendpage(), but hands zccd on to do_tcp_sendpages() */
-+ ssize_t res;
-+ struct sock *sk = sock->sk;
-+
-+#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) /* any one of these satisfies the check below */
-+
-+ if (!(sk->sk_route_caps & NETIF_F_SG) || /* caller shouldn't waste her time */
-+ !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
-+ BUG (); /* route has no scatter-gather, or none of the checksum capabilities */
-+
-+#undef TCP_ZC_CSUM_FLAGS
-+
-+ lock_sock(sk); /* socket lock is held across the whole send */
-+ TCP_CHECK_TIMER(sk);
-+
-+ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd); /* single page; result is returned to the caller unchanged */
-+
- TCP_CHECK_TIMER(sk);
- release_sock(sk);
- return res;
- }
-
-+
- #define TCP_PAGE(sk) (sk->sk_sndmsg_page)
- #define TCP_OFF(sk) (sk->sk_sndmsg_off)
-
-@@ -1406,6 +1458,202 @@
- goto out;
- }
-
-+int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
-+ int len, int nonblock)
-+{ /* hands up to len bytes of queued receive data to the caller as skbs on packets; returns the byte count or a negative errno */
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ int copied;
-+ long timeo;
-+
-+ BUG_TRAP (len > 0);
-+ /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
-+
-+ lock_sock(sk);
-+
-+ TCP_CHECK_TIMER(sk);
-+
-+ copied = -ENOTCONN; /* result when the socket is in TCP_LISTEN */
-+ if (sk->sk_state == TCP_LISTEN)
-+ goto out;
-+
-+ copied = 0;
-+ timeo = sock_rcvtimeo(sk, nonblock);
-+
-+ do {
-+ struct sk_buff * skb;
-+ u32 offset;
-+ unsigned long used;
-+ int exhausted; /* set once nothing more can be taken from the current skb */
-+ int eaten; /* set once the current skb itself has been moved onto packets */
-+
-+ /* Are we at urgent data? Stop if we have read anything. */
-+ if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
-+ break;
-+
-+ /* We need to check signals first, to get correct SIGURG
-+ * handling. FIXME: Need to check this doesn't impact 1003.1g
-+ * and move it down to the bottom of the loop
-+ */
-+ if (signal_pending(current)) {
-+ if (copied)
-+ break;
-+ copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
-+ break;
-+ }
-+
-+ /* Next get a buffer. */
-+
-+ skb = skb_peek(&sk->sk_receive_queue);
-+
-+ if (skb == NULL) /* nothing ready */
-+ {
-+ if (copied) { /* already have data: stop instead of waiting in these cases */
-+ if (sk->sk_err ||
-+ sk->sk_state == TCP_CLOSE ||
-+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
-+ !timeo ||
-+ (0))
-+ break;
-+ } else { /* nothing taken yet: pick the result to report */
-+ if (sock_flag(sk, SOCK_DONE))
-+ break;
-+
-+ if (sk->sk_err) {
-+ copied = sock_error(sk);
-+ break;
-+ }
-+
-+ if (sk->sk_shutdown & RCV_SHUTDOWN)
-+ break;
-+
-+ if (sk->sk_state == TCP_CLOSE) {
-+ if (!(sock_flag(sk, SOCK_DONE))) {
-+ /* This occurs when user tries to read
-+ * from never connected socket.
-+ */
-+ copied = -ENOTCONN;
-+ break;
-+ }
-+ break;
-+ }
-+
-+ if (!timeo) {
-+ copied = -EAGAIN;
-+ break;
-+ }
-+ }
-+
-+ cleanup_rbuf(sk, copied);
-+ sk_wait_data(sk, &timeo);
-+ continue;
-+ }
-+
-+ BUG_TRAP (atomic_read (&skb->users) == 1); /* the queued skb is expected to be unshared */
-+
-+ exhausted = eaten = 0;
-+
-+ offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; /* distance from the skb's first sequence number to copied_seq */
-+ if (skb->h.th->syn)
-+ offset--; /* presumably because SYN takes a sequence number but no payload byte */
-+
-+ used = skb->len - offset; /* bytes of this skb beyond offset */
-+
-+ if (tp->urg_data) {
-+ u32 urg_offset = tp->urg_seq - tp->copied_seq;
-+ if (urg_offset < used) {
-+ if (!urg_offset) { /* at urgent data */
-+ if (!(sock_flag(sk, SOCK_URGINLINE))) {
-+ tp->copied_seq++; /* discard the single byte of urgent data */
-+ offset++;
-+ used--;
-+ }
-+ } else /* truncate read */
-+ used = urg_offset;
-+ }
-+ }
-+
-+ BUG_TRAP (used >= 0); /* NOTE(review): used is unsigned long, so this can never fire */
-+ if (len < used) /* clamp to what the caller still wants */
-+ used = len;
-+
-+ if (used == 0) /* nothing to take from this skb */
-+ exhausted = 1;
-+ else
-+ {
-+ if (skb_is_nonlinear (skb)) /* paged data is flattened before the skb is handed on */
-+ {
-+ int rc = skb_linearize (skb, GFP_KERNEL);
-+
-+ printk ("tcp_recvpackets(): linearising: %d\n", rc); /* NOTE(review): no KERN_ level and printed on every linearise — looks like leftover debugging */
-+
-+ if (rc)
-+ {
-+ if (!copied) /* report the error only if nothing was taken yet */
-+ copied = rc;
-+ break;
-+ }
-+ }
-+
-+ if ((offset + used) == skb->len) /* consuming the whole packet */
-+ {
-+ __skb_unlink (skb, &sk->sk_receive_queue);
-+ dst_release (skb->dst); /* NOTE(review): skb->dst is not cleared afterwards — confirm the eventual free cannot release it again */
-+ skb_orphan (skb);
-+ __skb_pull (skb, offset); /* strip the part before offset */
-+ __skb_queue_tail (packets, skb);
-+ exhausted = eaten = 1;
-+ }
-+ else /* consuming only part of the packet */
-+ {
-+ struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL); /* original stays on the receive queue */
-+
-+ if (skb2 == NULL)
-+ {
-+ if (!copied) /* report the error only if nothing was taken yet */
-+ copied = -ENOMEM;
-+ break;
-+ }
-+
-+ dst_release (skb2->dst); /* NOTE(review): skb2->dst is not cleared afterwards — confirm the eventual free cannot release it again */
-+ __skb_pull (skb2, offset); /* strip the part before offset */
-+ __skb_trim (skb2, used); /* keep only the bytes being handed over */
-+ __skb_queue_tail (packets, skb2);
-+ }
-+
-+ tp->copied_seq += used;
-+ copied += used;
-+ len -= used;
-+ }
-+
-+ if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
-+ tp->urg_data = 0;
-+ tcp_fast_path_check(sk, tp);
-+ }
-+
-+ if (!exhausted) /* more of this skb remains: look at it again */
-+ continue;
-+
-+ if (skb->h.th->fin)
-+ {
-+ tp->copied_seq++; /* account for the FIN in copied_seq */
-+ if (!eaten) /* skb was not moved onto packets, so dispose of it here */
-+ sk_eat_skb (sk, skb);
-+ break;
-+ }
-+
-+ if (!eaten) /* skb was not moved onto packets, so dispose of it here */
-+ sk_eat_skb (sk, skb);
-+
-+ } while (len > 0);
-+
-+ out:
-+ /* Clean up data we have read: This will do ACK frames. */
-+ cleanup_rbuf(sk, copied);
-+ TCP_CHECK_TIMER(sk);
-+ release_sock(sk);
-+ return copied;
-+}
-+
- /*
- * State processing on a close. This implements the state shift for
- * sending our FIN frame. Note that we only send a FIN for some
-@@ -2139,6 +2387,8 @@
- EXPORT_SYMBOL(tcp_recvmsg);
- EXPORT_SYMBOL(tcp_sendmsg);
- EXPORT_SYMBOL(tcp_sendpage);
-+EXPORT_SYMBOL(tcp_sendpage_zccd);
-+EXPORT_SYMBOL(tcp_recvpackets);
- EXPORT_SYMBOL(tcp_setsockopt);
- EXPORT_SYMBOL(tcp_shutdown);
- EXPORT_SYMBOL(tcp_statistics);
-Index: linux-2.6.16.i686/include/linux/skbuff.h
-===================================================================
---- linux-2.6.16.i686.orig/include/linux/skbuff.h 2006-05-30 15:47:11.000000000 +0800
-+++ linux-2.6.16.i686/include/linux/skbuff.h 2006-05-30 21:24:07.000000000 +0800
-@@ -128,6 +128,30 @@
- __u16 size;
- };
-
-+/* Support for a callback once every skb referencing the data has released it */
-+typedef struct zccd /* Zero Copy Callback Descriptor */
-+{ /* (embed as first member of custom struct) */
-+ atomic_t zccd_count; /* reference count; zccd_init() starts it at 1 */
-+ void (*zccd_destructor)(struct zccd *); /* called by zccd_put() with this descriptor when the count reaches zero */
-+} zccd_t;
-+
-+static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
-+{ /* prepare d for use; callback is stored as the descriptor's destructor */
-+ atomic_set (&d->zccd_count, 1); /* the initialiser holds the first reference */
-+ d->zccd_destructor = callback;
-+}
-+
-+static inline void zccd_get (zccd_t *d) /* take a reference */
-+{
-+ atomic_inc (&d->zccd_count); /* unconditional: no check that the count is still non-zero */
-+}
-+
-+static inline void zccd_put (zccd_t *d) /* release a reference */
-+{
-+ if (atomic_dec_and_test (&d->zccd_count)) /* true only for the drop that takes the count to zero */
-+ (d->zccd_destructor)(d); /* last reference gone: run the stored callback */
-+}
-+
- /* This data is invariant across clones and lives at
- * the end of the header data, ie. at skb->end.
- */
-@@ -139,6 +163,13 @@
- unsigned short ufo_size;
- unsigned int ip6_frag_id;
- struct sk_buff *frag_list;
-+ zccd_t *zccd; /* zero copy descriptor */
-+ zccd_t *zccd2; /* 2nd zero copy descriptor */
-+ /* NB we expect zero-copy data to be at least 1 packet, so
-+ * having 2 zccds means we don't unnecessarily split the packet
-+ * where consecutive zero-copy sends abut.
-+ */
-+
- skb_frag_t frags[MAX_SKB_FRAGS];
- };
-
-Index: linux-2.6.16.i686/include/net/tcp.h
-===================================================================
---- linux-2.6.16.i686.orig/include/net/tcp.h 2006-05-30 15:47:11.000000000 +0800
-+++ linux-2.6.16.i686/include/net/tcp.h 2006-05-30 21:24:07.000000000 +0800
-@@ -272,6 +272,9 @@
- extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size);
- extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
-+extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+ int flags, zccd_t *zccd);
-+
-
- extern int tcp_ioctl(struct sock *sk,
- int cmd,
-@@ -354,6 +357,9 @@
- struct msghdr *msg,
- size_t len, int nonblock,
- int flags, int *addr_len);
-+extern int tcp_recvpackets(struct sock *sk,
-+ struct sk_buff_head *packets,
-+ int len, int nonblock);
-
- extern void tcp_parse_options(struct sk_buff *skb,
- struct tcp_options_received *opt_rx,
+++ /dev/null
-Index: linux-2.6/net/core/skbuff.c
-===================================================================
---- linux-2.6.orig/net/core/skbuff.c 2006-07-15 21:08:45.000000000 +0800
-+++ linux-2.6/net/core/skbuff.c 2006-07-15 21:12:21.000000000 +0800
-@@ -183,7 +183,8 @@ struct sk_buff *__alloc_skb(unsigned int
- shinfo->gso_type = 0;
- shinfo->ip6_frag_id = 0;
- shinfo->frag_list = NULL;
--
-+ shinfo->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */
-+ shinfo->zccd2 = NULL;
- if (fclone) {
- struct sk_buff *child = skb + 1;
- atomic_t *fclone_ref = (atomic_t *) (child + 1);
-@@ -283,6 +284,10 @@ static void skb_release_data(struct sk_b
- if (!skb->cloned ||
- !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
- &skb_shinfo(skb)->dataref)) {
-+ if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
-+ zccd_put (skb_shinfo(skb)->zccd); /* release hold */
-+ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
-+ zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
- if (skb_shinfo(skb)->nr_frags) {
- int i;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-@@ -618,6 +623,14 @@ struct sk_buff *pskb_copy(struct sk_buff
- n->data_len = skb->data_len;
- n->len = skb->len;
-
-+ if (skb_shinfo(skb)->zccd != NULL) /* user zero copy descriptor? */
-+ zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
-+ skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
-+
-+ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd user zero copy descriptor? */
-+ zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
-+ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
-+
- if (skb_shinfo(skb)->nr_frags) {
- int i;
-
-@@ -661,6 +674,9 @@ int pskb_expand_head(struct sk_buff *skb
- u8 *data;
- int size = nhead + (skb->end - skb->head) + ntail;
- long off;
-+ zccd_t *zccd = skb_shinfo(skb)->zccd; /* stash user zero copy descriptor */
-+ zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
-+
-
- if (skb_shared(skb))
- BUG();
-@@ -682,6 +698,11 @@ int pskb_expand_head(struct sk_buff *skb
- if (skb_shinfo(skb)->frag_list)
- skb_clone_fraglist(skb);
-
-+ if (zccd != NULL) /* user zero copy descriptor? */
-+ zccd_get (zccd); /* extra ref (pages are shared) */
-+ if (zccd2 != NULL) /* 2nd user zero copy descriptor? */
-+ zccd_get (zccd2); /* extra ref (pages are shared) */
-+
- skb_release_data(skb);
-
- off = (data + nhead) - skb->head;
-@@ -696,6 +717,8 @@ int pskb_expand_head(struct sk_buff *skb
- skb->cloned = 0;
- skb->nohdr = 0;
- atomic_set(&skb_shinfo(skb)->dataref, 1);
-+ skb_shinfo(skb)->zccd = zccd;
-+ skb_shinfo(skb)->zccd2 = zccd2;
- return 0;
-
- nodata:
-Index: linux-2.6/net/ipv4/tcp.c
-===================================================================
---- linux-2.6.orig/net/ipv4/tcp.c 2006-07-15 21:08:45.000000000 +0800
-+++ linux-2.6/net/ipv4/tcp.c 2006-07-15 22:32:12.000000000 +0800
-@@ -499,8 +499,10 @@ static inline void tcp_push(struct sock
- }
- }
-
-+/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
- static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
-- size_t psize, int flags)
-+ size_t psize, int flags, zccd_t *zccd)
-+
- {
- struct tcp_sock *tp = tcp_sk(sk);
- int mss_now, size_goal;
-@@ -548,6 +550,17 @@ new_segment:
- copy = size;
-
- i = skb_shinfo(skb)->nr_frags;
-+
-+ if (zccd != NULL && /* this is a zcc I/O */
-+ skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
-+ skb_shinfo(skb)->zccd2 != NULL &&
-+ skb_shinfo(skb)->zccd != zccd && /* not the same one */
-+ skb_shinfo(skb)->zccd2 != zccd)
-+ {
-+ tcp_mark_push (tp, skb);
-+ goto new_segment;
-+ }
-+
- can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= MAX_SKB_FRAGS) {
- tcp_mark_push(tp, skb);
-@@ -563,6 +576,20 @@ new_segment:
- skb_fill_page_desc(skb, i, page, offset, copy);
- }
-
-+ if (zccd != NULL && /* this is a zcc I/O */
-+ skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
-+ skb_shinfo(skb)->zccd2 != zccd)
-+ {
-+ zccd_get (zccd); /* bump ref count */
-+
-+ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
-+
-+ if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
-+ skb_shinfo(skb)->zccd = zccd;
-+ else
-+ skb_shinfo(skb)->zccd2 = zccd;
-+ }
-+
- skb->len += copy;
- skb->data_len += copy;
- skb->truesize += copy;
-@@ -628,12 +655,37 @@ ssize_t tcp_sendpage(struct socket *sock
-
- lock_sock(sk);
- TCP_CHECK_TIMER(sk);
-- res = do_tcp_sendpages(sk, &page, offset, size, flags);
-+ res = do_tcp_sendpages(sk, &page, offset, size, flags,NULL);
-+ TCP_CHECK_TIMER(sk);
-+ release_sock(sk);
-+ return res;
-+}
-+
-+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+ int flags, zccd_t *zccd)
-+{ /* same locking/sending sequence as tcp_sendpage(), but hands zccd on to do_tcp_sendpages() */
-+ ssize_t res;
-+ struct sock *sk = sock->sk;
-+
-+#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) /* any one of these satisfies the check below */
-+
-+ if (!(sk->sk_route_caps & NETIF_F_SG) || /* caller shouldn't waste her time */
-+ !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
-+ BUG (); /* route has no scatter-gather, or none of the checksum capabilities */
-+
-+#undef TCP_ZC_CSUM_FLAGS
-+
-+ lock_sock(sk); /* socket lock is held across the whole send */
-+ TCP_CHECK_TIMER(sk);
-+
-+ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd); /* single page; result is returned to the caller unchanged */
-+
- TCP_CHECK_TIMER(sk);
- release_sock(sk);
- return res;
- }
-
-+
- #define TCP_PAGE(sk) (sk->sk_sndmsg_page)
- #define TCP_OFF(sk) (sk->sk_sndmsg_off)
-
-@@ -1477,6 +1529,202 @@ recv_urg:
- goto out;
- }
-
-+int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
-+ int len, int nonblock)
-+{ /* hands up to len bytes of queued receive data to the caller as skbs on packets; returns the byte count or a negative errno */
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ int copied;
-+ long timeo;
-+
-+ BUG_TRAP (len > 0);
-+ /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
-+
-+ lock_sock(sk);
-+
-+ TCP_CHECK_TIMER(sk);
-+
-+ copied = -ENOTCONN; /* result when the socket is in TCP_LISTEN */
-+ if (sk->sk_state == TCP_LISTEN)
-+ goto out;
-+
-+ copied = 0;
-+ timeo = sock_rcvtimeo(sk, nonblock);
-+
-+ do {
-+ struct sk_buff * skb;
-+ u32 offset;
-+ unsigned long used;
-+ int exhausted; /* set once nothing more can be taken from the current skb */
-+ int eaten; /* set once the current skb itself has been moved onto packets */
-+
-+ /* Are we at urgent data? Stop if we have read anything. */
-+ if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
-+ break;
-+
-+ /* We need to check signals first, to get correct SIGURG
-+ * handling. FIXME: Need to check this doesn't impact 1003.1g
-+ * and move it down to the bottom of the loop
-+ */
-+ if (signal_pending(current)) {
-+ if (copied)
-+ break;
-+ copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
-+ break;
-+ }
-+
-+ /* Next get a buffer. */
-+
-+ skb = skb_peek(&sk->sk_receive_queue);
-+
-+ if (skb == NULL) /* nothing ready */
-+ {
-+ if (copied) { /* already have data: stop instead of waiting in these cases */
-+ if (sk->sk_err ||
-+ sk->sk_state == TCP_CLOSE ||
-+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
-+ !timeo ||
-+ (0))
-+ break;
-+ } else { /* nothing taken yet: pick the result to report */
-+ if (sock_flag(sk, SOCK_DONE))
-+ break;
-+
-+ if (sk->sk_err) {
-+ copied = sock_error(sk);
-+ break;
-+ }
-+
-+ if (sk->sk_shutdown & RCV_SHUTDOWN)
-+ break;
-+
-+ if (sk->sk_state == TCP_CLOSE) {
-+ if (!(sock_flag(sk, SOCK_DONE))) {
-+ /* This occurs when user tries to read
-+ * from never connected socket.
-+ */
-+ copied = -ENOTCONN;
-+ break;
-+ }
-+ break;
-+ }
-+
-+ if (!timeo) {
-+ copied = -EAGAIN;
-+ break;
-+ }
-+ }
-+
-+ tcp_cleanup_rbuf(sk, copied);
-+ sk_wait_data(sk, &timeo);
-+ continue;
-+ }
-+
-+ BUG_TRAP (atomic_read (&skb->users) == 1); /* the queued skb is expected to be unshared */
-+
-+ exhausted = eaten = 0;
-+
-+ offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; /* distance from the skb's first sequence number to copied_seq */
-+ if (skb->h.th->syn)
-+ offset--; /* presumably because SYN takes a sequence number but no payload byte */
-+
-+ used = skb->len - offset; /* bytes of this skb beyond offset */
-+
-+ if (tp->urg_data) {
-+ u32 urg_offset = tp->urg_seq - tp->copied_seq;
-+ if (urg_offset < used) {
-+ if (!urg_offset) { /* at urgent data */
-+ if (!(sock_flag(sk, SOCK_URGINLINE))) {
-+ tp->copied_seq++; /* discard the single byte of urgent data */
-+ offset++;
-+ used--;
-+ }
-+ } else /* truncate read */
-+ used = urg_offset;
-+ }
-+ }
-+
-+ BUG_TRAP (used >= 0); /* NOTE(review): used is unsigned long, so this can never fire */
-+ if (len < used) /* clamp to what the caller still wants */
-+ used = len;
-+
-+ if (used == 0) /* nothing to take from this skb */
-+ exhausted = 1;
-+ else
-+ {
-+ if (skb_is_nonlinear (skb)) /* paged data is flattened before the skb is handed on */
-+ {
-+ int rc = skb_linearize (skb);
-+
-+ printk ("tcp_recvpackets(): linearising: %d\n", rc); /* NOTE(review): no KERN_ level and printed on every linearise — looks like leftover debugging */
-+
-+ if (rc)
-+ {
-+ if (!copied) /* report the error only if nothing was taken yet */
-+ copied = rc;
-+ break;
-+ }
-+ }
-+
-+ if ((offset + used) == skb->len) /* consuming the whole packet */
-+ {
-+ __skb_unlink (skb, &sk->sk_receive_queue);
-+ dst_release (skb->dst); /* NOTE(review): skb->dst is not cleared afterwards — confirm the eventual free cannot release it again */
-+ skb_orphan (skb);
-+ __skb_pull (skb, offset); /* strip the part before offset */
-+ __skb_queue_tail (packets, skb);
-+ exhausted = eaten = 1;
-+ }
-+ else /* consuming only part of the packet */
-+ {
-+ struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL); /* original stays on the receive queue */
-+
-+ if (skb2 == NULL)
-+ {
-+ if (!copied) /* report the error only if nothing was taken yet */
-+ copied = -ENOMEM;
-+ break;
-+ }
-+
-+ dst_release (skb2->dst); /* NOTE(review): skb2->dst is not cleared afterwards — confirm the eventual free cannot release it again */
-+ __skb_pull (skb2, offset); /* strip the part before offset */
-+ __skb_trim (skb2, used); /* keep only the bytes being handed over */
-+ __skb_queue_tail (packets, skb2);
-+ }
-+
-+ tp->copied_seq += used;
-+ copied += used;
-+ len -= used;
-+ }
-+
-+ if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
-+ tp->urg_data = 0;
-+ tcp_fast_path_check(sk, tp);
-+ }
-+
-+ if (!exhausted) /* more of this skb remains: look at it again */
-+ continue;
-+
-+ if (skb->h.th->fin)
-+ {
-+ tp->copied_seq++; /* account for the FIN in copied_seq */
-+ if (!eaten) /* skb was not moved onto packets, so dispose of it here */
-+ sk_eat_skb (sk, skb, 0);
-+ break;
-+ }
-+
-+ if (!eaten) /* skb was not moved onto packets, so dispose of it here */
-+ sk_eat_skb (sk, skb, 0);
-+
-+ } while (len > 0);
-+
-+ out:
-+ /* Clean up data we have read: This will do ACK frames. */
-+ tcp_cleanup_rbuf(sk, copied);
-+ TCP_CHECK_TIMER(sk);
-+ release_sock(sk);
-+ return copied;
-+}
-+
- /*
- * State processing on a close. This implements the state shift for
- * sending our FIN frame. Note that we only send a FIN for some
-@@ -2345,6 +2593,8 @@ EXPORT_SYMBOL(tcp_read_sock);
- EXPORT_SYMBOL(tcp_recvmsg);
- EXPORT_SYMBOL(tcp_sendmsg);
- EXPORT_SYMBOL(tcp_sendpage);
-+EXPORT_SYMBOL(tcp_sendpage_zccd);
-+EXPORT_SYMBOL(tcp_recvpackets);
- EXPORT_SYMBOL(tcp_setsockopt);
- EXPORT_SYMBOL(tcp_shutdown);
- EXPORT_SYMBOL(tcp_statistics);
-Index: linux-2.6/include/linux/skbuff.h
-===================================================================
---- linux-2.6.orig/include/linux/skbuff.h 2006-07-15 21:08:45.000000000 +0800
-+++ linux-2.6/include/linux/skbuff.h 2006-07-15 21:12:21.000000000 +0800
-@@ -128,6 +128,30 @@ struct skb_frag_struct {
- __u16 size;
- };
-
-+/* Support for a callback once every skb referencing the data has released it */
-+typedef struct zccd /* Zero Copy Callback Descriptor */
-+{ /* (embed as first member of custom struct) */
-+ atomic_t zccd_count; /* reference count; zccd_init() starts it at 1 */
-+ void (*zccd_destructor)(struct zccd *); /* called by zccd_put() with this descriptor when the count reaches zero */
-+} zccd_t;
-+
-+static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
-+{ /* prepare d for use; callback is stored as the descriptor's destructor */
-+ atomic_set (&d->zccd_count, 1); /* the initialiser holds the first reference */
-+ d->zccd_destructor = callback;
-+}
-+
-+static inline void zccd_get (zccd_t *d) /* take a reference */
-+{
-+ atomic_inc (&d->zccd_count); /* unconditional: no check that the count is still non-zero */
-+}
-+
-+static inline void zccd_put (zccd_t *d) /* release a reference */
-+{
-+ if (atomic_dec_and_test (&d->zccd_count)) /* true only for the drop that takes the count to zero */
-+ (d->zccd_destructor)(d); /* last reference gone: run the stored callback */
-+}
-+
- /* This data is invariant across clones and lives at
- * the end of the header data, ie. at skb->end.
- */
-@@ -140,6 +164,13 @@ struct skb_shared_info {
- unsigned short gso_type;
- unsigned int ip6_frag_id;
- struct sk_buff *frag_list;
-+ zccd_t *zccd; /* zero copy descriptor */
-+ zccd_t *zccd2; /* 2nd zero copy descriptor */
-+ /* NB we expect zero-copy data to be at least 1 packet, so
-+ * having 2 zccds means we don't unnecessarily split the packet
-+ * where consecutive zero-copy sends abut.
-+ */
-+
- skb_frag_t frags[MAX_SKB_FRAGS];
- };
-
-Index: linux-2.6/include/net/tcp.h
-===================================================================
---- linux-2.6.orig/include/net/tcp.h 2006-07-15 21:08:45.000000000 +0800
-+++ linux-2.6/include/net/tcp.h 2006-07-15 21:12:21.000000000 +0800
-@@ -278,6 +278,9 @@ extern int tcp_v4_tw_remember_stam
- extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size);
- extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
-+extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+ int flags, zccd_t *zccd);
-+
-
- extern int tcp_ioctl(struct sock *sk,
- int cmd,
-@@ -368,6 +371,9 @@ extern int tcp_recvmsg(struct kiocb *i
- struct msghdr *msg,
- size_t len, int nonblock,
- int flags, int *addr_len);
-+extern int tcp_recvpackets(struct sock *sk,
-+ struct sk_buff_head *packets,
-+ int len, int nonblock);
-
- extern void tcp_parse_options(struct sk_buff *skb,
- struct tcp_options_received *opt_rx,