---- linux/./include/net/tcp.h 2006-10-10 01:49:23.000000000 +0100
-+++ ../2.6.9-41.2chaos/linux/./include/net/tcp.h 2006-09-21 17:15:21.000000000 +0100
-@@ -787,6 +787,8 @@ extern int tcp_v4_tw_remember_stam
- extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size);
- extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
-+extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+ int flags, struct zccd *zccd);
-
- extern int tcp_ioctl(struct sock *sk,
- int cmd,
---- linux/./include/linux/skbuff.h 2006-10-10 01:49:23.000000000 +0100
-+++ ../2.6.9-41.2chaos/linux/./include/linux/skbuff.h 2006-10-06 18:09:35.000000000 +0100
-@@ -134,6 +134,36 @@ struct skb_frag_struct {
- __u16 size;
- };
-
-+/* Zero Copy Callback Descriptor
-+ * This struct supports receiving notification when zero-copy network I/O has
-+ * completed. The ZCCD can be embedded in a struct containing the state of a
-+ * zero-copy network send. Every skbuff that references that send's pages also
-+ * keeps a reference on the ZCCD. When they have all been disposed of, the
-+ * reference count on the ZCCD drops to zero and the callback is made, telling
-+ * the original caller that the pages may now be overwritten. */
-+struct zccd
-+{
-+ atomic_t zccd_refcount;
-+ void (*zccd_callback)(struct zccd *);
-+};
-+
-+static inline void zccd_init (struct zccd *d, void (*callback)(struct zccd *))
-+{
-+ atomic_set (&d->zccd_refcount, 1);
-+ d->zccd_callback = callback;
-+}
-+
-+static inline void zccd_incref (struct zccd *d) /* take a reference */
-+{
-+ atomic_inc (&d->zccd_refcount);
-+}
-+
-+static inline void zccd_decref (struct zccd *d) /* release a reference */
-+{
-+ if (atomic_dec_and_test (&d->zccd_refcount))
-+ (d->zccd_callback)(d);
-+}
-+
- /* This data is invariant across clones and lives at
- * the end of the header data, ie. at skb->end.
- */
-@@ -143,6 +173,11 @@ struct skb_shared_info {
- unsigned short tso_size;
- unsigned short tso_segs;
- struct sk_buff *frag_list;
-+ struct zccd *zccd1;
-+ struct zccd *zccd2;
-+ /* NB zero-copy data is normally whole pages. We have 2 zccds in an
-+ * skbuff so we don't unneccessarily split the packet where pages fall
-+ * into the same packet. */
- skb_frag_t frags[MAX_SKB_FRAGS];
- };
-
-@@ -1070,6 +1105,23 @@ static inline void kunmap_skb_frag(void
- #endif
- }
-
-+/* This skbuf has dropped its pages: drop refs on any zero-copy callback
-+ * descriptors it has. */
-+static inline void skb_complete_zccd (struct sk_buff *skb)
-+{
-+ struct skb_shared_info *info = skb_shinfo(skb);
-+
-+ if (info->zccd1 != NULL) {
-+ zccd_decref(info->zccd1);
-+ info->zccd1 = NULL;
-+ }
-+
-+ if (info->zccd2 != NULL) {
-+ zccd_decref(info->zccd2);
-+ info->zccd2 = NULL;
-+ }
-+}
-+
- #define skb_queue_walk(queue, skb) \
- for (skb = (queue)->next, prefetch(skb->next); \
- (skb != (struct sk_buff *)(queue)); \
---- linux/./net/core/dev.c 2006-10-10 01:49:23.000000000 +0100
-+++ ../2.6.9-41.2chaos/linux/./net/core/dev.c 2006-09-21 16:53:45.000000000 +0100
-@@ -1140,6 +1140,8 @@ int __skb_linearize(struct sk_buff *skb,
- ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
- ninfo->nr_frags = 0;
- ninfo->frag_list = NULL;
-+ ninfo->zccd1 = NULL; /* zero copy completion callback */
-+ ninfo->zccd2 = NULL; /* not required */
-
- /* Offset between the two in bytes */
- offset = data - skb->head;
---- linux/./net/core/skbuff.c 2006-10-10 01:49:23.000000000 +0100
-+++ ../2.6.9-41.2chaos/linux/./net/core/skbuff.c 2006-10-10 01:46:16.000000000 +0100
-@@ -155,6 +155,8 @@ struct sk_buff *alloc_skb(unsigned int s
- skb_shinfo(skb)->tso_size = 0;
- skb_shinfo(skb)->tso_segs = 0;
- skb_shinfo(skb)->frag_list = NULL;
-+ skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */
-+ skb_shinfo(skb)->zccd2 = NULL; /* not required (yet) */
- out:
- return skb;
- nodata:
-@@ -189,6 +191,9 @@ void skb_release_data(struct sk_buff *sk
- {
- if (!skb->cloned ||
- atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
-+ /* complete zero-copy callbacks (if any) */
-+ skb_complete_zccd(skb);
-+
- if (skb_shinfo(skb)->nr_frags) {
- int i;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-@@ -484,6 +489,18 @@ struct sk_buff *pskb_copy(struct sk_buff
- get_page(skb_shinfo(n)->frags[i].page);
- }
- skb_shinfo(n)->nr_frags = i;
-+
-+ if (skb_shinfo(skb)->zccd1 != NULL) {
-+ BUG_TRAP(skb_shinfo(n)->zccd1 == NULL);
-+ skb_shinfo(n)->zccd1 = skb_shinfo(skb)->zccd1;
-+ zccd_incref(skb_shinfo(n)->zccd1);
-+ }
-+
-+ if (skb_shinfo(skb)->zccd2 != NULL) {
-+ BUG_TRAP(skb_shinfo(n)->zccd2 == NULL);
-+ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
-+ zccd_incref(skb_shinfo(n)->zccd2);
-+ }
- }
-
- if (skb_shinfo(skb)->frag_list) {
-@@ -533,6 +550,13 @@ int pskb_expand_head(struct sk_buff *skb
- memcpy(data + nhead, skb->head, skb->tail - skb->head);
- memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
-
-+ /* zero-copy descriptors have been copied into the new shinfo -
-+ * account the new references */
-+ if (skb_shinfo(skb)->zccd1 != NULL)
-+ zccd_incref(skb_shinfo(skb)->zccd1);
-+ if (skb_shinfo(skb)->zccd2 != NULL)
-+ zccd_incref(skb_shinfo(skb)->zccd2);
-+
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
-
-@@ -694,6 +718,9 @@ int ___pskb_trim(struct sk_buff *skb, un
- offset = end;
- }
-
-+ if (skb_shinfo(skb)->nr_frags == 0) /* dropped all the pages */
-+ skb_complete_zccd(skb); /* drop zccd refs */
-+
- if (offset < len) {
- skb->data_len -= skb->len - len;
- skb->len = len;
-@@ -846,6 +873,9 @@ pull_pages:
- }
- skb_shinfo(skb)->nr_frags = k;
-
-+ if (k == 0) /* dropped all the pages */
-+ skb_complete_zccd(skb); /* drop zccd refs */
-+
- skb->tail += delta;
- skb->data_len -= delta;
-
-@@ -1362,6 +1392,15 @@ static void inline skb_split_inside_head
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
-
-+ /* Transfer zero-copy callback descriptors */
-+ BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL);
-+ skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1;
-+ skb_shinfo(skb)->zccd1 = NULL;
-+
-+ BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL);
-+ skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2;
-+ skb_shinfo(skb)->zccd2 = NULL;
-+
- skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
- skb_shinfo(skb)->nr_frags = 0;
- skb1->data_len = skb->data_len;
-@@ -1410,6 +1449,30 @@ static void inline skb_split_no_header(s
- pos += size;
- }
- skb_shinfo(skb1)->nr_frags = k;
-+
-+ if (k != 0) {
-+ /* skb1 has pages. Transfer or clone the zccds */
-+
-+ if (skb_shinfo(skb)->zccd1 != NULL) {
-+ BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL);
-+ skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1;
-+
-+ if (skb_shinfo(skb)->nr_frags == 0)
-+ skb_shinfo(skb)->zccd1 = NULL;
-+ else
-+ zccd_incref(skb_shinfo(skb)->zccd1);
-+ }
-+
-+ if (skb_shinfo(skb)->zccd2 != NULL) {
-+ BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL);
-+ skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2;
-+
-+ if (skb_shinfo(skb)->nr_frags == 0)
-+ skb_shinfo(skb)->zccd2 = NULL;
-+ else
-+ zccd_incref(skb_shinfo(skb)->zccd2);
-+ }
-+ }
- }
-
- /**
---- linux/./net/ipv4/tcp_output.c 2006-09-21 00:13:11.000000000 +0100
-+++ ../2.6.9-41.2chaos/linux/./net/ipv4/tcp_output.c 2006-09-21 18:24:26.000000000 +0100
-@@ -562,6 +562,9 @@ static unsigned char *__pskb_trim_head(s
- }
- skb_shinfo(skb)->nr_frags = k;
-
-+ if (k == 0) /* dropped all pages */
-+ skb_complete_zccd(skb);
-+
- skb->tail = skb->data;
- skb->data_len -= len;
- skb->len = skb->data_len;
---- linux/./net/ipv4/tcp.c 2006-10-10 01:49:23.000000000 +0100
-+++ ../2.6.9-41.2chaos/linux/./net/ipv4/tcp.c 2006-10-09 19:03:15.000000000 +0100
-@@ -628,8 +628,9 @@ static inline void tcp_push(struct sock
- }
- }
-
-+/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
- static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
-- size_t psize, int flags)
-+ size_t psize, int flags, struct zccd *zccd)
- {
- struct tcp_opt *tp = tcp_sk(sk);
- int mss_now;
-@@ -676,6 +677,16 @@ new_segment:
- copy = size;
-
- i = skb_shinfo(skb)->nr_frags;
-+
-+ if (zccd != NULL && /* completion callback wanted */
-+ skb_shinfo(skb)->zccd1 != NULL && /* no room for zccd */
-+ skb_shinfo(skb)->zccd2 != NULL &&
-+ skb_shinfo(skb)->zccd1 != zccd && /* room needed */
-+ skb_shinfo(skb)->zccd2 != zccd) {
-+ tcp_mark_push (tp, skb);
-+ goto new_segment;
-+ }
-+
- can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= MAX_SKB_FRAGS) {
- tcp_mark_push(tp, skb);
-@@ -692,6 +703,18 @@ new_segment:
- skb_fill_page_desc(skb, i, page, offset, copy);
- }
-
-+ if (zccd != NULL && /* completion callback wanted */
-+ skb_shinfo(skb)->zccd1 != zccd && /* new to this skbuf */
-+ skb_shinfo(skb)->zccd2 != zccd) {
-+ if (skb_shinfo(skb)->zccd1 == NULL) {
-+ skb_shinfo(skb)->zccd1 = zccd;
-+ } else {
-+ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
-+ skb_shinfo(skb)->zccd2 = zccd;
-+ }
-+ zccd_incref(zccd); /* new reference */
-+ }
-+
- skb->len += copy;
- skb->data_len += copy;
- skb->truesize += copy;
-@@ -744,8 +767,8 @@ out_err:
- return sk_stream_error(sk, flags, err);
- }
-
--ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
-- size_t size, int flags)
-+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset,
-+ size_t size, int flags, struct zccd *zccd)
- {
- ssize_t res;
- struct sock *sk = sock->sk;
-@@ -760,12 +783,18 @@ ssize_t tcp_sendpage(struct socket *sock
-
- lock_sock(sk);
- TCP_CHECK_TIMER(sk);
-- res = do_tcp_sendpages(sk, &page, offset, size, flags);
-+ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
- TCP_CHECK_TIMER(sk);
- release_sock(sk);
- return res;
- }
-
-+ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
-+ size_t size, int flags)
-+{
-+ return tcp_sendpage_zccd(sock, page, offset, size, flags, NULL);
-+}
-+
- #define TCP_PAGE(sk) (sk->sk_sndmsg_page)
- #define TCP_OFF(sk) (sk->sk_sndmsg_off)
-
-@@ -2343,6 +2372,7 @@ EXPORT_SYMBOL(tcp_read_sock);
- EXPORT_SYMBOL(tcp_recvmsg);
- EXPORT_SYMBOL(tcp_sendmsg);
- EXPORT_SYMBOL(tcp_sendpage);
-+EXPORT_SYMBOL(tcp_sendpage_zccd);
- EXPORT_SYMBOL(tcp_setsockopt);
- EXPORT_SYMBOL(tcp_shutdown);
- EXPORT_SYMBOL(tcp_statistics);