From: eeb Date: Mon, 23 Apr 2007 13:49:51 +0000 (+0000) Subject: * Removed the (new) tcp zero-copy patches X-Git-Tag: v1_7_100~167 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=f30db7aa0f565e9903f0de84ba2b0027e80992e5 * Removed the (new) tcp zero-copy patches --- diff --git a/lustre/kernel_patches/patches/new-tcp-zero-copy-2.4.21-rhel3.patch b/lustre/kernel_patches/patches/new-tcp-zero-copy-2.4.21-rhel3.patch deleted file mode 100644 index 8ce670f..0000000 --- a/lustre/kernel_patches/patches/new-tcp-zero-copy-2.4.21-rhel3.patch +++ /dev/null @@ -1,330 +0,0 @@ -diff -uNr linux-2.4.21-32.0.1.EL/include/linux/skbuff.h linux-2.4.21-32.0.1.EL-newzc/include/linux/skbuff.h ---- linux-2.4.21-32.0.1.EL/include/linux/skbuff.h 2005-06-02 05:51:57.000000000 +0300 -+++ linux-2.4.21-32.0.1.EL-newzc/include/linux/skbuff.h 2006-10-23 23:54:31.000000000 +0300 -@@ -116,6 +116,36 @@ - __u16 size; - }; - -+/* Zero Copy Callback Descriptor -+ * This struct supports receiving notification when zero-copy network I/O has -+ * completed. The ZCCD can be embedded in a struct containing the state of a -+ * zero-copy network send. Every skbuff that references that send's pages also -+ * keeps a reference on the ZCCD. When they have all been disposed of, the -+ * reference count on the ZCCD drops to zero and the callback is made, telling -+ * the original caller that the pages may now be overwritten. 
*/ -+struct zccd -+{ -+ atomic_t zccd_refcount; -+ void (*zccd_callback)(struct zccd *); -+}; -+ -+static inline void zccd_init (struct zccd *d, void (*callback)(struct zccd *)) -+{ -+ atomic_set (&d->zccd_refcount, 1); -+ d->zccd_callback = callback; -+} -+ -+static inline void zccd_incref (struct zccd *d) /* take a reference */ -+{ -+ atomic_inc (&d->zccd_refcount); -+} -+ -+static inline void zccd_decref (struct zccd *d) /* release a reference */ -+{ -+ if (atomic_dec_and_test (&d->zccd_refcount)) -+ (d->zccd_callback)(d); -+} -+ - /* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ -@@ -123,6 +153,11 @@ - atomic_t dataref; - unsigned int nr_frags; - struct sk_buff *frag_list; -+ struct zccd *zccd1; -+ struct zccd *zccd2; -+ /* NB zero-copy data is normally whole pages. We have 2 zccds in an -+ * skbuff so we don't unneccessarily split the packet where pages fall -+ * into the same packet. */ - skb_frag_t frags[MAX_SKB_FRAGS]; - }; - -@@ -1153,6 +1188,23 @@ - #endif - } - -+/* This skbuf has dropped its pages: drop refs on any zero-copy callback -+ * descriptors it has. 
*/ -+static inline void skb_complete_zccd (struct sk_buff *skb) -+{ -+ struct skb_shared_info *info = skb_shinfo(skb); -+ -+ if (info->zccd1 != NULL) { -+ zccd_decref(info->zccd1); -+ info->zccd1 = NULL; -+ } -+ -+ if (info->zccd2 != NULL) { -+ zccd_decref(info->zccd2); -+ info->zccd2 = NULL; -+ } -+} -+ - #define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next; \ - (skb != (struct sk_buff *)(queue)); \ -diff -uNr linux-2.4.21-32.0.1.EL/include/net/tcp.h linux-2.4.21-32.0.1.EL-newzc/include/net/tcp.h ---- linux-2.4.21-32.0.1.EL/include/net/tcp.h 2005-06-02 05:51:51.000000000 +0300 -+++ linux-2.4.21-32.0.1.EL-newzc/include/net/tcp.h 2006-10-23 23:54:37.000000000 +0300 -@@ -636,6 +636,8 @@ - - extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size); - extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); -+extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, -+ int flags, struct zccd *zccd); - - extern int tcp_ioctl(struct sock *sk, - int cmd, -diff -uNr linux-2.4.21-32.0.1.EL/net/core/skbuff.c linux-2.4.21-32.0.1.EL-newzc/net/core/skbuff.c ---- linux-2.4.21-32.0.1.EL/net/core/skbuff.c 2005-06-02 05:51:57.000000000 +0300 -+++ linux-2.4.21-32.0.1.EL-newzc/net/core/skbuff.c 2006-10-23 23:44:10.000000000 +0300 -@@ -210,6 +210,9 @@ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; -+ skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */ -+ skb_shinfo(skb)->zccd2 = NULL; /* not required (yet) */ -+ - return skb; - - nodata: -@@ -280,6 +283,9 @@ - { - if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { -+ /* complete zero-copy callbacks (if any) */ -+ skb_complete_zccd(skb); -+ - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -@@ -550,6 +556,8 @@ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - 
skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; -+ skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */ -+ skb_shinfo(skb)->zccd2 = NULL; /* not required */ - - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; -@@ -604,6 +612,18 @@ - get_page(skb_shinfo(n)->frags[i].page); - } - skb_shinfo(n)->nr_frags = i; -+ -+ if (skb_shinfo(skb)->zccd1 != NULL) { -+ BUG_TRAP(skb_shinfo(n)->zccd1 = NULL); -+ skb_shinfo(n)->zccd1 = skb_shinfo(skb)->zccd1; -+ zccd_incref(skb_shinfo(n)->zccd1); -+ } -+ -+ if (skb_shinfo(skb)->zccd2 != NULL) { -+ BUG_TRAP(skb_shinfo(n)->zccd2 = NULL); -+ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2; -+ zccd_incref(skb_shinfo(n)->zccd2); -+ } - } - - if (skb_shinfo(skb)->frag_list) { -@@ -653,6 +673,13 @@ - memcpy(data+nhead, skb->head, skb->tail-skb->head); - memcpy(data+size, skb->end, sizeof(struct skb_shared_info)); - -+ /* zero-copy descriptors have been copied into the new shinfo - -+ * account the new references */ -+ if (skb_shinfo(skb)->zccd1 != NULL) -+ zccd_incref(skb_shinfo(skb)->zccd1); -+ if (skb_shinfo(skb)->zccd2 != NULL) -+ zccd_incref(skb_shinfo(skb)->zccd2); -+ - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - -@@ -809,6 +836,9 @@ - offset = end; - } - -+ if (skb_shinfo(skb)->nr_frags == 0) /* dropped all the pages */ -+ skb_complete_zccd(skb); /* drop zccd refs */ -+ - if (offset < len) { - skb->data_len -= skb->len - len; - skb->len = len; -@@ -962,6 +992,9 @@ - } - skb_shinfo(skb)->nr_frags = k; - -+ if (k == 0) /* dropped all the pages */ -+ skb_complete_zccd(skb); /* drop zccd refs */ -+ - skb->tail += delta; - skb->data_len -= delta; - -diff -uNr linux-2.4.21-32.0.1.EL/net/ipv4/tcp.c linux-2.4.21-32.0.1.EL-newzc/net/ipv4/tcp.c ---- linux-2.4.21-32.0.1.EL/net/ipv4/tcp.c 2005-06-02 05:51:51.000000000 +0300 -+++ linux-2.4.21-32.0.1.EL-newzc/net/ipv4/tcp.c 2006-10-24 00:10:34.000000000 +0300 -@@ -1015,7 +1015,7 @@ - goto out; - } - --ssize_t
do_tcp_sendpages(struct sock *sk, struct kveclet *let, int poffset, size_t psize, int flags); -+ssize_t do_tcp_sendpages(struct sock *sk, struct kveclet *let, int poffset, size_t psize, int flags, struct zccd *zccd); - - static inline int - can_coalesce(struct sk_buff *skb, int i, struct page *page, int off) -@@ -1094,7 +1094,7 @@ - return err; - } - --ssize_t do_tcp_sendpages(struct sock *sk, struct kveclet *let, int poffset, size_t psize, int flags) -+ssize_t do_tcp_sendpages(struct sock *sk, struct kveclet *let, int poffset, size_t psize, int flags, struct zccd *zccd) - { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int mss_now; -@@ -1147,6 +1147,17 @@ - copy = size; - - i = skb_shinfo(skb)->nr_frags; -+ -+ if (zccd != NULL && /* this is a zcc I/O */ -+ skb_shinfo(skb)->zccd1 != NULL && /* skb is part of a zcc I/O */ -+ skb_shinfo(skb)->zccd2 != NULL && -+ skb_shinfo(skb)->zccd1 != zccd && /* not the same one */ -+ skb_shinfo(skb)->zccd2 != zccd) -+ { -+ tcp_mark_push (tp, skb); -+ goto new_segment; -+ } -+ - if (can_coalesce(skb, i, page, offset)) { - skb_shinfo(skb)->frags[i-1].size += copy; - } else if (i < MAX_SKB_FRAGS) { -@@ -1157,6 +1168,18 @@ - goto new_segment; - } - -+ if (zccd != NULL && /* completion callback wanted */ -+ skb_shinfo(skb)->zccd1 != zccd && /* new to this skbuf */ -+ skb_shinfo(skb)->zccd2 != zccd) { -+ if (skb_shinfo(skb)->zccd1 == NULL) { -+ skb_shinfo(skb)->zccd1 = zccd; -+ } else { -+ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL); -+ skb_shinfo(skb)->zccd2 = zccd; -+ } -+ zccd_incref(zccd); /* new reference */ -+ } -+ - skb->len += copy; - skb->data_len += copy; - skb->ip_summed = CHECKSUM_HW; -@@ -1209,7 +1232,8 @@ - return tcp_error(sk, flags, err); - } - --ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) -+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, -+ size_t size, int flags, struct zccd *zccd) - { - struct kveclet let = { page, offset, size }; - 
ssize_t res; -@@ -1224,12 +1248,18 @@ - - lock_sock(sk); - TCP_CHECK_TIMER(sk); -- res = do_tcp_sendpages(sk, &let, 0, size, flags); -+ res = do_tcp_sendpages(sk, &let, 0, size, flags, zccd); - TCP_CHECK_TIMER(sk); - release_sock(sk); - return res; - } - -+ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) -+{ -+ return tcp_sendpage_zccd(sock, page, offset, size, flags, NULL); -+} -+ -+ - static void tcp_kvec_write_worker(struct tcp_write_async_info *info) - { - struct sock *sk = info->sk; -@@ -1238,7 +1268,7 @@ - !(sk->route_caps & TCP_ZC_CSUM_FLAGS)) - BUG(); - -- res = do_tcp_sendpages(sk, info->cur_let, info->offset, info->len - info->done, MSG_DONTWAIT); -+ res = do_tcp_sendpages(sk, info->cur_let, info->offset, info->len - info->done, MSG_DONTWAIT, NULL); - if (res > 0) - info->done += res; - -diff -uNr linux-2.4.21-32.0.1.EL/net/ipv4/tcp_output.c linux-2.4.21-32.0.1.EL-newzc/net/ipv4/tcp_output.c ---- linux-2.4.21-32.0.1.EL/net/ipv4/tcp_output.c 2005-06-02 05:51:50.000000000 +0300 -+++ linux-2.4.21-32.0.1.EL-newzc/net/ipv4/tcp_output.c 2006-10-23 23:44:10.000000000 +0300 -@@ -363,6 +363,15 @@ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; - -+ /* Transfer zero-copy callback descriptors */ -+ BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL); -+ skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1; -+ skb_shinfo(skb)->zccd1 = NULL; -+ -+ BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL); -+ skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2; -+ skb_shinfo(skb)->zccd2 = NULL; -+ - skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; - skb_shinfo(skb)->nr_frags = 0; - -@@ -409,6 +418,30 @@ - pos += size; - } - skb_shinfo(skb1)->nr_frags = k; -+ -+ if (k != 0) { -+ /* skb1 has pages. 
Transfer or clone the zccds */ -+ -+ if (skb_shinfo(skb)->zccd1 != NULL) { -+ BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL); -+ skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1; -+ -+ if (skb_shinfo(skb)->nr_frags == 0) -+ skb_shinfo(skb)->zccd1 = NULL; -+ else -+ zccd_incref(skb_shinfo(skb)->zccd1); -+ } -+ -+ if (skb_shinfo(skb)->zccd2 != NULL) { -+ BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL); -+ skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2; -+ -+ if (skb_shinfo(skb)->nr_frags == 0) -+ skb_shinfo(skb)->zccd2 = NULL; -+ else -+ zccd_incref(skb_shinfo(skb)->zccd2); -+ } -+ } - } - } - ---- linux-2.4.21-32.0.1.EL/net/netsyms.c 2005-06-02 05:51:50.000000000 +0300 -+++ linux-2.4.21-32.0.1.EL-newzc/net/netsyms.c 2006-10-23 23:44:10.000000000 +0300 -@@ -424,6 +424,7 @@ - EXPORT_SYMBOL(ip_generic_getfrag); - - #endif -+EXPORT_SYMBOL(tcp_sendpage_zccd); - EXPORT_SYMBOL(tcp_read_sock); - - EXPORT_SYMBOL(netlink_set_err); diff --git a/lustre/kernel_patches/patches/new-tcp-zero-copy-2.4.29-vanilla.patch b/lustre/kernel_patches/patches/new-tcp-zero-copy-2.4.29-vanilla.patch deleted file mode 100644 index c630c81..0000000 --- a/lustre/kernel_patches/patches/new-tcp-zero-copy-2.4.29-vanilla.patch +++ /dev/null @@ -1,317 +0,0 @@ ---- linux-2.4.29-orig/include/linux/skbuff.h 2006-10-10 01:25:07.000000000 +0100 -+++ linux-2.4.29/include/linux/skbuff.h 2006-10-10 00:42:59.000000000 +0100 -@@ -116,6 +116,36 @@ struct skb_frag_struct - __u16 size; - }; - -+/* Zero Copy Callback Descriptor -+ * This struct supports receiving notification when zero-copy network I/O has -+ * completed. The ZCCD can be embedded in a struct containing the state of a -+ * zero-copy network send. Every skbuff that references that send's pages also -+ * keeps a reference on the ZCCD. When they have all been disposed of, the -+ * reference count on the ZCCD drops to zero and the callback is made, telling -+ * the original caller that the pages may now be overwritten. 
*/ -+struct zccd -+{ -+ atomic_t zccd_refcount; -+ void (*zccd_callback)(struct zccd *); -+}; -+ -+static inline void zccd_init (struct zccd *d, void (*callback)(struct zccd *)) -+{ -+ atomic_set (&d->zccd_refcount, 1); -+ d->zccd_callback = callback; -+} -+ -+static inline void zccd_incref (struct zccd *d) /* take a reference */ -+{ -+ atomic_inc (&d->zccd_refcount); -+} -+ -+static inline void zccd_decref (struct zccd *d) /* release a reference */ -+{ -+ if (atomic_dec_and_test (&d->zccd_refcount)) -+ (d->zccd_callback)(d); -+} -+ - /* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ -@@ -123,6 +153,11 @@ struct skb_shared_info { - atomic_t dataref; - unsigned int nr_frags; - struct sk_buff *frag_list; -+ struct zccd *zccd1; -+ struct zccd *zccd2; -+ /* NB zero-copy data is normally whole pages. We have 2 zccds in an -+ * skbuff so we don't unneccessarily split the packet where pages fall -+ * into the same packet. */ - skb_frag_t frags[MAX_SKB_FRAGS]; - }; - -@@ -1131,6 +1166,23 @@ static inline void kunmap_skb_frag(void - #endif - } - -+/* This skbuf has dropped its pages: drop refs on any zero-copy callback -+ * descriptors it has. 
*/ -+static inline void skb_complete_zccd (struct sk_buff *skb) -+{ -+ struct skb_shared_info *info = skb_shinfo(skb); -+ -+ if (info->zccd1 != NULL) { -+ zccd_decref(info->zccd1); -+ info->zccd1 = NULL; -+ } -+ -+ if (info->zccd2 != NULL) { -+ zccd_decref(info->zccd2); -+ info->zccd2 = NULL; -+ } -+} -+ - #define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next; \ - (skb != (struct sk_buff *)(queue)); \ ---- linux-2.4.29-orig/include/net/tcp.h 2006-10-10 01:25:07.000000000 +0100 -+++ linux-2.4.29/include/net/tcp.h 2006-10-10 00:43:26.000000000 +0100 -@@ -674,6 +674,8 @@ extern int tcp_v4_tw_remember_stam - - extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size); - extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); -+extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, -+ int flags, struct zccd *zccd); - - extern int tcp_ioctl(struct sock *sk, - int cmd, ---- linux-2.4.29-orig/net/core/skbuff.c 2006-10-10 01:25:08.000000000 +0100 -+++ linux-2.4.29/net/core/skbuff.c 2006-10-10 02:03:49.000000000 +0100 -@@ -208,6 +208,9 @@ struct sk_buff *alloc_skb(unsigned int s - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; -+ skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */ -+ skb_shinfo(skb)->zccd2 = NULL; /* not required (yet) */ -+ - return skb; - - nodata: -@@ -277,6 +280,9 @@ static void skb_release_data(struct sk_b - { - if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { -+ /* complete zero-copy callbacks (if any) */ -+ skb_complete_zccd(skb); -+ - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -@@ -535,6 +541,8 @@ int skb_linearize(struct sk_buff *skb, i - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; -+ skb_shinfo(skb)->zccd1 = NULL; 
/* zero-copy completion callback */ -+ skb_shinfo(skb)->zccd2 = NULL; /* not required */ - - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; -@@ -589,6 +597,18 @@ struct sk_buff *pskb_copy(struct sk_buff - get_page(skb_shinfo(n)->frags[i].page); - } - skb_shinfo(n)->nr_frags = i; -+ -+ if (skb_shinfo(skb)->zccd1 != NULL) { -+ BUG_TRAP(skb_shinfo(n)->zccd1 = NULL); -+ skb_shinfo(n)->zccd1 = skb_shinfo(skb)->zccd1; -+ zccd_incref(skb_shinfo(n)->zccd1); -+ } -+ -+ if (skb_shinfo(skb)->zccd2 != NULL) { -+ BUG_TRAP(skb_shinfo(n)->zccd2 = NULL); -+ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2; -+ zccd_incref(skb_shinfo(n)->zccd2); -+ } - } - - if (skb_shinfo(skb)->frag_list) { -@@ -638,6 +658,13 @@ int pskb_expand_head(struct sk_buff *skb - memcpy(data+nhead, skb->head, skb->tail-skb->head); - memcpy(data+size, skb->end, sizeof(struct skb_shared_info)); - -+ /* zero-copy descriptors have been copied into the new shinfo - -+ * account the new references */ -+ if (skb_shinfo(skb)->zccd1 != NULL) -+ zccd_incref(skb_shinfo(skb)->zccd1); -+ if (skb_shinfo(skb)->zccd2 != NULL) -+ zccd_incref(skb_shinfo(skb)->zccd2); -+ - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - -@@ -794,6 +821,9 @@ int ___pskb_trim(struct sk_buff *skb, un - offset = end; - } - -+ if (skb_shinfo(skb)->nr_frags == 0) /* dropped all the pages */ -+ skb_complete_zccd(skb); /* drop zccd refs */ -+ - if (offset < len) { - skb->data_len -= skb->len - len; - skb->len = len; -@@ -947,6 +977,9 @@ pull_pages: - } - skb_shinfo(skb)->nr_frags = k; - -+ if (k == 0) /* dropped all the pages */ -+ skb_complete_zccd(skb); /* drop zccd refs */ -+ - skb->tail += delta; - skb->data_len -= delta; - ---- linux-2.4.29-orig/net/ipv4/tcp.c 2006-10-10 01:25:08.000000000 +0100 -+++ linux-2.4.29/net/ipv4/tcp.c 2006-10-09 20:53:28.000000000 +0100 -@@ -749,7 +749,8 @@ do_interrupted: - goto out; - } - --ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t
psize, int flags); -+ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, -+ struct zccd *zccd); - - static inline int - can_coalesce(struct sk_buff *skb, int i, struct page *page, int off) -@@ -828,7 +829,8 @@ static int tcp_error(struct sock *sk, in - return err; - } - --ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags) -+ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, -+ struct zccd *zccd) - { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int mss_now; -@@ -876,6 +878,17 @@ new_segment: - copy = size; - - i = skb_shinfo(skb)->nr_frags; -+ -+ if (zccd != NULL && /* this is a zcc I/O */ -+ skb_shinfo(skb)->zccd1 != NULL && /* skb is part of a zcc I/O */ -+ skb_shinfo(skb)->zccd2 != NULL && -+ skb_shinfo(skb)->zccd1 != zccd && /* not the same one */ -+ skb_shinfo(skb)->zccd2 != zccd) -+ { -+ tcp_mark_push (tp, skb); -+ goto new_segment; -+ } -+ - if (can_coalesce(skb, i, page, offset)) { - skb_shinfo(skb)->frags[i-1].size += copy; - } else if (i < MAX_SKB_FRAGS) { -@@ -886,6 +899,18 @@ new_segment: - goto new_segment; - } - -+ if (zccd != NULL && /* completion callback wanted */ -+ skb_shinfo(skb)->zccd1 != zccd && /* new to this skbuf */ -+ skb_shinfo(skb)->zccd2 != zccd) { -+ if (skb_shinfo(skb)->zccd1 == NULL) { -+ skb_shinfo(skb)->zccd1 = zccd; -+ } else { -+ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL); -+ skb_shinfo(skb)->zccd2 = zccd; -+ } -+ zccd_incref(zccd); /* new reference */ -+ } -+ - skb->len += copy; - skb->data_len += copy; - skb->ip_summed = CHECKSUM_HW; -@@ -934,7 +959,8 @@ out_err: - return tcp_error(sk, flags, err); - } - --ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) -+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, -+ size_t size, int flags, struct zccd *zccd) - { - ssize_t res; - struct sock *sk = sock->sk; -@@ 
-949,12 +975,17 @@ ssize_t tcp_sendpage(struct socket *sock - - lock_sock(sk); - TCP_CHECK_TIMER(sk); -- res = do_tcp_sendpages(sk, &page, offset, size, flags); -+ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd); - TCP_CHECK_TIMER(sk); - release_sock(sk); - return res; - } - -+ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) -+{ -+ return tcp_sendpage_zccd(sock, page, offset, size, flags, NULL); -+} -+ - #define TCP_PAGE(sk) (sk->tp_pinfo.af_tcp.sndmsg_page) - #define TCP_OFF(sk) (sk->tp_pinfo.af_tcp.sndmsg_off) - ---- linux-2.4.29-orig/net/ipv4/tcp_output.c 2004-11-17 11:54:22.000000000 +0000 -+++ linux-2.4.29/net/ipv4/tcp_output.c 2006-10-10 01:55:29.000000000 +0100 -@@ -379,6 +379,15 @@ static void skb_split(struct sk_buff *sk - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; - -+ /* Transfer zero-copy callback descriptors */ -+ BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL); -+ skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1; -+ skb_shinfo(skb)->zccd1 = NULL; -+ -+ BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL); -+ skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2; -+ skb_shinfo(skb)->zccd2 = NULL; -+ - skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; - skb_shinfo(skb)->nr_frags = 0; - -@@ -425,6 +434,30 @@ static void skb_split(struct sk_buff *sk - pos += size; - } - skb_shinfo(skb1)->nr_frags = k; -+ -+ if (k != 0) { -+ /* skb1 has pages. 
Transfer or clone the zccds */ -+ -+ if (skb_shinfo(skb)->zccd1 != NULL) { -+ BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL); -+ skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1; -+ -+ if (skb_shinfo(skb)->nr_frags == 0) -+ skb_shinfo(skb)->zccd1 = NULL; -+ else -+ zccd_incref(skb_shinfo(skb)->zccd1); -+ } -+ -+ if (skb_shinfo(skb)->zccd2 != NULL) { -+ BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL); -+ skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2; -+ -+ if (skb_shinfo(skb)->nr_frags == 0) -+ skb_shinfo(skb)->zccd2 = NULL; -+ else -+ zccd_incref(skb_shinfo(skb)->zccd2); -+ } -+ } - } - } - ---- linux-2.4.29.orig/net/netsyms.c 2003-11-03 23:22:13.000000000 +0300 -+++ linux-2.4.29/net/netsyms.c 2003-12-04 20:42:50.000000000 +0300 -@@ -417,6 +417,7 @@ - - #endif - -+EXPORT_SYMBOL(tcp_sendpage_zccd); - EXPORT_SYMBOL(tcp_read_sock); - - EXPORT_SYMBOL(netlink_set_err); diff --git a/lustre/kernel_patches/patches/new-tcp-zero-copy-2.6.5-sles9.patch b/lustre/kernel_patches/patches/new-tcp-zero-copy-2.6.5-sles9.patch deleted file mode 100644 index 199da8d..0000000 --- a/lustre/kernel_patches/patches/new-tcp-zero-copy-2.6.5-sles9.patch +++ /dev/null @@ -1,329 +0,0 @@ -diff -ur linux-2.6.5-7.252/include/linux/skbuff.h linux-2.6.5-7.252-tcp/include/linux/skbuff.h ---- linux-2.6.5-7.252/include/linux/skbuff.h 2006-10-11 21:46:38.000000000 +0300 -+++ linux-2.6.5-7.252-tcp/include/linux/skbuff.h 2006-10-11 21:52:56.000000000 +0300 -@@ -135,6 +135,36 @@ - __u16 size; - }; - -+/* Zero Copy Callback Descriptor -+ * This struct supports receiving notification when zero-copy network I/O has -+ * completed. The ZCCD can be embedded in a struct containing the state of a -+ * zero-copy network send. Every skbuff that references that send's pages also -+ * keeps a reference on the ZCCD. When they have all been disposed of, the -+ * reference count on the ZCCD drops to zero and the callback is made, telling -+ * the original caller that the pages may now be overwritten. 
*/ -+struct zccd -+{ -+ atomic_t zccd_refcount; -+ void (*zccd_callback)(struct zccd *); -+}; -+ -+static inline void zccd_init (struct zccd *d, void (*callback)(struct zccd *)) -+{ -+ atomic_set (&d->zccd_refcount, 1); -+ d->zccd_callback = callback; -+} -+ -+static inline void zccd_incref (struct zccd *d) /* take a reference */ -+{ -+ atomic_inc (&d->zccd_refcount); -+} -+ -+static inline void zccd_decref (struct zccd *d) /* release a reference */ -+{ -+ if (atomic_dec_and_test (&d->zccd_refcount)) -+ (d->zccd_callback)(d); -+} -+ - /* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ -@@ -144,6 +174,11 @@ - unsigned short tso_size; - unsigned short tso_segs; - struct sk_buff *frag_list; -+ struct zccd *zccd1; -+ struct zccd *zccd2; -+ /* NB zero-copy data is normally whole pages. We have 2 zccds in an -+ * skbuff so we don't unneccessarily split the packet where pages fall -+ * into the same packet. */ - skb_frag_t frags[MAX_SKB_FRAGS]; - }; - -@@ -1152,6 +1187,23 @@ - #endif - } - -+/* This skbuf has dropped its pages: drop refs on any zero-copy callback -+ * descriptors it has. 
*/ -+static inline void skb_complete_zccd (struct sk_buff *skb) -+{ -+ struct skb_shared_info *info = skb_shinfo(skb); -+ -+ if (info->zccd1 != NULL) { -+ zccd_decref(info->zccd1); -+ info->zccd1 = NULL; -+ } -+ -+ if (info->zccd2 != NULL) { -+ zccd_decref(info->zccd2); -+ info->zccd2 = NULL; -+ } -+} -+ - #define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next, prefetch(skb->next); \ - (skb != (struct sk_buff *)(queue)); \ -diff -ur linux-2.6.5-7.252/include/net/tcp.h linux-2.6.5-7.252-tcp/include/net/tcp.h ---- linux-2.6.5-7.252/include/net/tcp.h 2006-10-11 21:46:38.000000000 +0300 -+++ linux-2.6.5-7.252-tcp/include/net/tcp.h 2006-10-11 21:52:56.000000000 +0300 -@@ -764,6 +764,8 @@ - extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size); - extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); -+extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, -+ int flags, struct zccd *zccd); - - extern int tcp_ioctl(struct sock *sk, - int cmd, -diff -ur linux-2.6.5-7.252/net/core/dev.c linux-2.6.5-7.252-tcp/net/core/dev.c ---- linux-2.6.5-7.252/net/core/dev.c 2006-10-11 21:46:38.000000000 +0300 -+++ linux-2.6.5-7.252-tcp/net/core/dev.c 2006-10-11 21:52:56.000000000 +0300 -@@ -1322,6 +1322,8 @@ - ninfo->tso_segs = skb_shinfo(skb)->tso_segs; - ninfo->nr_frags = 0; - ninfo->frag_list = NULL; -+ ninfo->zccd1 = NULL; /* zero copy completion callback */ -+ ninfo->zccd2 = NULL; /* not required */ - - /* Offset between the two in bytes */ - offset = data - skb->head; -diff -ur linux-2.6.5-7.252/net/core/skbuff.c linux-2.6.5-7.252-tcp/net/core/skbuff.c ---- linux-2.6.5-7.252/net/core/skbuff.c 2006-10-11 21:46:38.000000000 +0300 -+++ linux-2.6.5-7.252-tcp/net/core/skbuff.c 2006-10-11 22:06:31.000000000 +0300 -@@ -152,6 +152,8 @@ - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; - skb_shinfo(skb)->frag_list = NULL; -+ 
skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */ -+ skb_shinfo(skb)->zccd2 = NULL; /* not required (yet) */ - out: - return skb; - nodata: -@@ -186,6 +188,9 @@ - { - if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { -+ /* complete zero-copy callbacks (if any) */ -+ skb_complete_zccd(skb); -+ - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -@@ -456,7 +461,29 @@ - skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); - } -+ -+ /* Transfer zero-copy callback descriptors */ -+ BUG_TRAP(skb_shinfo(n)->zccd1 == NULL); -+ skb_shinfo(n)->zccd1 = skb_shinfo(skb)->zccd1; -+ skb_shinfo(skb)->zccd1 = NULL; -+ -+ BUG_TRAP(skb_shinfo(n)->zccd2 == NULL); -+ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2; -+ skb_shinfo(skb)->zccd2 = NULL; -+ - skb_shinfo(n)->nr_frags = i; -+ -+ if (skb_shinfo(skb)->zccd1 != NULL) { -+ BUG_TRAP(skb_shinfo(n)->zccd1 == NULL); -+ skb_shinfo(n)->zccd1 = skb_shinfo(skb)->zccd1; -+ zccd_incref(skb_shinfo(n)->zccd1); -+ } -+ -+ if (skb_shinfo(skb)->zccd2 != NULL) { -+ BUG_TRAP(skb_shinfo(n)->zccd2 == NULL); -+ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2; -+ zccd_incref(skb_shinfo(n)->zccd2); -+ } - } - skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size; - skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs; -@@ -508,6 +535,13 @@ - memcpy(data + nhead, skb->head, skb->tail - skb->head); - memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); - -+ /* zero-copy descriptors have been copied into the new shinfo - -+ * account the new references */ -+ if (skb_shinfo(skb)->zccd1 != NULL) -+ zccd_incref(skb_shinfo(skb)->zccd1); -+ if (skb_shinfo(skb)->zccd2 != NULL) -+ zccd_incref(skb_shinfo(skb)->zccd2); -+ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - -@@ -671,6 +705,9 @@ - offset = end; - } - -+ if (skb_shinfo(skb)->nr_frags == 0) /* dropped all the pages */ -+ 
skb_complete_zccd(skb); /* drop zccd refs */ -+ - if (offset < len) { - skb->data_len -= skb->len - len; - skb->len = len; -@@ -823,6 +860,9 @@ - } - skb_shinfo(skb)->nr_frags = k; - -+ if (k == 0) /* dropped all the pages */ -+ skb_complete_zccd(skb); /* drop zccd refs */ -+ - skb->tail += delta; - skb->data_len -= delta; - -diff -ur linux-2.6.5-7.252/net/ipv4/tcp.c linux-2.6.5-7.252-tcp/net/ipv4/tcp.c ---- linux-2.6.5-7.252/net/ipv4/tcp.c 2006-10-11 21:46:38.000000000 +0300 -+++ linux-2.6.5-7.252-tcp/net/ipv4/tcp.c 2006-10-11 23:15:24.000000000 +0300 -@@ -799,7 +799,7 @@ - } - - ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, -- size_t psize, int flags); -+ size_t psize, int flags, struct zccd *zccd); - - static inline int can_coalesce(struct sk_buff *skb, int i, struct page *page, - int off) -@@ -881,8 +881,9 @@ - return err; - } - -+/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */ - ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, -- size_t psize, int flags) -+ size_t psize, int flags, struct zccd *zccd) - { - struct tcp_opt *tp = tcp_sk(sk); - int mss_now; -@@ -929,6 +930,16 @@ - copy = size; - - i = skb_shinfo(skb)->nr_frags; -+ -+ if (zccd != NULL && /* completion callback wanted */ -+ skb_shinfo(skb)->zccd1 != NULL && /* no room for zccd */ -+ skb_shinfo(skb)->zccd2 != NULL && -+ skb_shinfo(skb)->zccd1 != zccd && /* room needed */ -+ skb_shinfo(skb)->zccd2 != zccd) { -+ tcp_mark_push (tp, skb); -+ goto new_segment; -+ } -+ - if (can_coalesce(skb, i, page, offset)) { - skb_shinfo(skb)->frags[i - 1].size += copy; - } else if (i < MAX_SKB_FRAGS) { -@@ -939,6 +950,18 @@ - goto new_segment; - } - -+ if (zccd != NULL && /* completion callback wanted */ -+ skb_shinfo(skb)->zccd1 != zccd && /* new to this skbuf */ -+ skb_shinfo(skb)->zccd2 != zccd) { -+ if (skb_shinfo(skb)->zccd1 == NULL) { -+ skb_shinfo(skb)->zccd1 = zccd; -+ } else { -+ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL); -+ 
skb_shinfo(skb)->zccd2 = zccd; -+ } -+ zccd_incref(zccd); /* new reference */ -+ } -+ - skb->len += copy; - skb->data_len += copy; - skb->ip_summed = CHECKSUM_HW; -@@ -987,8 +1010,8 @@ - return tcp_error(sk, flags, err); - } - --ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, -- size_t size, int flags) -+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, -+ size_t size, int flags, struct zccd *zccd) - { - ssize_t res; - struct sock *sk = sock->sk; -@@ -1003,12 +1026,19 @@ - - lock_sock(sk); - TCP_CHECK_TIMER(sk); -- res = do_tcp_sendpages(sk, &page, offset, size, flags); -+ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd); - TCP_CHECK_TIMER(sk); - release_sock(sk); - return res; - } - -+ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, -+ size_t size, int flags) -+{ -+ return tcp_sendpage_zccd(sock, page, offset, size, flags, NULL); -+} -+ -+ - #define TCP_PAGE(sk) (inet_sk(sk)->sndmsg_page) - #define TCP_OFF(sk) (inet_sk(sk)->sndmsg_off) - -@@ -2872,6 +2902,7 @@ - EXPORT_SYMBOL(tcp_recvmsg); - EXPORT_SYMBOL(tcp_sendmsg); - EXPORT_SYMBOL(tcp_sendpage); -+EXPORT_SYMBOL(tcp_sendpage_zccd); - EXPORT_SYMBOL(tcp_setsockopt); - EXPORT_SYMBOL(tcp_shutdown); - EXPORT_SYMBOL(tcp_sockets_allocated); -diff -ur linux-2.6.5-7.252/net/ipv4/tcp_output.c linux-2.6.5-7.252-tcp/net/ipv4/tcp_output.c ---- linux-2.6.5-7.252/net/ipv4/tcp_output.c 2006-10-11 21:46:38.000000000 +0300 -+++ linux-2.6.5-7.252-tcp/net/ipv4/tcp_output.c 2006-10-11 22:14:04.000000000 +0300 -@@ -411,6 +411,30 @@ - pos += size; - } - skb_shinfo(skb1)->nr_frags = k; -+ -+ if (k != 0) { -+ /* skb1 has pages. 
Transfer or clone the zccds */ -+ -+ if (skb_shinfo(skb)->zccd1 != NULL) { -+ BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL); -+ skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1; -+ -+ if (skb_shinfo(skb)->nr_frags == 0) -+ skb_shinfo(skb)->zccd1 = NULL; -+ else -+ zccd_incref(skb_shinfo(skb)->zccd1); -+ } -+ -+ if (skb_shinfo(skb)->zccd2 != NULL) { -+ BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL); -+ skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2; -+ -+ if (skb_shinfo(skb)->nr_frags == 0) -+ skb_shinfo(skb)->zccd2 = NULL; -+ else -+ zccd_incref(skb_shinfo(skb)->zccd2); -+ } -+ } - } - } - -@@ -505,6 +529,9 @@ - } - skb_shinfo(skb)->nr_frags = k; - -+ if (k == 0) /* dropped all pages */ -+ skb_complete_zccd(skb); -+ - skb->tail = skb->data; - skb->data_len -= len; - skb->len = skb->data_len; diff --git a/lustre/kernel_patches/patches/new-tcp-zero-copy-2.6.9-41.2chaos.patch b/lustre/kernel_patches/patches/new-tcp-zero-copy-2.6.9-41.2chaos.patch deleted file mode 100644 index 8782730..0000000 --- a/lustre/kernel_patches/patches/new-tcp-zero-copy-2.6.9-41.2chaos.patch +++ /dev/null @@ -1,318 +0,0 @@ ---- linux/./include/net/tcp.h 2006-10-10 01:49:23.000000000 +0100 -+++ ../2.6.9-41.2chaos/linux/./include/net/tcp.h 2006-09-21 17:15:21.000000000 +0100 -@@ -787,6 +787,8 @@ extern int tcp_v4_tw_remember_stam - extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size); - extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); -+extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, -+ int flags, struct zccd *zccd); - - extern int tcp_ioctl(struct sock *sk, - int cmd, ---- linux/./include/linux/skbuff.h 2006-10-10 01:49:23.000000000 +0100 -+++ ../2.6.9-41.2chaos/linux/./include/linux/skbuff.h 2006-10-06 18:09:35.000000000 +0100 -@@ -134,6 +134,36 @@ struct skb_frag_struct { - __u16 size; - }; - -+/* Zero Copy Callback Descriptor -+ * This struct supports 
receiving notification when zero-copy network I/O has -+ * completed. The ZCCD can be embedded in a struct containing the state of a -+ * zero-copy network send. Every skbuff that references that send's pages also -+ * keeps a reference on the ZCCD. When they have all been disposed of, the -+ * reference count on the ZCCD drops to zero and the callback is made, telling -+ * the original caller that the pages may now be overwritten. */ -+struct zccd -+{ -+ atomic_t zccd_refcount; -+ void (*zccd_callback)(struct zccd *); -+}; -+ -+static inline void zccd_init (struct zccd *d, void (*callback)(struct zccd *)) -+{ -+ atomic_set (&d->zccd_refcount, 1); -+ d->zccd_callback = callback; -+} -+ -+static inline void zccd_incref (struct zccd *d) /* take a reference */ -+{ -+ atomic_inc (&d->zccd_refcount); -+} -+ -+static inline void zccd_decref (struct zccd *d) /* release a reference */ -+{ -+ if (atomic_dec_and_test (&d->zccd_refcount)) -+ (d->zccd_callback)(d); -+} -+ - /* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ -@@ -143,6 +173,11 @@ struct skb_shared_info { - unsigned short tso_size; - unsigned short tso_segs; - struct sk_buff *frag_list; -+ struct zccd *zccd1; -+ struct zccd *zccd2; -+ /* NB zero-copy data is normally whole pages. We have 2 zccds in an -+ * skbuff so we don't unneccessarily split the packet where pages fall -+ * into the same packet. */ - skb_frag_t frags[MAX_SKB_FRAGS]; - }; - -@@ -1070,6 +1105,23 @@ static inline void kunmap_skb_frag(void - #endif - } - -+/* This skbuf has dropped its pages: drop refs on any zero-copy callback -+ * descriptors it has. 
*/ -+static inline void skb_complete_zccd (struct sk_buff *skb) -+{ -+ struct skb_shared_info *info = skb_shinfo(skb); -+ -+ if (info->zccd1 != NULL) { -+ zccd_decref(info->zccd1); -+ info->zccd1 = NULL; -+ } -+ -+ if (info->zccd2 != NULL) { -+ zccd_decref(info->zccd2); -+ info->zccd2 = NULL; -+ } -+} -+ - #define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next, prefetch(skb->next); \ - (skb != (struct sk_buff *)(queue)); \ ---- linux/./net/core/dev.c 2006-10-10 01:49:23.000000000 +0100 -+++ ../2.6.9-41.2chaos/linux/./net/core/dev.c 2006-09-21 16:53:45.000000000 +0100 -@@ -1140,6 +1140,8 @@ int __skb_linearize(struct sk_buff *skb, - ninfo->tso_segs = skb_shinfo(skb)->tso_segs; - ninfo->nr_frags = 0; - ninfo->frag_list = NULL; -+ ninfo->zccd1 = NULL; /* zero copy completion callback */ -+ ninfo->zccd2 = NULL; /* not required */ - - /* Offset between the two in bytes */ - offset = data - skb->head; ---- linux/./net/core/skbuff.c 2006-10-10 01:49:23.000000000 +0100 -+++ ../2.6.9-41.2chaos/linux/./net/core/skbuff.c 2006-10-10 01:46:16.000000000 +0100 -@@ -155,6 +155,8 @@ struct sk_buff *alloc_skb(unsigned int s - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; - skb_shinfo(skb)->frag_list = NULL; -+ skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */ -+ skb_shinfo(skb)->zccd2 = NULL; /* not required (yet) */ - out: - return skb; - nodata: -@@ -189,6 +191,9 @@ void skb_release_data(struct sk_buff *sk - { - if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { -+ /* complete zero-copy callbacks (if any) */ -+ skb_complete_zccd(skb); -+ - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -@@ -484,6 +489,18 @@ struct sk_buff *pskb_copy(struct sk_buff - get_page(skb_shinfo(n)->frags[i].page); - } - skb_shinfo(n)->nr_frags = i; -+ -+ if (skb_shinfo(skb)->zccd1 != NULL) { -+ BUG_TRAP(skb_shinfo(n)->zccd1 == NULL); -+ skb_shinfo(n)->zccd1 = skb_shinfo(skb)->zccd1; -+ 
zccd_incref(skb_shinfo(n)->zccd1); -+ } -+ -+ if (skb_shinfo(skb)->zccd2 != NULL) { -+ BUG_TRAP(skb_shinfo(n)->zccd2 == NULL); -+ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2; -+ zccd_incref(skb_shinfo(n)->zccd2); -+ } - } - - if (skb_shinfo(skb)->frag_list) { -@@ -533,6 +550,13 @@ int pskb_expand_head(struct sk_buff *skb - memcpy(data + nhead, skb->head, skb->tail - skb->head); - memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); - -+ /* zero-copy descriptors have been copied into the new shinfo - -+ * account the new references */ -+ if (skb_shinfo(skb)->zccd1 != NULL) -+ zccd_incref(skb_shinfo(skb)->zccd1); -+ if (skb_shinfo(skb)->zccd2 != NULL) -+ zccd_incref(skb_shinfo(skb)->zccd2); -+ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - -@@ -694,6 +718,9 @@ int ___pskb_trim(struct sk_buff *skb, un - offset = end; - } - -+ if (skb_shinfo(skb)->nr_frags == 0) /* dropped all the pages */ -+ skb_complete_zccd(skb); /* drop zccd refs */ -+ - if (offset < len) { - skb->data_len -= skb->len - len; - skb->len = len; -@@ -846,6 +873,9 @@ pull_pages: - } - skb_shinfo(skb)->nr_frags = k; - -+ if (k == 0) /* dropped all the pages */ -+ skb_complete_zccd(skb); /* drop zccd refs */ -+ - skb->tail += delta; - skb->data_len -= delta; - -@@ -1362,6 +1392,15 @@ static void inline skb_split_inside_head - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; - -+ /* Transfer zero-copy callback descriptors */ -+ BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL); -+ skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1; -+ skb_shinfo(skb)->zccd1 = NULL; -+ -+ BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL); -+ skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2; -+ skb_shinfo(skb)->zccd2 = NULL; -+ - skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; - skb_shinfo(skb)->nr_frags = 0; - skb1->data_len = skb->data_len; -@@ -1410,6 +1449,30 @@ static void inline skb_split_no_header(s - pos += 
size; - } - skb_shinfo(skb1)->nr_frags = k; -+ -+ if (k != 0) { -+ /* skb1 has pages. Transfer or clone the zccds */ -+ -+ if (skb_shinfo(skb)->zccd1 != NULL) { -+ BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL); -+ skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1; -+ -+ if (skb_shinfo(skb)->nr_frags == 0) -+ skb_shinfo(skb)->zccd1 = NULL; -+ else -+ zccd_incref(skb_shinfo(skb)->zccd1); -+ } -+ -+ if (skb_shinfo(skb)->zccd2 != NULL) { -+ BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL); -+ skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2; -+ -+ if (skb_shinfo(skb)->nr_frags == 0) -+ skb_shinfo(skb)->zccd2 = NULL; -+ else -+ zccd_incref(skb_shinfo(skb)->zccd2); -+ } -+ } - } - - /** ---- linux/./net/ipv4/tcp_output.c 2006-09-21 00:13:11.000000000 +0100 -+++ ../2.6.9-41.2chaos/linux/./net/ipv4/tcp_output.c 2006-09-21 18:24:26.000000000 +0100 -@@ -562,6 +562,9 @@ static unsigned char *__pskb_trim_head(s - } - skb_shinfo(skb)->nr_frags = k; - -+ if (k == 0) /* dropped all pages */ -+ skb_complete_zccd(skb); -+ - skb->tail = skb->data; - skb->data_len -= len; - skb->len = skb->data_len; ---- linux/./net/ipv4/tcp.c 2006-10-10 01:49:23.000000000 +0100 -+++ ../2.6.9-41.2chaos/linux/./net/ipv4/tcp.c 2006-10-09 19:03:15.000000000 +0100 -@@ -628,8 +628,9 @@ static inline void tcp_push(struct sock - } - } - -+/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */ - static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, -- size_t psize, int flags) -+ size_t psize, int flags, struct zccd *zccd) - { - struct tcp_opt *tp = tcp_sk(sk); - int mss_now; -@@ -676,6 +677,16 @@ new_segment: - copy = size; - - i = skb_shinfo(skb)->nr_frags; -+ -+ if (zccd != NULL && /* completion callback wanted */ -+ skb_shinfo(skb)->zccd1 != NULL && /* no room for zccd */ -+ skb_shinfo(skb)->zccd2 != NULL && -+ skb_shinfo(skb)->zccd1 != zccd && /* room needed */ -+ skb_shinfo(skb)->zccd2 != zccd) { -+ tcp_mark_push (tp, skb); -+ goto new_segment; -+ } -+ - 
can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= MAX_SKB_FRAGS) { - tcp_mark_push(tp, skb); -@@ -692,6 +703,18 @@ new_segment: - skb_fill_page_desc(skb, i, page, offset, copy); - } - -+ if (zccd != NULL && /* completion callback wanted */ -+ skb_shinfo(skb)->zccd1 != zccd && /* new to this skbuf */ -+ skb_shinfo(skb)->zccd2 != zccd) { -+ if (skb_shinfo(skb)->zccd1 == NULL) { -+ skb_shinfo(skb)->zccd1 = zccd; -+ } else { -+ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL); -+ skb_shinfo(skb)->zccd2 = zccd; -+ } -+ zccd_incref(zccd); /* new reference */ -+ } -+ - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; -@@ -744,8 +767,8 @@ out_err: - return sk_stream_error(sk, flags, err); - } - --ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, -- size_t size, int flags) -+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, -+ size_t size, int flags, struct zccd *zccd) - { - ssize_t res; - struct sock *sk = sock->sk; -@@ -760,12 +783,18 @@ ssize_t tcp_sendpage(struct socket *sock - - lock_sock(sk); - TCP_CHECK_TIMER(sk); -- res = do_tcp_sendpages(sk, &page, offset, size, flags); -+ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd); - TCP_CHECK_TIMER(sk); - release_sock(sk); - return res; - } - -+ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, -+ size_t size, int flags) -+{ -+ return tcp_sendpage_zccd(sock, page, offset, size, flags, NULL); -+} -+ - #define TCP_PAGE(sk) (sk->sk_sndmsg_page) - #define TCP_OFF(sk) (sk->sk_sndmsg_off) - -@@ -2343,6 +2372,7 @@ EXPORT_SYMBOL(tcp_read_sock); - EXPORT_SYMBOL(tcp_recvmsg); - EXPORT_SYMBOL(tcp_sendmsg); - EXPORT_SYMBOL(tcp_sendpage); -+EXPORT_SYMBOL(tcp_sendpage_zccd); - EXPORT_SYMBOL(tcp_setsockopt); - EXPORT_SYMBOL(tcp_shutdown); - EXPORT_SYMBOL(tcp_statistics); diff --git a/lustre/kernel_patches/series/2.6-rhel4-titech.series b/lustre/kernel_patches/series/2.6-rhel4-titech.series index 
dcd971d..9d76232 100644 --- a/lustre/kernel_patches/series/2.6-rhel4-titech.series +++ b/lustre/kernel_patches/series/2.6-rhel4-titech.series @@ -32,4 +32,3 @@ bitops_ext2_find_next_le_bit-2.6.patch quota-deadlock-on-pagelock-core.patch quota-umount-race-fix.patch quota-deadlock-on-pagelock-ext3.patch -new-tcp-zero-copy-2.6.9-41.2chaos.patch diff --git a/lustre/kernel_patches/series/2.6-rhel4.series b/lustre/kernel_patches/series/2.6-rhel4.series index cbd7cd6..34e9411 100644 --- a/lustre/kernel_patches/series/2.6-rhel4.series +++ b/lustre/kernel_patches/series/2.6-rhel4.series @@ -31,4 +31,3 @@ bitops_ext2_find_next_le_bit-2.6.patch quota-deadlock-on-pagelock-core.patch quota-umount-race-fix.patch quota-deadlock-on-pagelock-ext3.patch -new-tcp-zero-copy-2.6.9-41.2chaos.patch diff --git a/lustre/kernel_patches/series/2.6-suse-newer.series b/lustre/kernel_patches/series/2.6-suse-newer.series index e2565c3..c284949 100644 --- a/lustre/kernel_patches/series/2.6-suse-newer.series +++ b/lustre/kernel_patches/series/2.6-suse-newer.series @@ -10,6 +10,5 @@ dcache-qstr-api-fix-2.6-suse.patch iallocsem_consistency.patch jbd-stats-2.6.5.patch bitops_ext2_find_next_le_bit-2.6.patch -new-tcp-zero-copy-2.6.5-sles9.patch 2.6.5-quotafix.patch vfs_intent-reduce-stack-usage-2.6-suse-newer.patch diff --git a/lustre/kernel_patches/series/rhel-2.4.21 b/lustre/kernel_patches/series/rhel-2.4.21 index a2e1c29..981707d 100644 --- a/lustre/kernel_patches/series/rhel-2.4.21 +++ b/lustre/kernel_patches/series/rhel-2.4.21 @@ -51,5 +51,4 @@ nfsd_iallocsem.patch linux-2.4.24-jbd-handle-EIO-rhel3.patch ext3-lookup-dotdot-2.4.20.patch ext3-sector_t-overflow-2.4.patch -new-tcp-zero-copy-2.4.21-rhel3.patch nfs_export_kernel-getattr_on_lookup-2.4.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.29 b/lustre/kernel_patches/series/vanilla-2.4.29 index ecbbe10..88983ad 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.29 +++ b/lustre/kernel_patches/series/vanilla-2.4.29 @@ -17,7 +17,6 @@ 
ext3-san-2.4.20.patch ext3-map_inode_page.patch ext3-error-export.patch iopen-2.4.20.patch -new-tcp-zero-copy-2.4.29-vanilla.patch jbd-dont-account-blocks-twice.patch jbd-commit-tricks.patch ext3-no-write-super-chaos.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.29-uml b/lustre/kernel_patches/series/vanilla-2.4.29-uml index 399234e..9740148 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.29-uml +++ b/lustre/kernel_patches/series/vanilla-2.4.29-uml @@ -21,7 +21,6 @@ ext3-san-2.4.20.patch ext3-map_inode_page.patch ext3-error-export.patch iopen-2.4.20.patch -new-tcp-zero-copy-2.4.29-vanilla.patch jbd-dont-account-blocks-twice.patch jbd-commit-tricks.patch ext3-no-write-super-chaos.patch