1 --- linux-2.4.29-orig/include/linux/skbuff.h 2006-10-10 01:25:07.000000000 +0100
2 +++ linux-2.4.29/include/linux/skbuff.h 2006-10-10 00:42:59.000000000 +0100
3 @@ -116,6 +116,36 @@ struct skb_frag_struct
7 +/* Zero Copy Callback Descriptor
8 + * This struct supports receiving notification when zero-copy network I/O has
9 + * completed. The ZCCD can be embedded in a struct containing the state of a
10 + * zero-copy network send. Every skbuff that references that send's pages also
11 + * keeps a reference on the ZCCD. When they have all been disposed of, the
12 + * reference count on the ZCCD drops to zero and the callback is made, telling
13 + * the original caller that the pages may now be overwritten. */
16 + atomic_t zccd_refcount;
17 + void (*zccd_callback)(struct zccd *);
20 +static inline void zccd_init (struct zccd *d, void (*callback)(struct zccd *))
22 + atomic_set (&d->zccd_refcount, 1);
23 + d->zccd_callback = callback;
26 +static inline void zccd_incref (struct zccd *d) /* take a reference */
28 + atomic_inc (&d->zccd_refcount);
31 +static inline void zccd_decref (struct zccd *d) /* release a reference */
33 + if (atomic_dec_and_test (&d->zccd_refcount))
34 + (d->zccd_callback)(d);
37 /* This data is invariant across clones and lives at
38 * the end of the header data, ie. at skb->end.
40 @@ -123,6 +153,11 @@ struct skb_shared_info {
42 unsigned int nr_frags;
43 struct sk_buff *frag_list;
46 + /* NB zero-copy data is normally whole pages. We have 2 zccds in an
47 + * skbuff so we don't unnecessarily split the packet where pages fall
48 + * into the same packet. */
49 skb_frag_t frags[MAX_SKB_FRAGS];
52 @@ -1131,6 +1166,23 @@ static inline void kunmap_skb_frag(void
56 +/* This skbuff has dropped its pages: drop refs on any zero-copy callback
57 + * descriptors it has. */
58 +static inline void skb_complete_zccd (struct sk_buff *skb)
60 + struct skb_shared_info *info = skb_shinfo(skb);
62 + if (info->zccd1 != NULL) {
63 + zccd_decref(info->zccd1);
67 + if (info->zccd2 != NULL) {
68 + zccd_decref(info->zccd2);
73 #define skb_queue_walk(queue, skb) \
74 for (skb = (queue)->next; \
75 (skb != (struct sk_buff *)(queue)); \
76 --- linux-2.4.29-orig/include/net/tcp.h 2006-10-10 01:25:07.000000000 +0100
77 +++ linux-2.4.29/include/net/tcp.h 2006-10-10 00:43:26.000000000 +0100
78 @@ -674,6 +674,8 @@ extern int tcp_v4_tw_remember_stam
80 extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
81 extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
82 +extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
83 + int flags, struct zccd *zccd);
85 extern int tcp_ioctl(struct sock *sk,
87 --- linux-2.4.29-orig/net/core/skbuff.c 2006-10-10 01:25:08.000000000 +0100
88 +++ linux-2.4.29/net/core/skbuff.c 2006-10-10 02:03:49.000000000 +0100
89 @@ -208,6 +208,9 @@ struct sk_buff *alloc_skb(unsigned int s
90 atomic_set(&(skb_shinfo(skb)->dataref), 1);
91 skb_shinfo(skb)->nr_frags = 0;
92 skb_shinfo(skb)->frag_list = NULL;
93 + skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */
94 + skb_shinfo(skb)->zccd2 = NULL; /* not required (yet) */
99 @@ -277,6 +280,9 @@ static void skb_release_data(struct sk_b
102 atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
103 + /* complete zero-copy callbacks (if any) */
104 + skb_complete_zccd(skb);
106 if (skb_shinfo(skb)->nr_frags) {
108 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
109 @@ -535,6 +541,8 @@ int skb_linearize(struct sk_buff *skb, i
110 atomic_set(&(skb_shinfo(skb)->dataref), 1);
111 skb_shinfo(skb)->nr_frags = 0;
112 skb_shinfo(skb)->frag_list = NULL;
113 + skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */
114 + skb_shinfo(skb)->zccd2 = NULL; /* not required */
116 /* We are no longer a clone, even if we were. */
118 @@ -589,6 +597,18 @@ struct sk_buff *pskb_copy(struct sk_buff
119 get_page(skb_shinfo(n)->frags[i].page);
121 skb_shinfo(n)->nr_frags = i;
123 + if (skb_shinfo(skb)->zccd1 != NULL) {
124 + BUG_TRAP(skb_shinfo(n)->zccd1 == NULL); /* assert (==), not assign: the copy must not hold a zccd yet */
125 + skb_shinfo(n)->zccd1 = skb_shinfo(skb)->zccd1;
126 + zccd_incref(skb_shinfo(n)->zccd1); /* the copy takes its own reference on the descriptor */
129 + if (skb_shinfo(skb)->zccd2 != NULL) {
130 + BUG_TRAP(skb_shinfo(n)->zccd2 == NULL); /* assert (==), not assign: the copy must not hold a zccd yet */
131 + skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
132 + zccd_incref(skb_shinfo(n)->zccd2); /* the copy takes its own reference on the descriptor */
136 if (skb_shinfo(skb)->frag_list) {
137 @@ -638,6 +658,13 @@ int pskb_expand_head(struct sk_buff *skb
138 memcpy(data+nhead, skb->head, skb->tail-skb->head);
139 memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
141 + /* zero-copy descriptors have been copied into the new shinfo -
142 + * account the new references */
143 + if (skb_shinfo(skb)->zccd1 != NULL)
144 + zccd_incref(skb_shinfo(skb)->zccd1);
145 + if (skb_shinfo(skb)->zccd2 != NULL)
146 + zccd_incref(skb_shinfo(skb)->zccd2);
148 for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
149 get_page(skb_shinfo(skb)->frags[i].page);
151 @@ -794,6 +821,9 @@ int ___pskb_trim(struct sk_buff *skb, un
155 + if (skb_shinfo(skb)->nr_frags == 0) /* dropped all the pages */
156 + skb_complete_zccd(skb); /* drop zccd refs */
159 skb->data_len -= skb->len - len;
161 @@ -947,6 +977,9 @@ pull_pages:
163 skb_shinfo(skb)->nr_frags = k;
165 + if (k == 0) /* dropped all the pages */
166 + skb_complete_zccd(skb); /* drop zccd refs */
169 skb->data_len -= delta;
171 --- linux-2.4.29-orig/net/ipv4/tcp.c 2006-10-10 01:25:08.000000000 +0100
172 +++ linux-2.4.29/net/ipv4/tcp.c 2006-10-09 20:53:28.000000000 +0100
173 @@ -749,7 +749,8 @@ do_interrupted:
177 -ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags);
178 +ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags,
179 + struct zccd *zccd);
182 can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
183 @@ -828,7 +829,8 @@ static int tcp_error(struct sock *sk, in
187 -ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags)
188 +ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags,
191 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
193 @@ -876,6 +878,17 @@ new_segment:
196 i = skb_shinfo(skb)->nr_frags;
198 + if (zccd != NULL && /* this is a zcc I/O */
199 + skb_shinfo(skb)->zccd1 != NULL && /* skb is part of a zcc I/O */
200 + skb_shinfo(skb)->zccd2 != NULL &&
201 + skb_shinfo(skb)->zccd1 != zccd && /* not the same one */
202 + skb_shinfo(skb)->zccd2 != zccd)
204 + tcp_mark_push (tp, skb);
208 if (can_coalesce(skb, i, page, offset)) {
209 skb_shinfo(skb)->frags[i-1].size += copy;
210 } else if (i < MAX_SKB_FRAGS) {
211 @@ -886,6 +899,18 @@ new_segment:
215 + if (zccd != NULL && /* completion callback wanted */
216 + skb_shinfo(skb)->zccd1 != zccd && /* new to this skbuf */
217 + skb_shinfo(skb)->zccd2 != zccd) {
218 + if (skb_shinfo(skb)->zccd1 == NULL) {
219 + skb_shinfo(skb)->zccd1 = zccd;
221 + BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
222 + skb_shinfo(skb)->zccd2 = zccd;
224 + zccd_incref(zccd); /* new reference */
228 skb->data_len += copy;
229 skb->ip_summed = CHECKSUM_HW;
230 @@ -934,7 +959,8 @@ out_err:
231 return tcp_error(sk, flags, err);
234 -ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
235 +ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset,
236 + size_t size, int flags, struct zccd *zccd)
239 struct sock *sk = sock->sk;
240 @@ -949,12 +975,17 @@ ssize_t tcp_sendpage(struct socket *sock
244 - res = do_tcp_sendpages(sk, &page, offset, size, flags);
245 + res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
251 +ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
253 + return tcp_sendpage_zccd(sock, page, offset, size, flags, NULL);
256 #define TCP_PAGE(sk) (sk->tp_pinfo.af_tcp.sndmsg_page)
257 #define TCP_OFF(sk) (sk->tp_pinfo.af_tcp.sndmsg_off)
259 --- linux-2.4.29-orig/net/ipv4/tcp_output.c 2004-11-17 11:54:22.000000000 +0000
260 +++ linux-2.4.29/net/ipv4/tcp_output.c 2006-10-10 01:55:29.000000000 +0100
261 @@ -379,6 +379,15 @@ static void skb_split(struct sk_buff *sk
262 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
263 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
265 + /* Transfer zero-copy callback descriptors */
266 + BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL);
267 + skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1;
268 + skb_shinfo(skb)->zccd1 = NULL;
270 + BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL);
271 + skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2;
272 + skb_shinfo(skb)->zccd2 = NULL;
274 skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
275 skb_shinfo(skb)->nr_frags = 0;
277 @@ -425,6 +434,30 @@ static void skb_split(struct sk_buff *sk
280 skb_shinfo(skb1)->nr_frags = k;
283 + /* skb1 has pages. Transfer or clone the zccds */
285 + if (skb_shinfo(skb)->zccd1 != NULL) {
286 + BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL);
287 + skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1;
289 + if (skb_shinfo(skb)->nr_frags == 0)
290 + skb_shinfo(skb)->zccd1 = NULL;
292 + zccd_incref(skb_shinfo(skb)->zccd1);
295 + if (skb_shinfo(skb)->zccd2 != NULL) {
296 + BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL);
297 + skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2;
299 + if (skb_shinfo(skb)->nr_frags == 0)
300 + skb_shinfo(skb)->zccd2 = NULL;
302 + zccd_incref(skb_shinfo(skb)->zccd2);
308 --- linux-2.4.29.orig/net/netsyms.c 2003-11-03 23:22:13.000000000 +0300
309 +++ linux-2.4.29/net/netsyms.c 2003-12-04 20:42:50.000000000 +0300
314 +EXPORT_SYMBOL(tcp_sendpage_zccd);
315 EXPORT_SYMBOL(tcp_read_sock);
317 EXPORT_SYMBOL(netlink_set_err);