1 diff -uNr linux-2.4.21-32.0.1.EL/include/linux/skbuff.h linux-2.4.21-32.0.1.EL-newzc/include/linux/skbuff.h
2 --- linux-2.4.21-32.0.1.EL/include/linux/skbuff.h 2005-06-02 05:51:57.000000000 +0300
3 +++ linux-2.4.21-32.0.1.EL-newzc/include/linux/skbuff.h 2006-10-23 23:54:31.000000000 +0300
8 +/* Zero Copy Callback Descriptor
9 + * This struct supports receiving notification when zero-copy network I/O has
10 + * completed. The ZCCD can be embedded in a struct containing the state of a
11 + * zero-copy network send. Every skbuff that references that send's pages also
12 + * keeps a reference on the ZCCD. When they have all been disposed of, the
13 + * reference count on the ZCCD drops to zero and the callback is made, telling
14 + * the original caller that the pages may now be overwritten. */
17 + atomic_t zccd_refcount;
18 + void (*zccd_callback)(struct zccd *);
21 +static inline void zccd_init (struct zccd *d, void (*callback)(struct zccd *))
23 + atomic_set (&d->zccd_refcount, 1);
24 + d->zccd_callback = callback;
27 +static inline void zccd_incref (struct zccd *d) /* take a reference */
29 + atomic_inc (&d->zccd_refcount);
32 +static inline void zccd_decref (struct zccd *d) /* release a reference */
34 + if (atomic_dec_and_test (&d->zccd_refcount))
35 + (d->zccd_callback)(d);
38 /* This data is invariant across clones and lives at
39 * the end of the header data, ie. at skb->end.
43 unsigned int nr_frags;
44 struct sk_buff *frag_list;
47 + /* NB zero-copy data is normally whole pages. We have 2 zccds in an
48 + * skbuff so we don't unnecessarily split the packet where pages fall
49 + * into the same packet. */
50 skb_frag_t frags[MAX_SKB_FRAGS];
53 @@ -1153,6 +1188,23 @@
57 +/* This skbuf has dropped its pages: drop refs on any zero-copy callback
58 + * descriptors it has. */
59 +static inline void skb_complete_zccd (struct sk_buff *skb)
61 + struct skb_shared_info *info = skb_shinfo(skb);
63 + if (info->zccd1 != NULL) {
64 + zccd_decref(info->zccd1);
68 + if (info->zccd2 != NULL) {
69 + zccd_decref(info->zccd2);
74 #define skb_queue_walk(queue, skb) \
75 for (skb = (queue)->next; \
76 (skb != (struct sk_buff *)(queue)); \
77 diff -uNr linux-2.4.21-32.0.1.EL/include/net/tcp.h linux-2.4.21-32.0.1.EL-newzc/include/net/tcp.h
78 --- linux-2.4.21-32.0.1.EL/include/net/tcp.h 2005-06-02 05:51:51.000000000 +0300
79 +++ linux-2.4.21-32.0.1.EL-newzc/include/net/tcp.h 2006-10-23 23:54:37.000000000 +0300
82 extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
83 extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
84 +extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
85 + int flags, struct zccd *zccd);
87 extern int tcp_ioctl(struct sock *sk,
89 diff -uNr linux-2.4.21-32.0.1.EL/net/core/skbuff.c linux-2.4.21-32.0.1.EL-newzc/net/core/skbuff.c
90 --- linux-2.4.21-32.0.1.EL/net/core/skbuff.c 2005-06-02 05:51:57.000000000 +0300
91 +++ linux-2.4.21-32.0.1.EL-newzc/net/core/skbuff.c 2006-10-23 23:44:10.000000000 +0300
93 atomic_set(&(skb_shinfo(skb)->dataref), 1);
94 skb_shinfo(skb)->nr_frags = 0;
95 skb_shinfo(skb)->frag_list = NULL;
96 + skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */
97 + skb_shinfo(skb)->zccd2 = NULL; /* not required (yet) */
105 atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
106 + /* complete zero-copy callbacks (if any) */
107 + skb_complete_zccd(skb);
109 if (skb_shinfo(skb)->nr_frags) {
111 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
113 atomic_set(&(skb_shinfo(skb)->dataref), 1);
114 skb_shinfo(skb)->nr_frags = 0;
115 skb_shinfo(skb)->frag_list = NULL;
116 + skb_shinfo(skb)->zccd1 = NULL; /* zero-copy completion callback */
117 + skb_shinfo(skb)->zccd2 = NULL; /* not required */
119 /* We are no longer a clone, even if we were. */
122 get_page(skb_shinfo(n)->frags[i].page);
124 skb_shinfo(n)->nr_frags = i;
126 + if (skb_shinfo(skb)->zccd1 != NULL) {
127 + BUG_TRAP(skb_shinfo(n)->zccd1 == NULL); /* compare, not assign: n must not already hold a zccd */
128 + skb_shinfo(n)->zccd1 = skb_shinfo(skb)->zccd1;
129 + zccd_incref(skb_shinfo(n)->zccd1);
132 + if (skb_shinfo(skb)->zccd2 != NULL) {
133 + BUG_TRAP(skb_shinfo(n)->zccd2 == NULL); /* compare, not assign: n must not already hold a zccd */
134 + skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
135 + zccd_incref(skb_shinfo(n)->zccd2);
139 if (skb_shinfo(skb)->frag_list) {
141 memcpy(data+nhead, skb->head, skb->tail-skb->head);
142 memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
144 + /* zero-copy descriptors have been copied into the new shinfo -
145 + * account the new references */
146 + if (skb_shinfo(skb)->zccd1 != NULL)
147 + zccd_incref(skb_shinfo(skb)->zccd1);
148 + if (skb_shinfo(skb)->zccd2 != NULL)
149 + zccd_incref(skb_shinfo(skb)->zccd2);
151 for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
152 get_page(skb_shinfo(skb)->frags[i].page);
158 + if (skb_shinfo(skb)->nr_frags == 0) /* dropped all the pages */
159 + skb_complete_zccd(skb); /* drop zccd refs */
162 skb->data_len -= skb->len - len;
166 skb_shinfo(skb)->nr_frags = k;
168 + if (k == 0) /* dropped all the pages */
169 + skb_complete_zccd(skb); /* drop zccd refs */
172 skb->data_len -= delta;
174 diff -uNr linux-2.4.21-32.0.1.EL/net/ipv4/tcp.c linux-2.4.21-32.0.1.EL-newzc/net/ipv4/tcp.c
175 --- linux-2.4.21-32.0.1.EL/net/ipv4/tcp.c 2005-06-02 05:51:51.000000000 +0300
176 +++ linux-2.4.21-32.0.1.EL-newzc/net/ipv4/tcp.c 2006-10-24 00:10:34.000000000 +0300
177 @@ -1015,7 +1015,7 @@
181 -ssize_t do_tcp_sendpages(struct sock *sk, struct kveclet *let, int poffset, size_t psize, int flags);
182 +ssize_t do_tcp_sendpages(struct sock *sk, struct kveclet *let, int poffset, size_t psize, int flags, struct zccd *zccd);
185 can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
186 @@ -1094,7 +1094,7 @@
190 -ssize_t do_tcp_sendpages(struct sock *sk, struct kveclet *let, int poffset, size_t psize, int flags)
191 +ssize_t do_tcp_sendpages(struct sock *sk, struct kveclet *let, int poffset, size_t psize, int flags, struct zccd *zccd)
193 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
195 @@ -1147,6 +1147,17 @@
198 i = skb_shinfo(skb)->nr_frags;
200 + if (zccd != NULL && /* this is a zcc I/O */
201 + skb_shinfo(skb)->zccd1 != NULL && /* skb is part of a zcc I/O */
202 + skb_shinfo(skb)->zccd2 != NULL &&
203 + skb_shinfo(skb)->zccd1 != zccd && /* not the same one */
204 + skb_shinfo(skb)->zccd2 != zccd)
206 + tcp_mark_push (tp, skb);
210 if (can_coalesce(skb, i, page, offset)) {
211 skb_shinfo(skb)->frags[i-1].size += copy;
212 } else if (i < MAX_SKB_FRAGS) {
213 @@ -1157,6 +1168,18 @@
217 + if (zccd != NULL && /* completion callback wanted */
218 + skb_shinfo(skb)->zccd1 != zccd && /* new to this skbuf */
219 + skb_shinfo(skb)->zccd2 != zccd) {
220 + if (skb_shinfo(skb)->zccd1 == NULL) {
221 + skb_shinfo(skb)->zccd1 = zccd;
223 + BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
224 + skb_shinfo(skb)->zccd2 = zccd;
226 + zccd_incref(zccd); /* new reference */
230 skb->data_len += copy;
231 skb->ip_summed = CHECKSUM_HW;
232 @@ -1209,7 +1232,8 @@
233 return tcp_error(sk, flags, err);
236 -ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
237 +ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset,
238 + size_t size, int flags, struct zccd *zccd)
240 struct kveclet let = { page, offset, size };
242 @@ -1224,12 +1248,18 @@
246 - res = do_tcp_sendpages(sk, &let, 0, size, flags);
247 + res = do_tcp_sendpages(sk, &let, 0, size, flags, zccd);
253 +ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
255 + return tcp_sendpage_zccd(sock, page, offset, size, flags, NULL);
259 static void tcp_kvec_write_worker(struct tcp_write_async_info *info)
261 struct sock *sk = info->sk;
262 @@ -1238,7 +1268,7 @@
263 !(sk->route_caps & TCP_ZC_CSUM_FLAGS))
266 - res = do_tcp_sendpages(sk, info->cur_let, info->offset, info->len - info->done, MSG_DONTWAIT);
267 + res = do_tcp_sendpages(sk, info->cur_let, info->offset, info->len - info->done, MSG_DONTWAIT, NULL);
271 diff -uNr linux-2.4.21-32.0.1.EL/net/ipv4/tcp_output.c linux-2.4.21-32.0.1.EL-newzc/net/ipv4/tcp_output.c
272 --- linux-2.4.21-32.0.1.EL/net/ipv4/tcp_output.c 2005-06-02 05:51:50.000000000 +0300
273 +++ linux-2.4.21-32.0.1.EL-newzc/net/ipv4/tcp_output.c 2006-10-23 23:44:10.000000000 +0300
275 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
276 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
278 + /* Transfer zero-copy callback descriptors */
279 + BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL);
280 + skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1;
281 + skb_shinfo(skb)->zccd1 = NULL;
283 + BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL);
284 + skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2;
285 + skb_shinfo(skb)->zccd2 = NULL;
287 skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
288 skb_shinfo(skb)->nr_frags = 0;
293 skb_shinfo(skb1)->nr_frags = k;
296 + /* skb1 has pages. Transfer or clone the zccds */
298 + if (skb_shinfo(skb)->zccd1 != NULL) {
299 + BUG_TRAP(skb_shinfo(skb1)->zccd1 == NULL);
300 + skb_shinfo(skb1)->zccd1 = skb_shinfo(skb)->zccd1;
302 + if (skb_shinfo(skb)->nr_frags == 0)
303 + skb_shinfo(skb)->zccd1 = NULL;
305 + zccd_incref(skb_shinfo(skb)->zccd1);
308 + if (skb_shinfo(skb)->zccd2 != NULL) {
309 + BUG_TRAP(skb_shinfo(skb1)->zccd2 == NULL);
310 + skb_shinfo(skb1)->zccd2 = skb_shinfo(skb)->zccd2;
312 + if (skb_shinfo(skb)->nr_frags == 0)
313 + skb_shinfo(skb)->zccd2 = NULL;
315 + zccd_incref(skb_shinfo(skb)->zccd2);
321 --- linux-2.4.21-32.0.1.EL/net/netsyms.c 2005-06-02 05:51:50.000000000 +0300
322 +++ linux-2.4.21-32.0.1.EL-newzc/net/netsyms.c 2006-10-23 23:44:10.000000000 +0300
324 EXPORT_SYMBOL(ip_generic_getfrag);
327 +EXPORT_SYMBOL(tcp_sendpage_zccd);
328 EXPORT_SYMBOL(tcp_read_sock);
330 EXPORT_SYMBOL(netlink_set_err);