1 Index: linux-2.6.16.i686/net/core/dev.c
2 ===================================================================
3 --- linux-2.6.16.i686.orig/net/core/dev.c 2006-05-30 15:47:10.000000000 +0800
4 +++ linux-2.6.16.i686/net/core/dev.c 2006-05-30 21:24:07.000000000 +0800
6 ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
8 ninfo->frag_list = NULL;
9 + ninfo->zccd = NULL; /* copied data => no user zero copy descriptor */
10 + ninfo->zccd2 = NULL;
13 /* Offset between the two in bytes */
14 offset = data - skb->head;
15 Index: linux-2.6.16.i686/net/core/skbuff.c
16 ===================================================================
17 --- linux-2.6.16.i686.orig/net/core/skbuff.c 2006-05-30 15:47:12.000000000 +0800
18 +++ linux-2.6.16.i686/net/core/skbuff.c 2006-05-30 21:26:35.000000000 +0800
21 shinfo->ip6_frag_id = 0;
22 shinfo->frag_list = NULL;
24 + shinfo->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */
25 + shinfo->zccd2 = NULL;
27 struct sk_buff *child = skb + 1;
28 atomic_t *fclone_ref = (atomic_t *) (child + 1);
31 shinfo->ip6_frag_id = 0;
32 shinfo->frag_list = NULL;
34 + shinfo->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */
35 + shinfo->zccd2 = NULL;
38 struct sk_buff *child = skb + 1;
39 atomic_t *fclone_ref = (atomic_t *) (child + 1);
42 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
43 &skb_shinfo(skb)->dataref)) {
44 + if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
45 + zccd_put (skb_shinfo(skb)->zccd); /* release hold */
46 + if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
47 + zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
48 if (skb_shinfo(skb)->nr_frags) {
50 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
52 n->data_len = skb->data_len;
55 + if (skb_shinfo(skb)->zccd != NULL) /* user zero copy descriptor? */
56 + zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
57 + skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
59 + if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd user zero copy descriptor? */
60 + zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
61 + skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
63 if (skb_shinfo(skb)->nr_frags) {
68 int size = nhead + (skb->end - skb->head) + ntail;
70 + zccd_t *zccd = skb_shinfo(skb)->zccd; /* stash user zero copy descriptor */
71 + zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
77 if (skb_shinfo(skb)->frag_list)
78 skb_clone_fraglist(skb);
80 + if (zccd != NULL) /* user zero copy descriptor? */
81 + zccd_get (zccd); /* extra ref (pages are shared) */
82 + if (zccd2 != NULL) /* 2nd user zero copy descriptor? */
83 + zccd_get (zccd2); /* extra ref (pages are shared) */
85 skb_release_data(skb);
87 off = (data + nhead) - skb->head;
91 atomic_set(&skb_shinfo(skb)->dataref, 1);
92 + skb_shinfo(skb)->zccd = zccd;
93 + skb_shinfo(skb)->zccd2 = zccd2;
97 Index: linux-2.6.16.i686/net/ipv4/tcp.c
98 ===================================================================
99 --- linux-2.6.16.i686.orig/net/ipv4/tcp.c 2006-05-30 15:47:12.000000000 +0800
100 +++ linux-2.6.16.i686/net/ipv4/tcp.c 2006-05-30 21:24:07.000000000 +0800
105 +/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
106 static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
107 - size_t psize, int flags)
108 + size_t psize, int flags, zccd_t *zccd)
111 struct tcp_sock *tp = tcp_sk(sk);
112 int mss_now, size_goal;
116 i = skb_shinfo(skb)->nr_frags;
118 + if (zccd != NULL && /* this is a zcc I/O */
119 + skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
120 + skb_shinfo(skb)->zccd2 != NULL &&
121 + skb_shinfo(skb)->zccd != zccd && /* not the same one */
122 + skb_shinfo(skb)->zccd2 != zccd)
124 + tcp_mark_push (tp, skb);
128 can_coalesce = skb_can_coalesce(skb, i, page, offset);
129 if (!can_coalesce && i >= MAX_SKB_FRAGS) {
130 tcp_mark_push(tp, skb);
132 skb_fill_page_desc(skb, i, page, offset, copy);
135 + if (zccd != NULL && /* this is a zcc I/O */
136 + skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
137 + skb_shinfo(skb)->zccd2 != zccd)
139 + zccd_get (zccd); /* bump ref count */
141 + BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
143 + if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
144 + skb_shinfo(skb)->zccd = zccd;
146 + skb_shinfo(skb)->zccd2 = zccd;
150 skb->data_len += copy;
151 skb->truesize += copy;
152 @@ -631,12 +658,37 @@
156 - res = do_tcp_sendpages(sk, &page, offset, size, flags);
157 + res = do_tcp_sendpages(sk, &page, offset, size, flags,NULL);
158 + TCP_CHECK_TIMER(sk);
163 +ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
164 + int flags, zccd_t *zccd)
167 + struct sock *sk = sock->sk;
169 +#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
171 + if (!(sk->sk_route_caps & NETIF_F_SG) || /* caller shouldn't waste her time */
172 + !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
175 +#undef TCP_ZC_CSUM_FLAGS
178 + TCP_CHECK_TIMER(sk);
180 + res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
188 #define TCP_PAGE(sk) (sk->sk_sndmsg_page)
189 #define TCP_OFF(sk) (sk->sk_sndmsg_off)
191 @@ -1406,6 +1458,202 @@
195 +int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
196 + int len, int nonblock)
198 + struct tcp_sock *tp = tcp_sk(sk);
202 + BUG_TRAP (len > 0);
203 + /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
207 + TCP_CHECK_TIMER(sk);
209 + copied = -ENOTCONN;
210 + if (sk->sk_state == TCP_LISTEN)
214 + timeo = sock_rcvtimeo(sk, nonblock);
217 + struct sk_buff * skb;
219 + unsigned long used;
223 + /* Are we at urgent data? Stop if we have read anything. */
224 + if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
227 + /* We need to check signals first, to get correct SIGURG
228 + * handling. FIXME: Need to check this doesn't impact 1003.1g
229 + * and move it down to the bottom of the loop
231 + if (signal_pending(current)) {
234 + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
238 + /* Next get a buffer. */
240 + skb = skb_peek(&sk->sk_receive_queue);
242 + if (skb == NULL) /* nothing ready */
246 + sk->sk_state == TCP_CLOSE ||
247 + (sk->sk_shutdown & RCV_SHUTDOWN) ||
252 + if (sock_flag(sk, SOCK_DONE))
256 + copied = sock_error(sk);
260 + if (sk->sk_shutdown & RCV_SHUTDOWN)
263 + if (sk->sk_state == TCP_CLOSE) {
264 + if (!(sock_flag(sk, SOCK_DONE))) {
265 + /* This occurs when user tries to read
266 + * from a socket that was never connected.
268 + copied = -ENOTCONN;
280 + cleanup_rbuf(sk, copied);
281 + sk_wait_data(sk, &timeo);
285 + BUG_TRAP (atomic_read (&skb->users) == 1);
287 + exhausted = eaten = 0;
289 + offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
290 + if (skb->h.th->syn)
293 + used = skb->len - offset;
295 + if (tp->urg_data) {
296 + u32 urg_offset = tp->urg_seq - tp->copied_seq;
297 + if (urg_offset < used) {
298 + if (!urg_offset) { /* at urgent data */
299 + if (!(sock_flag(sk, SOCK_URGINLINE))) {
300 + tp->copied_seq++; /* discard the single byte of urgent data */
304 + } else /* truncate read */
309 + BUG_TRAP (used >= 0);
317 + if (skb_is_nonlinear (skb))
319 + int rc = skb_linearize (skb, GFP_KERNEL);
321 + printk ("tcp_recvpackets(): linearising: %d\n", rc);
331 + if ((offset + used) == skb->len) /* consuming the whole packet */
333 + __skb_unlink (skb, &sk->sk_receive_queue);
334 + dst_release (skb->dst);
336 + __skb_pull (skb, offset);
337 + __skb_queue_tail (packets, skb);
338 + exhausted = eaten = 1;
340 + else /* consuming only part of the packet */
342 + struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
351 + dst_release (skb2->dst);
352 + __skb_pull (skb2, offset);
353 + __skb_trim (skb2, used);
354 + __skb_queue_tail (packets, skb2);
357 + tp->copied_seq += used;
362 + if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
364 + tcp_fast_path_check(sk, tp);
370 + if (skb->h.th->fin)
374 + sk_eat_skb (sk, skb);
379 + sk_eat_skb (sk, skb);
384 + /* Clean up data we have read: This will do ACK frames. */
385 + cleanup_rbuf(sk, copied);
386 + TCP_CHECK_TIMER(sk);
392 * State processing on a close. This implements the state shift for
393 * sending our FIN frame. Note that we only send a FIN for some
394 @@ -2139,6 +2387,8 @@
395 EXPORT_SYMBOL(tcp_recvmsg);
396 EXPORT_SYMBOL(tcp_sendmsg);
397 EXPORT_SYMBOL(tcp_sendpage);
398 +EXPORT_SYMBOL(tcp_sendpage_zccd);
399 +EXPORT_SYMBOL(tcp_recvpackets);
400 EXPORT_SYMBOL(tcp_setsockopt);
401 EXPORT_SYMBOL(tcp_shutdown);
402 EXPORT_SYMBOL(tcp_statistics);
403 Index: linux-2.6.16.i686/include/linux/skbuff.h
404 ===================================================================
405 --- linux-2.6.16.i686.orig/include/linux/skbuff.h 2006-05-30 15:47:11.000000000 +0800
406 +++ linux-2.6.16.i686/include/linux/skbuff.h 2006-05-30 21:24:07.000000000 +0800
411 +/* Support for callback when skb data has been released */
412 +typedef struct zccd /* Zero Copy Callback Descriptor */
413 +{ /* (embed as first member of custom struct) */
414 + atomic_t zccd_count; /* reference count */
415 + void (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
418 +static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
420 + atomic_set (&d->zccd_count, 1);
421 + d->zccd_destructor = callback;
424 +static inline void zccd_get (zccd_t *d) /* take a reference */
426 + atomic_inc (&d->zccd_count);
429 +static inline void zccd_put (zccd_t *d) /* release a reference */
431 + if (atomic_dec_and_test (&d->zccd_count))
432 + (d->zccd_destructor)(d);
435 /* This data is invariant across clones and lives at
436 * the end of the header data, ie. at skb->end.
439 unsigned short ufo_size;
440 unsigned int ip6_frag_id;
441 struct sk_buff *frag_list;
442 + zccd_t *zccd; /* zero copy descriptor */
443 + zccd_t *zccd2; /* 2nd zero copy descriptor */
444 + /* NB we expect zero-copy data to be at least 1 packet, so
445 + * having 2 zccds means we don't unnecessarily split the packet
446 + * where consecutive zero-copy sends abut.
449 skb_frag_t frags[MAX_SKB_FRAGS];
452 Index: linux-2.6.16.i686/include/net/tcp.h
453 ===================================================================
454 --- linux-2.6.16.i686.orig/include/net/tcp.h 2006-05-30 15:47:11.000000000 +0800
455 +++ linux-2.6.16.i686/include/net/tcp.h 2006-05-30 21:24:07.000000000 +0800
457 extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
458 struct msghdr *msg, size_t size);
459 extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
460 +extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
461 + int flags, zccd_t *zccd);
464 extern int tcp_ioctl(struct sock *sk,
468 size_t len, int nonblock,
469 int flags, int *addr_len);
470 +extern int tcp_recvpackets(struct sock *sk,
471 + struct sk_buff_head *packets,
472 + int len, int nonblock);
474 extern void tcp_parse_options(struct sk_buff *skb,
475 struct tcp_options_received *opt_rx,