1 diff -u -r1.1.1.1 linux/include/linux/skbuff.h
2 --- linux/include/linux/skbuff.h 2 Aug 2002 10:59:25 -0000 1.1.1.1
3 +++ linux/include/linux/skbuff.h 2 Aug 2002 14:20:00 -0000
8 +/* Support for callback when skb data has been released */
9 +typedef struct zccd /* Zero Copy Callback Descriptor */
10 +{ /* (embed as first member of custom struct) */
11 + atomic_t zccd_count; /* reference count */
12 + void (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
15 +static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
17 + atomic_set (&d->zccd_count, 1);
18 + d->zccd_destructor = callback;
21 +static inline void zccd_get (zccd_t *d) /* take a reference */
23 + atomic_inc (&d->zccd_count);
26 +static inline void zccd_put (zccd_t *d) /* release a reference */
28 + if (atomic_dec_and_test (&d->zccd_count))
29 + (d->zccd_destructor)(d);
32 /* This data is invariant across clones and lives at
33 * the end of the header data, ie. at skb->end.
37 unsigned int nr_frags;
38 struct sk_buff *frag_list;
39 + zccd_t *zccd; /* zero copy descriptor */
40 + zccd_t *zccd2; /* 2nd zero copy descriptor */
41 + /* NB we expect zero-copy data to be at least 1 packet, so
42 + * having 2 zccds means we don't unnecessarily split the packet
43 + * where consecutive zero-copy sends abut.
45 skb_frag_t frags[MAX_SKB_FRAGS];
48 diff -u -r1.1.1.1 linux/include/net/tcp.h
49 --- linux/include/net/tcp.h 2 Aug 2002 10:59:29 -0000 1.1.1.1
50 +++ linux/include/net/tcp.h 2 Aug 2002 14:03:49 -0000
53 extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
54 extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
55 +extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
56 + int flags, zccd_t *zccd);
58 extern int tcp_ioctl(struct sock *sk,
62 int len, int nonblock,
63 int flags, int *addr_len);
64 +extern int tcp_recvpackets(struct sock *sk,
65 + struct sk_buff_head *packets,
66 + int len, int nonblock);
68 extern int tcp_listen_start(struct sock *sk);
70 diff -u -r1.1.1.1 linux/net/netsyms.c
71 --- linux/net/netsyms.c 2 Aug 2002 10:59:31 -0000 1.1.1.1
72 +++ linux/net/netsyms.c 2 Aug 2002 14:21:31 -0000
74 EXPORT_SYMBOL(sysctl_tcp_ecn);
75 EXPORT_SYMBOL(tcp_cwnd_application_limited);
76 EXPORT_SYMBOL(tcp_sendpage);
77 +EXPORT_SYMBOL(tcp_sendpage_zccd);
78 +EXPORT_SYMBOL(tcp_recvpackets);
80 EXPORT_SYMBOL(tcp_write_xmit);
82 diff -u -r1.1.1.1 linux/net/core/skbuff.c
83 --- linux/net/core/skbuff.c 2 Aug 2002 10:59:32 -0000 1.1.1.1
84 +++ linux/net/core/skbuff.c 2 Aug 2002 14:07:13 -0000
86 atomic_set(&(skb_shinfo(skb)->dataref), 1);
87 skb_shinfo(skb)->nr_frags = 0;
88 skb_shinfo(skb)->frag_list = NULL;
89 + skb_shinfo(skb)->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */
90 + skb_shinfo(skb)->zccd2 = NULL;
97 atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
98 + if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
99 + zccd_put (skb_shinfo(skb)->zccd); /* release hold */
100 + if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
101 + zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
102 if (skb_shinfo(skb)->nr_frags) {
104 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
106 atomic_set(&(skb_shinfo(skb)->dataref), 1);
107 skb_shinfo(skb)->nr_frags = 0;
108 skb_shinfo(skb)->frag_list = NULL;
109 + skb_shinfo(skb)->zccd = NULL; /* copied data => no user zero copy descriptor */
110 + skb_shinfo(skb)->zccd2 = NULL;
112 /* We are no longer a clone, even if we were. */
116 n->data_len = skb->data_len;
119 + if (skb_shinfo(skb)->zccd != NULL) /* user zero copy descriptor? */
120 + zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
121 + skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
123 + if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd user zero copy descriptor? */
124 + zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
125 + skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
127 if (skb_shinfo(skb)->nr_frags) {
131 int size = nhead + (skb->end - skb->head) + ntail;
133 + zccd_t *zccd = skb_shinfo(skb)->zccd; /* stash user zero copy descriptor */
134 + zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
139 if (skb_shinfo(skb)->frag_list)
140 skb_clone_fraglist(skb);
142 + if (zccd != NULL) /* user zero copy descriptor? */
143 + zccd_get (zccd); /* extra ref (pages are shared) */
144 + if (zccd2 != NULL) /* 2nd user zero copy descriptor? */
145 + zccd_get (zccd2); /* extra ref (pages are shared) */
147 skb_release_data(skb);
149 off = (data+nhead) - skb->head;
153 atomic_set(&skb_shinfo(skb)->dataref, 1);
154 + skb_shinfo(skb)->zccd = zccd;
155 + skb_shinfo(skb)->zccd2 = zccd2;
159 diff -u -r1.1.1.1 linux/net/ipv4/tcp.c
160 --- linux/net/ipv4/tcp.c 2 Aug 2002 10:59:34 -0000 1.1.1.1
161 +++ linux/net/ipv4/tcp.c 2 Aug 2002 14:36:30 -0000
166 -ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags);
167 +ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd);
170 can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
175 -ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags)
176 +/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
177 +ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd)
179 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
184 i = skb_shinfo(skb)->nr_frags;
186 + if (zccd != NULL && /* this is a zcc I/O */
187 + skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
188 + skb_shinfo(skb)->zccd2 != NULL &&
189 + skb_shinfo(skb)->zccd != zccd && /* not the same one */
190 + skb_shinfo(skb)->zccd2 != zccd)
192 + tcp_mark_push (tp, skb);
196 if (can_coalesce(skb, i, page, offset)) {
197 skb_shinfo(skb)->frags[i-1].size += copy;
198 } else if (i < MAX_SKB_FRAGS) {
200 tcp_mark_push(tp, skb);
204 + if (zccd != NULL && /* this is a zcc I/O */
205 + skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
206 + skb_shinfo(skb)->zccd2 != zccd)
208 + zccd_get (zccd); /* bump ref count */
210 + BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
212 + if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
213 + skb_shinfo(skb)->zccd = zccd;
215 + skb_shinfo(skb)->zccd2 = zccd;
219 skb->data_len += copy;
224 - res = do_tcp_sendpages(sk, &page, offset, size, flags);
225 + res = do_tcp_sendpages(sk, &page, offset, size, flags, NULL);
226 + TCP_CHECK_TIMER(sk);
231 +ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
232 + int flags, zccd_t *zccd)
235 + struct sock *sk = sock->sk;
237 +#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
239 + if (!(sk->route_caps & NETIF_F_SG) || /* caller shouldn't waste her time */
240 + !(sk->route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
243 +#undef TCP_ZC_CSUM_FLAGS
246 + TCP_CHECK_TIMER(sk);
248 + res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
253 @@ -1767,6 +1817,202 @@
255 err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len);
259 +int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
260 + int len, int nonblock)
262 + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
266 + BUG_TRAP (len > 0);
267 + /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
271 + TCP_CHECK_TIMER(sk);
273 + copied = -ENOTCONN;
274 + if (sk->state == TCP_LISTEN)
278 + timeo = sock_rcvtimeo(sk, nonblock);
281 + struct sk_buff * skb;
283 + unsigned long used;
287 + /* Are we at urgent data? Stop if we have read anything. */
288 + if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
291 + /* We need to check signals first, to get correct SIGURG
292 + * handling. FIXME: Need to check this doesn't impact 1003.1g
293 + * and move it down to the bottom of the loop
295 + if (signal_pending(current)) {
298 + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
302 + /* Next get a buffer. */
304 + skb = skb_peek(&sk->receive_queue);
306 + if (skb == NULL) /* nothing ready */
310 + sk->state == TCP_CLOSE ||
311 + (sk->shutdown & RCV_SHUTDOWN) ||
320 + copied = sock_error(sk);
324 + if (sk->shutdown & RCV_SHUTDOWN)
327 + if (sk->state == TCP_CLOSE) {
329 + /* This occurs when user tries to read
330 + * from never connected socket.
332 + copied = -ENOTCONN;
344 + cleanup_rbuf(sk, copied);
345 + timeo = tcp_data_wait(sk, timeo);
349 + BUG_TRAP (atomic_read (&skb->users) == 1);
351 + exhausted = eaten = 0;
353 + offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
354 + if (skb->h.th->syn)
357 + used = skb->len - offset;
359 + if (tp->urg_data) {
360 + u32 urg_offset = tp->urg_seq - tp->copied_seq;
361 + if (urg_offset < used) {
362 + if (!urg_offset) { /* at urgent data */
363 + if (!sk->urginline) {
364 + tp->copied_seq++; /* discard the single byte of urgent data */
368 + } else /* truncate read */
373 + BUG_TRAP (used >= 0);
381 + if (skb_is_nonlinear (skb))
383 + int rc = skb_linearize (skb, GFP_KERNEL);
385 + printk ("tcp_recvpackets(): linearising: %d\n", rc);
395 + if ((offset + used) == skb->len) /* consuming the whole packet */
397 + __skb_unlink (skb, &sk->receive_queue);
398 + dst_release (skb->dst);
400 + __skb_pull (skb, offset);
401 + __skb_queue_tail (packets, skb);
402 + exhausted = eaten = 1;
404 + else /* consuming only part of the packet */
406 + struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
415 + dst_release (skb2->dst);
416 + __skb_pull (skb2, offset);
417 + __skb_trim (skb2, used);
418 + __skb_queue_tail (packets, skb2);
421 + tp->copied_seq += used;
426 + if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
428 + tcp_fast_path_check(sk, tp);
434 + if (skb->h.th->fin)
438 + tcp_eat_skb (sk, skb);
443 + tcp_eat_skb (sk, skb);
448 + /* Clean up data we have read: This will do ACK frames. */
449 + cleanup_rbuf(sk, copied);
450 + TCP_CHECK_TIMER(sk);