Whamcloud - gitweb
Branch HEAD
author adilger <adilger>
Thu, 8 Mar 2007 20:14:45 +0000 (20:14 +0000)
committer adilger <adilger>
Thu, 8 Mar 2007 20:14:45 +0000 (20:14 +0000)
Remove old, dangerous tcp zero copy patches.

lustre/kernel_patches/patches/tcp-zero-copy-2.6-fc5.patch [deleted file]
lustre/kernel_patches/patches/tcp-zero-copy-2.6.18-vanilla.patch [deleted file]
lustre/kernel_patches/series/2.6-fc5.series

diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.6-fc5.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.6-fc5.patch
deleted file mode 100644 (file)
index 2183518..0000000
+++ /dev/null
@@ -1,475 +0,0 @@
-Index: linux-2.6.16.i686/net/core/dev.c
-===================================================================
---- linux-2.6.16.i686.orig/net/core/dev.c      2006-05-30 15:47:10.000000000 +0800
-+++ linux-2.6.16.i686/net/core/dev.c   2006-05-30 21:24:07.000000000 +0800
-@@ -1181,6 +1181,9 @@
-       ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
-       ninfo->nr_frags = 0;
-       ninfo->frag_list = NULL;
-+      ninfo->zccd = NULL;             /* copied data => no user zero copy descriptor */
-+      ninfo->zccd2 = NULL;
-+
-       /* Offset between the two in bytes */
-       offset = data - skb->head;
-Index: linux-2.6.16.i686/net/core/skbuff.c
-===================================================================
---- linux-2.6.16.i686.orig/net/core/skbuff.c   2006-05-30 15:47:12.000000000 +0800
-+++ linux-2.6.16.i686/net/core/skbuff.c        2006-05-30 21:26:35.000000000 +0800
-@@ -170,7 +170,8 @@
-       shinfo->ufo_size = 0;
-       shinfo->ip6_frag_id = 0;
-       shinfo->frag_list = NULL;
--
-+      shinfo->zccd = NULL;           /* skbuffs kick off with NO user zero copy descriptors */
-+      shinfo->zccd2 = NULL;
-       if (fclone) {
-               struct sk_buff *child = skb + 1;
-               atomic_t *fclone_ref = (atomic_t *) (child + 1);
-@@ -242,7 +243,9 @@
-       shinfo->ufo_size = 0;
-       shinfo->ip6_frag_id = 0;
-       shinfo->frag_list = NULL;
--
-+      shinfo->zccd = NULL;           /* skbuffs kick off with NO user zero copy descriptors */
-+      shinfo->zccd2 = NULL;
-+      
-       if (fclone) {
-               struct sk_buff *child = skb + 1;
-               atomic_t *fclone_ref = (atomic_t *) (child + 1);
-@@ -287,6 +290,10 @@
-       if (!skb->cloned ||
-           !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
-                              &skb_shinfo(skb)->dataref)) {
-+              if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
-+                      zccd_put (skb_shinfo(skb)->zccd); /* release hold */
-+              if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
-+                      zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
-               if (skb_shinfo(skb)->nr_frags) {
-                       int i;
-                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-@@ -606,6 +613,14 @@
-       n->data_len  = skb->data_len;
-       n->len       = skb->len;
-+      if (skb_shinfo(skb)->zccd != NULL)      /* user zero copy descriptor? */
-+              zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
-+      skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
-+
-+      if (skb_shinfo(skb)->zccd2 != NULL)     /* 2nd user zero copy descriptor? */
-+              zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
-+      skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
-+
-       if (skb_shinfo(skb)->nr_frags) {
-               int i;
-@@ -649,6 +664,9 @@
-       u8 *data;
-       int size = nhead + (skb->end - skb->head) + ntail;
-       long off;
-+      zccd_t *zccd = skb_shinfo(skb)->zccd;   /* stash user zero copy descriptor */
-+      zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
-+
-       if (skb_shared(skb))
-               BUG();
-@@ -670,6 +688,11 @@
-       if (skb_shinfo(skb)->frag_list)
-               skb_clone_fraglist(skb);
-+      if (zccd != NULL)                       /* user zero copy descriptor? */
-+              zccd_get (zccd);                /* extra ref (pages are shared) */
-+      if (zccd2 != NULL)                      /* 2nd user zero copy descriptor? */
-+              zccd_get (zccd2);               /* extra ref (pages are shared) */
-+
-       skb_release_data(skb);
-       off = (data + nhead) - skb->head;
-@@ -684,6 +707,8 @@
-       skb->cloned   = 0;
-       skb->nohdr    = 0;
-       atomic_set(&skb_shinfo(skb)->dataref, 1);
-+      skb_shinfo(skb)->zccd = zccd;
-+      skb_shinfo(skb)->zccd2 = zccd2;
-       return 0;
- nodata:
-Index: linux-2.6.16.i686/net/ipv4/tcp.c
-===================================================================
---- linux-2.6.16.i686.orig/net/ipv4/tcp.c      2006-05-30 15:47:12.000000000 +0800
-+++ linux-2.6.16.i686/net/ipv4/tcp.c   2006-05-30 21:24:07.000000000 +0800
-@@ -498,8 +498,10 @@
-       }
- }
-+/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
- static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
--                       size_t psize, int flags)
-+                              size_t psize, int flags, zccd_t *zccd)
-+
- {
-       struct tcp_sock *tp = tcp_sk(sk);
-       int mss_now, size_goal;
-@@ -547,6 +549,17 @@
-                       copy = size;
-               i = skb_shinfo(skb)->nr_frags;
-+
-+              if (zccd != NULL &&             /* this is a zcc I/O */
-+                              skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
-+                              skb_shinfo(skb)->zccd2 != NULL &&
-+                              skb_shinfo(skb)->zccd != zccd && /* not the same one */
-+                              skb_shinfo(skb)->zccd2 != zccd)
-+              {
-+                      tcp_mark_push (tp, skb);
-+                      goto new_segment;
-+              }
-+
-               can_coalesce = skb_can_coalesce(skb, i, page, offset);
-               if (!can_coalesce && i >= MAX_SKB_FRAGS) {
-                       tcp_mark_push(tp, skb);
-@@ -562,6 +575,20 @@
-                       skb_fill_page_desc(skb, i, page, offset, copy);
-               }
-+              if (zccd != NULL &&     /* this is a zcc I/O */
-+                      skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
-+                      skb_shinfo(skb)->zccd2 != zccd)
-+              {
-+                      zccd_get (zccd);        /* bump ref count */
-+
-+                      BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
-+
-+                      if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
-+                              skb_shinfo(skb)->zccd = zccd;
-+                      else
-+                              skb_shinfo(skb)->zccd2 = zccd;
-+              }
-+
-               skb->len += copy;
-               skb->data_len += copy;
-               skb->truesize += copy;
-@@ -631,12 +658,37 @@
-       lock_sock(sk);
-       TCP_CHECK_TIMER(sk);
--      res = do_tcp_sendpages(sk, &page, offset, size, flags);
-+      res = do_tcp_sendpages(sk, &page, offset, size, flags,NULL);
-+      TCP_CHECK_TIMER(sk);
-+      release_sock(sk);
-+      return res;
-+}
-+
-+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+                          int flags, zccd_t *zccd)
-+{
-+      ssize_t res;
-+      struct sock *sk = sock->sk;
-+
-+#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
-+
-+      if (!(sk->sk_route_caps & NETIF_F_SG) ||        /* caller shouldn't waste her time */
-+          !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
-+              BUG ();
-+
-+#undef TCP_ZC_CSUM_FLAGS
-+
-+      lock_sock(sk);
-+      TCP_CHECK_TIMER(sk);
-+
-+      res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
-+
-       TCP_CHECK_TIMER(sk);
-       release_sock(sk);
-       return res;
- }
-+
- #define TCP_PAGE(sk)  (sk->sk_sndmsg_page)
- #define TCP_OFF(sk)   (sk->sk_sndmsg_off)
-@@ -1406,6 +1458,202 @@
-       goto out;
- }
-+int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
-+                   int len, int nonblock)
-+{
-+      struct tcp_sock *tp = tcp_sk(sk);
-+      int copied;
-+      long timeo;
-+
-+      BUG_TRAP (len > 0);
-+      /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
-+
-+      lock_sock(sk);
-+
-+      TCP_CHECK_TIMER(sk);
-+
-+      copied = -ENOTCONN;
-+      if (sk->sk_state == TCP_LISTEN)
-+              goto out;
-+
-+      copied = 0;
-+      timeo = sock_rcvtimeo(sk, nonblock);
-+
-+      do {
-+              struct sk_buff * skb;
-+              u32 offset;
-+              unsigned long used;
-+              int exhausted;
-+              int eaten;
-+
-+              /* Are we at urgent data? Stop if we have read anything. */
-+              if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
-+                      break;
-+
-+              /* We need to check signals first, to get correct SIGURG
-+               * handling. FIXME: Need to check this doesnt impact 1003.1g
-+               * and move it down to the bottom of the loop
-+               */
-+              if (signal_pending(current)) {
-+                      if (copied)
-+                              break;
-+                      copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
-+                      break;
-+              }
-+
-+              /* Next get a buffer. */
-+
-+              skb = skb_peek(&sk->sk_receive_queue);
-+
-+              if (skb == NULL)                /* nothing ready */
-+              {
-+                      if (copied) {
-+                              if (sk->sk_err ||
-+                                  sk->sk_state == TCP_CLOSE ||
-+                                  (sk->sk_shutdown & RCV_SHUTDOWN) ||
-+                                  !timeo ||
-+                                  (0))
-+                                      break;
-+                      } else {
-+                              if (sock_flag(sk, SOCK_DONE))
-+                                      break;
-+
-+                              if (sk->sk_err) {
-+                                      copied = sock_error(sk);
-+                                      break;
-+                              }
-+
-+                              if (sk->sk_shutdown & RCV_SHUTDOWN)
-+                                      break;
-+
-+                              if (sk->sk_state == TCP_CLOSE) {
-+                                      if (!(sock_flag(sk, SOCK_DONE))) {
-+                                              /* This occurs when user tries to read
-+                                               * from never connected socket.
-+                                               */
-+                                              copied = -ENOTCONN;
-+                                              break;
-+                                      }
-+                                      break;
-+                              }
-+
-+                              if (!timeo) {
-+                                      copied = -EAGAIN;
-+                                      break;
-+                              }
-+                      }
-+
-+                      cleanup_rbuf(sk, copied);
-+                      sk_wait_data(sk, &timeo);
-+                      continue;
-+              }
-+
-+              BUG_TRAP (atomic_read (&skb->users) == 1);
-+
-+              exhausted = eaten = 0;
-+
-+              offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
-+              if (skb->h.th->syn)
-+                      offset--;
-+
-+              used = skb->len - offset;
-+
-+              if (tp->urg_data) {
-+                      u32 urg_offset = tp->urg_seq - tp->copied_seq;
-+                      if (urg_offset < used) {
-+                              if (!urg_offset) { /* at urgent date */
-+                                      if (!(sock_flag(sk, SOCK_URGINLINE))) {
-+                                              tp->copied_seq++; /* discard the single byte of urgent data */
-+                                              offset++;
-+                                              used--;
-+                                      }
-+                              } else          /* truncate read */
-+                                      used = urg_offset;
-+                      }
-+              }
-+
-+              BUG_TRAP (used >= 0);
-+              if (len < used)
-+                      used = len;
-+
-+              if (used == 0)
-+                      exhausted = 1;
-+              else
-+              {
-+                      if (skb_is_nonlinear (skb))
-+                      {
-+                              int   rc = skb_linearize (skb, GFP_KERNEL);
-+
-+                              printk ("tcp_recvpackets(): linearising: %d\n", rc);
-+
-+                              if (rc)
-+                              {
-+                                      if (!copied)
-+                                              copied = rc;
-+                                      break;
-+                              }
-+                      }
-+
-+                      if ((offset + used) == skb->len) /* consuming the whole packet */
-+                      {
-+                              __skb_unlink (skb, &sk->sk_receive_queue);
-+                              dst_release (skb->dst);
-+                              skb_orphan (skb);
-+                              __skb_pull (skb, offset);
-+                              __skb_queue_tail (packets, skb);
-+                              exhausted = eaten = 1;
-+                      }
-+                      else                    /* consuming only part of the packet */
-+                      {
-+                              struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
-+
-+                              if (skb2 == NULL)
-+                              {
-+                                      if (!copied)
-+                                              copied = -ENOMEM;
-+                                      break;
-+                              }
-+
-+                              dst_release (skb2->dst);
-+                              __skb_pull (skb2, offset);
-+                              __skb_trim (skb2, used);
-+                              __skb_queue_tail (packets, skb2);
-+                      }
-+
-+                      tp->copied_seq += used;
-+                      copied += used;
-+                      len -= used;
-+              }
-+
-+              if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
-+                      tp->urg_data = 0;
-+                      tcp_fast_path_check(sk, tp);
-+              }
-+
-+              if (!exhausted)
-+                      continue;
-+
-+              if (skb->h.th->fin)
-+              {
-+                      tp->copied_seq++;
-+                      if (!eaten)
-+                              sk_eat_skb (sk, skb);
-+                      break;
-+              }
-+
-+              if (!eaten)
-+                      sk_eat_skb (sk, skb);
-+
-+      } while (len > 0);
-+
-+ out:
-+      /* Clean up data we have read: This will do ACK frames. */
-+      cleanup_rbuf(sk, copied);
-+      TCP_CHECK_TIMER(sk);
-+      release_sock(sk);
-+      return copied;
-+}
-+
- /*
-  *    State processing on a close. This implements the state shift for
-  *    sending our FIN frame. Note that we only send a FIN for some
-@@ -2139,6 +2387,8 @@
- EXPORT_SYMBOL(tcp_recvmsg);
- EXPORT_SYMBOL(tcp_sendmsg);
- EXPORT_SYMBOL(tcp_sendpage);
-+EXPORT_SYMBOL(tcp_sendpage_zccd);
-+EXPORT_SYMBOL(tcp_recvpackets);
- EXPORT_SYMBOL(tcp_setsockopt);
- EXPORT_SYMBOL(tcp_shutdown);
- EXPORT_SYMBOL(tcp_statistics);
-Index: linux-2.6.16.i686/include/linux/skbuff.h
-===================================================================
---- linux-2.6.16.i686.orig/include/linux/skbuff.h      2006-05-30 15:47:11.000000000 +0800
-+++ linux-2.6.16.i686/include/linux/skbuff.h   2006-05-30 21:24:07.000000000 +0800
-@@ -128,6 +128,30 @@
-       __u16 size;
- };
-+/* Support for callback when skb data has been released */
-+typedef struct zccd                            /* Zero Copy Callback Descriptor */
-+{                                              /* (embed as first member of custom struct) */
-+      atomic_t        zccd_count;             /* reference count */
-+      void           (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
-+} zccd_t;
-+
-+static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
-+{
-+      atomic_set (&d->zccd_count, 1);
-+      d->zccd_destructor = callback;
-+}
-+
-+static inline void zccd_get (zccd_t *d)                /* take a reference */
-+{
-+      atomic_inc (&d->zccd_count);
-+}
-+
-+static inline void zccd_put (zccd_t *d)                /* release a reference */
-+{
-+      if (atomic_dec_and_test (&d->zccd_count))
-+              (d->zccd_destructor)(d);
-+}
-+
- /* This data is invariant across clones and lives at
-  * the end of the header data, ie. at skb->end.
-  */
-@@ -139,6 +163,13 @@
-       unsigned short  ufo_size;
-       unsigned int    ip6_frag_id;
-       struct sk_buff  *frag_list;
-+      zccd_t          *zccd;                  /* zero copy descriptor */
-+      zccd_t          *zccd2;                 /* 2nd zero copy descriptor */
-+      /* NB we expect zero-copy data to be at least 1 packet, so
-+      * having 2 zccds means we don't unneccessarily split the packet
-+      * where consecutive zero-copy sends abutt.
-+      */
-+
-       skb_frag_t      frags[MAX_SKB_FRAGS];
- };
-Index: linux-2.6.16.i686/include/net/tcp.h
-===================================================================
---- linux-2.6.16.i686.orig/include/net/tcp.h   2006-05-30 15:47:11.000000000 +0800
-+++ linux-2.6.16.i686/include/net/tcp.h        2006-05-30 21:24:07.000000000 +0800
-@@ -272,6 +272,9 @@
- extern int                    tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
-                                           struct msghdr *msg, size_t size);
- extern ssize_t                        tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
-+extern ssize_t                 tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+                                              int flags, zccd_t *zccd);
-+
- extern int                    tcp_ioctl(struct sock *sk, 
-                                         int cmd, 
-@@ -354,6 +357,9 @@
-                                           struct msghdr *msg,
-                                           size_t len, int nonblock, 
-                                           int flags, int *addr_len);
-+extern int                     tcp_recvpackets(struct sock *sk,
-+                                              struct sk_buff_head *packets,
-+                                              int len, int nonblock);
- extern void                   tcp_parse_options(struct sk_buff *skb,
-                                                 struct tcp_options_received *opt_rx,
diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.6.18-vanilla.patch
deleted file mode 100644 (file)
index cb33b04..0000000
+++ /dev/null
@@ -1,450 +0,0 @@
-Index: linux-2.6/net/core/skbuff.c
-===================================================================
---- linux-2.6.orig/net/core/skbuff.c   2006-07-15 21:08:45.000000000 +0800
-+++ linux-2.6/net/core/skbuff.c        2006-07-15 21:12:21.000000000 +0800
-@@ -183,7 +183,8 @@ struct sk_buff *__alloc_skb(unsigned int
-       shinfo->gso_type = 0;
-       shinfo->ip6_frag_id = 0;
-       shinfo->frag_list = NULL;
--
-+      shinfo->zccd = NULL;           /* skbuffs kick off with NO user zero copy descriptors */
-+      shinfo->zccd2 = NULL;
-       if (fclone) {
-               struct sk_buff *child = skb + 1;
-               atomic_t *fclone_ref = (atomic_t *) (child + 1);
-@@ -283,6 +284,10 @@ static void skb_release_data(struct sk_b
-       if (!skb->cloned ||
-           !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
-                              &skb_shinfo(skb)->dataref)) {
-+              if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
-+                      zccd_put (skb_shinfo(skb)->zccd); /* release hold */
-+              if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
-+                      zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
-               if (skb_shinfo(skb)->nr_frags) {
-                       int i;
-                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-@@ -618,6 +623,14 @@ struct sk_buff *pskb_copy(struct sk_buff
-       n->data_len  = skb->data_len;
-       n->len       = skb->len;
-+      if (skb_shinfo(skb)->zccd != NULL)      /* user zero copy descriptor? */
-+              zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
-+      skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
-+
-+      if (skb_shinfo(skb)->zccd2 != NULL)     /* 2nd user zero copy descriptor? */
-+              zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
-+      skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
-+
-       if (skb_shinfo(skb)->nr_frags) {
-               int i;
-@@ -661,6 +674,9 @@ int pskb_expand_head(struct sk_buff *skb
-       u8 *data;
-       int size = nhead + (skb->end - skb->head) + ntail;
-       long off;
-+      zccd_t *zccd = skb_shinfo(skb)->zccd;   /* stash user zero copy descriptor */
-+      zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
-+
-       if (skb_shared(skb))
-               BUG();
-@@ -682,6 +698,11 @@ int pskb_expand_head(struct sk_buff *skb
-       if (skb_shinfo(skb)->frag_list)
-               skb_clone_fraglist(skb);
-+      if (zccd != NULL)                       /* user zero copy descriptor? */
-+              zccd_get (zccd);                /* extra ref (pages are shared) */
-+      if (zccd2 != NULL)                      /* 2nd user zero copy descriptor? */
-+              zccd_get (zccd2);               /* extra ref (pages are shared) */
-+
-       skb_release_data(skb);
-       off = (data + nhead) - skb->head;
-@@ -696,6 +717,8 @@ int pskb_expand_head(struct sk_buff *skb
-       skb->cloned   = 0;
-       skb->nohdr    = 0;
-       atomic_set(&skb_shinfo(skb)->dataref, 1);
-+      skb_shinfo(skb)->zccd = zccd;
-+      skb_shinfo(skb)->zccd2 = zccd2;
-       return 0;
- nodata:
-Index: linux-2.6/net/ipv4/tcp.c
-===================================================================
---- linux-2.6.orig/net/ipv4/tcp.c      2006-07-15 21:08:45.000000000 +0800
-+++ linux-2.6/net/ipv4/tcp.c   2006-07-15 22:32:12.000000000 +0800
-@@ -499,8 +499,10 @@ static inline void tcp_push(struct sock 
-       }
- }
-+/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
- static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
--                       size_t psize, int flags)
-+                              size_t psize, int flags, zccd_t *zccd)
-+
- {
-       struct tcp_sock *tp = tcp_sk(sk);
-       int mss_now, size_goal;
-@@ -548,6 +550,17 @@ new_segment:
-                       copy = size;
-               i = skb_shinfo(skb)->nr_frags;
-+
-+              if (zccd != NULL &&             /* this is a zcc I/O */
-+                              skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
-+                              skb_shinfo(skb)->zccd2 != NULL &&
-+                              skb_shinfo(skb)->zccd != zccd && /* not the same one */
-+                              skb_shinfo(skb)->zccd2 != zccd)
-+              {
-+                      tcp_mark_push (tp, skb);
-+                      goto new_segment;
-+              }
-+
-               can_coalesce = skb_can_coalesce(skb, i, page, offset);
-               if (!can_coalesce && i >= MAX_SKB_FRAGS) {
-                       tcp_mark_push(tp, skb);
-@@ -563,6 +576,20 @@ new_segment:
-                       skb_fill_page_desc(skb, i, page, offset, copy);
-               }
-+              if (zccd != NULL &&     /* this is a zcc I/O */
-+                      skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
-+                      skb_shinfo(skb)->zccd2 != zccd)
-+              {
-+                      zccd_get (zccd);        /* bump ref count */
-+
-+                      BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
-+
-+                      if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
-+                              skb_shinfo(skb)->zccd = zccd;
-+                      else
-+                              skb_shinfo(skb)->zccd2 = zccd;
-+              }
-+
-               skb->len += copy;
-               skb->data_len += copy;
-               skb->truesize += copy;
-@@ -628,12 +655,37 @@ ssize_t tcp_sendpage(struct socket *sock
-       lock_sock(sk);
-       TCP_CHECK_TIMER(sk);
--      res = do_tcp_sendpages(sk, &page, offset, size, flags);
-+      res = do_tcp_sendpages(sk, &page, offset, size, flags,NULL);
-+      TCP_CHECK_TIMER(sk);
-+      release_sock(sk);
-+      return res;
-+}
-+
-+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+                          int flags, zccd_t *zccd)
-+{
-+      ssize_t res;
-+      struct sock *sk = sock->sk;
-+
-+#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
-+
-+      if (!(sk->sk_route_caps & NETIF_F_SG) ||        /* caller shouldn't waste her time */
-+          !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
-+              BUG ();
-+
-+#undef TCP_ZC_CSUM_FLAGS
-+
-+      lock_sock(sk);
-+      TCP_CHECK_TIMER(sk);
-+
-+      res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
-+
-       TCP_CHECK_TIMER(sk);
-       release_sock(sk);
-       return res;
- }
-+
- #define TCP_PAGE(sk)  (sk->sk_sndmsg_page)
- #define TCP_OFF(sk)   (sk->sk_sndmsg_off)
-@@ -1477,6 +1529,202 @@ recv_urg:
-       goto out;
- }
-+int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
-+                   int len, int nonblock)
-+{
-+      struct tcp_sock *tp = tcp_sk(sk);
-+      int copied;
-+      long timeo;
-+
-+      BUG_TRAP (len > 0);
-+      /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
-+
-+      lock_sock(sk);
-+
-+      TCP_CHECK_TIMER(sk);
-+
-+      copied = -ENOTCONN;
-+      if (sk->sk_state == TCP_LISTEN)
-+              goto out;
-+
-+      copied = 0;
-+      timeo = sock_rcvtimeo(sk, nonblock);
-+
-+      do {
-+              struct sk_buff * skb;
-+              u32 offset;
-+              unsigned long used;
-+              int exhausted;
-+              int eaten;
-+
-+              /* Are we at urgent data? Stop if we have read anything. */
-+              if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
-+                      break;
-+
-+              /* We need to check signals first, to get correct SIGURG
-+               * handling. FIXME: Need to check this doesnt impact 1003.1g
-+               * and move it down to the bottom of the loop
-+               */
-+              if (signal_pending(current)) {
-+                      if (copied)
-+                              break;
-+                      copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
-+                      break;
-+              }
-+
-+              /* Next get a buffer. */
-+
-+              skb = skb_peek(&sk->sk_receive_queue);
-+
-+              if (skb == NULL)                /* nothing ready */
-+              {
-+                      if (copied) {
-+                              if (sk->sk_err ||
-+                                  sk->sk_state == TCP_CLOSE ||
-+                                  (sk->sk_shutdown & RCV_SHUTDOWN) ||
-+                                  !timeo ||
-+                                  (0))
-+                                      break;
-+                      } else {
-+                              if (sock_flag(sk, SOCK_DONE))
-+                                      break;
-+
-+                              if (sk->sk_err) {
-+                                      copied = sock_error(sk);
-+                                      break;
-+                              }
-+
-+                              if (sk->sk_shutdown & RCV_SHUTDOWN)
-+                                      break;
-+
-+                              if (sk->sk_state == TCP_CLOSE) {
-+                                      if (!(sock_flag(sk, SOCK_DONE))) {
-+                                              /* This occurs when user tries to read
-+                                               * from never connected socket.
-+                                               */
-+                                              copied = -ENOTCONN;
-+                                              break;
-+                                      }
-+                                      break;
-+                              }
-+
-+                              if (!timeo) {
-+                                      copied = -EAGAIN;
-+                                      break;
-+                              }
-+                      }
-+
-+                      tcp_cleanup_rbuf(sk, copied);
-+                      sk_wait_data(sk, &timeo);
-+                      continue;
-+              }
-+
-+              BUG_TRAP (atomic_read (&skb->users) == 1);
-+
-+              exhausted = eaten = 0;
-+
-+              offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
-+              if (skb->h.th->syn)
-+                      offset--;
-+
-+              used = skb->len - offset;
-+
-+              if (tp->urg_data) {
-+                      u32 urg_offset = tp->urg_seq - tp->copied_seq;
-+                      if (urg_offset < used) {
-+                              if (!urg_offset) { /* at urgent date */
-+                                      if (!(sock_flag(sk, SOCK_URGINLINE))) {
-+                                              tp->copied_seq++; /* discard the single byte of urgent data */
-+                                              offset++;
-+                                              used--;
-+                                      }
-+                              } else          /* truncate read */
-+                                      used = urg_offset;
-+                      }
-+              }
-+
-+              BUG_TRAP (used >= 0);
-+              if (len < used)
-+                      used = len;
-+
-+              if (used == 0)
-+                      exhausted = 1;
-+              else
-+              {
-+                      if (skb_is_nonlinear (skb))
-+                      {
-+                              int   rc = skb_linearize (skb);
-+
-+                              printk ("tcp_recvpackets(): linearising: %d\n", rc);
-+
-+                              if (rc)
-+                              {
-+                                      if (!copied)
-+                                              copied = rc;
-+                                      break;
-+                              }
-+                      }
-+
-+                      if ((offset + used) == skb->len) /* consuming the whole packet */
-+                      {
-+                              __skb_unlink (skb, &sk->sk_receive_queue);
-+                              dst_release (skb->dst);
-+                              skb_orphan (skb);
-+                              __skb_pull (skb, offset);
-+                              __skb_queue_tail (packets, skb);
-+                              exhausted = eaten = 1;
-+                      }
-+                      else                    /* consuming only part of the packet */
-+                      {
-+                              struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
-+
-+                              if (skb2 == NULL)
-+                              {
-+                                      if (!copied)
-+                                              copied = -ENOMEM;
-+                                      break;
-+                              }
-+
-+                              dst_release (skb2->dst);
-+                              __skb_pull (skb2, offset);
-+                              __skb_trim (skb2, used);
-+                              __skb_queue_tail (packets, skb2);
-+                      }
-+
-+                      tp->copied_seq += used;
-+                      copied += used;
-+                      len -= used;
-+              }
-+
-+              if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
-+                      tp->urg_data = 0;
-+                      tcp_fast_path_check(sk, tp);
-+              }
-+
-+              if (!exhausted)
-+                      continue;
-+
-+              if (skb->h.th->fin)
-+              {
-+                      tp->copied_seq++;
-+                      if (!eaten)
-+                              sk_eat_skb (sk, skb, 0);
-+                      break;
-+              }
-+
-+              if (!eaten)
-+                      sk_eat_skb (sk, skb, 0);
-+
-+      } while (len > 0);
-+
-+ out:
-+      /* Clean up data we have read: This will do ACK frames. */
-+      tcp_cleanup_rbuf(sk, copied);
-+      TCP_CHECK_TIMER(sk);
-+      release_sock(sk);
-+      return copied;
-+}
-+
- /*
-  *    State processing on a close. This implements the state shift for
-  *    sending our FIN frame. Note that we only send a FIN for some
-@@ -2345,6 +2593,8 @@ EXPORT_SYMBOL(tcp_read_sock);
- EXPORT_SYMBOL(tcp_recvmsg);
- EXPORT_SYMBOL(tcp_sendmsg);
- EXPORT_SYMBOL(tcp_sendpage);
-+EXPORT_SYMBOL(tcp_sendpage_zccd);
-+EXPORT_SYMBOL(tcp_recvpackets);
- EXPORT_SYMBOL(tcp_setsockopt);
- EXPORT_SYMBOL(tcp_shutdown);
- EXPORT_SYMBOL(tcp_statistics);
-Index: linux-2.6/include/linux/skbuff.h
-===================================================================
---- linux-2.6.orig/include/linux/skbuff.h      2006-07-15 21:08:45.000000000 +0800
-+++ linux-2.6/include/linux/skbuff.h   2006-07-15 21:12:21.000000000 +0800
-@@ -128,6 +128,30 @@ struct skb_frag_struct {
-       __u16 size;
- };
-+/* Support for callback when skb data has been released */
-+typedef struct zccd                            /* Zero Copy Callback Descriptor */
-+{                                              /* (embed as first member of custom struct) */
-+      atomic_t        zccd_count;             /* reference count */
-+      void           (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
-+} zccd_t;
-+
-+static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
-+{
-+      atomic_set (&d->zccd_count, 1);
-+      d->zccd_destructor = callback;
-+}
-+
-+static inline void zccd_get (zccd_t *d)                /* take a reference */
-+{
-+      atomic_inc (&d->zccd_count);
-+}
-+
-+static inline void zccd_put (zccd_t *d)                /* release a reference */
-+{
-+      if (atomic_dec_and_test (&d->zccd_count))
-+              (d->zccd_destructor)(d);
-+}
-+
- /* This data is invariant across clones and lives at
-  * the end of the header data, ie. at skb->end.
-  */
-@@ -140,6 +164,13 @@ struct skb_shared_info {
-       unsigned short  gso_type;
-       unsigned int    ip6_frag_id;
-       struct sk_buff  *frag_list;
-+      zccd_t          *zccd;                  /* zero copy descriptor */
-+      zccd_t          *zccd2;                 /* 2nd zero copy descriptor */
-+      /* NB we expect zero-copy data to be at least 1 packet, so
-+      * having 2 zccds means we don't unnecessarily split the packet
-+      * where consecutive zero-copy sends abut.
-+      */
-+
-       skb_frag_t      frags[MAX_SKB_FRAGS];
- };
-Index: linux-2.6/include/net/tcp.h
-===================================================================
---- linux-2.6.orig/include/net/tcp.h   2006-07-15 21:08:45.000000000 +0800
-+++ linux-2.6/include/net/tcp.h        2006-07-15 21:12:21.000000000 +0800
-@@ -278,6 +278,9 @@ extern int                 tcp_v4_tw_remember_stam
- extern int                    tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
-                                           struct msghdr *msg, size_t size);
- extern ssize_t                        tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
-+extern ssize_t                 tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+                                              int flags, zccd_t *zccd);
-+
- extern int                    tcp_ioctl(struct sock *sk, 
-                                         int cmd, 
-@@ -368,6 +371,9 @@ extern int                 tcp_recvmsg(struct kiocb *i
-                                           struct msghdr *msg,
-                                           size_t len, int nonblock, 
-                                           int flags, int *addr_len);
-+extern int                     tcp_recvpackets(struct sock *sk,
-+                                              struct sk_buff_head *packets,
-+                                              int len, int nonblock);
- extern void                   tcp_parse_options(struct sk_buff *skb,
-                                                 struct tcp_options_received *opt_rx,
index c9abdd0..1835748 100644 (file)
@@ -16,5 +16,4 @@ remove-suid-2.6-suse.patch
 export-show_task-2.6-fc5.patch 
 sd_iostats-2.6-rhel4.patch 
 export_symbol_numa-2.6-fc5.patch 
-tcp-zero-copy-2.6-fc5.patch
 vfs_intent-2.6-fc5-fix.patch