Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / raid5-zerocopy-rhel5.patch
index fa92977..1ba727c 100644 (file)
@@ -1,15 +1,18 @@
-diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c
---- linux-2.6.18-53.orig/drivers/md/raid5.c    2007-12-28 19:09:20.000000000 +0800
-+++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-28 19:09:32.000000000 +0800
-@@ -633,6 +633,7 @@ static int raid5_end_read_request(struct
+Index: linux-2.6.18-128.1.6/drivers/md/raid5.c
+===================================================================
+--- linux-2.6.18-128.1.6.orig/drivers/md/raid5.c       2009-06-02 23:24:52.000000000 -0600
++++ linux-2.6.18-128.1.6/drivers/md/raid5.c    2009-06-02 23:24:55.000000000 -0600
+@@ -633,6 +633,9 @@
                clear_buffer_uptodate(bh);
        }
  #endif
-+      BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
++      /* A read on a device with an R5_Direct write is allowed */
++      /* BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)) */
++      BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page != sh->dev[i].page);
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
        release_stripe(sh);
-@@ -671,6 +672,10 @@ static int raid5_end_write_request (stru
+@@ -669,6 +672,10 @@
  
        rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
        
@@ -19,8 +22,8 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
 +      }
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
-       __release_stripe(conf, sh);
-@@ -911,7 +916,27 @@ static sector_t compute_blocknr(struct s
+       release_stripe(sh);
+@@ -910,7 +917,27 @@
        return r_sector;
  }
  
@@ -48,7 +51,7 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
  
  /*
   * Copy data between a page in the stripe cache, and one or more bion
-@@ -1003,8 +1028,9 @@ static void compute_parity5(struct strip
+@@ -1002,8 +1029,9 @@
  {
        raid5_conf_t *conf = sh->raid_conf;
        int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
@@ -59,7 +62,7 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
  
        PRINTK("compute_parity5, stripe %llu, method %d\n",
                (unsigned long long)sh->sector, method);
-@@ -1054,34 +1080,90 @@ static void compute_parity5(struct strip
+@@ -1053,34 +1081,92 @@
                count = 1;
        }
        
@@ -90,6 +93,8 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
 +                      page = zero_copy_data(wbi, sector);
 +                      if (page) {
 +                              atomic_inc(&conf->writes_zcopy);
++                              /* The page pointer must be restored whenever
++                               * R5_LOCKED is cleared. */
 +                              dev->req.bi_io_vec[0].bv_page = page;
 +                              set_bit(R5_Direct, &dev->flags);
 +                              clear_bit(R5_UPTODATE, &sh->dev[i].flags);
@@ -167,7 +172,7 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
        }
        if (count != 1)
                xor_block(count, STRIPE_SIZE, ptr);
-@@ -1098,6 +1180,7 @@ static void compute_parity6(struct strip
+@@ -1097,6 +1183,7 @@
        raid6_conf_t *conf = sh->raid_conf;
        int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = conf->raid_disks, count;
        struct bio *chosen;
@@ -175,7 +180,7 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
        /**** FIX THIS: This could be very bad if disks is close to 256 ****/
        void *ptrs[disks];
  
-@@ -1127,18 +1210,47 @@ static void compute_parity6(struct strip
+@@ -1126,18 +1213,49 @@
                BUG();          /* Not implemented yet */
        }
  
@@ -211,6 +216,8 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
 +                       * algorithm. -jay */
 +                      if (page && !PageHighMem(page)) {
 +                              atomic_inc(&conf->writes_zcopy);
++                              /* The page pointer must be restored whenever
++                               * R5_LOCKED is cleared. */
 +                              sh->dev[i].req.bi_io_vec[0].bv_page = page;
 +                              set_bit(R5_Direct, &sh->dev[i].flags);
 +                              clear_bit(R5_UPTODATE, &sh->dev[i].flags);
@@ -232,7 +239,7 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
  
  //    switch(method) {
  //    case RECONSTRUCT_WRITE:
-@@ -1149,8 +1261,12 @@ static void compute_parity6(struct strip
+@@ -1148,8 +1266,12 @@
                count = 0;
                i = d0_idx;
                do {
@@ -247,7 +254,7 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
                                printk("block %d/%d not uptodate on parity calc\n", i,count);
                        i = raid6_next_disk(i, disks);
                } while ( i != d0_idx );
-@@ -1597,7 +1713,8 @@ static void handle_stripe5(struct stripe
+@@ -1596,7 +1718,8 @@
                if (sh->dev[i].written) {
                    dev = &sh->dev[i];
                    if (!test_bit(R5_LOCKED, &dev->flags) &&
@@ -257,7 +264,7 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
                        /* We can return any write requests */
                            struct bio *wbi, *wbi2;
                            int bitmap_end = 0;
-@@ -1605,6 +1722,7 @@ static void handle_stripe5(struct stripe
+@@ -1604,6 +1727,7 @@
                            spin_lock_irq(&conf->device_lock);
                            wbi = dev->written;
                            dev->written = NULL;
@@ -265,7 +272,23 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
                            while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
                                    wbi2 = r5_next_bio(wbi, dev->sector);
                                    if (--wbi->bi_phys_segments == 0) {
-@@ -2173,7 +2291,8 @@ static void handle_stripe6(struct stripe
+@@ -1967,6 +2091,15 @@
+                               set_bit(STRIPE_DEGRADED, &sh->state);
+                       PRINTK("skip op %ld on disc %d for sector %llu\n",
+                               bi->bi_rw, i, (unsigned long long)sh->sector);
++
++                      if (test_bit(R5_Direct, &sh->dev[i].flags)) {
++                              /* Restore the page pointer of req; otherwise
++                               * no read is permitted on this stripe, which is
++                               * not what we want. -jay */
++                              BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
++                              sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
++                      }
++
+                       clear_bit(R5_LOCKED, &sh->dev[i].flags);
+                       set_bit(STRIPE_HANDLE, &sh->state);
+               }
+@@ -2172,7 +2305,8 @@
                        if (sh->dev[i].written) {
                                dev = &sh->dev[i];
                                if (!test_bit(R5_LOCKED, &dev->flags) &&
@@ -275,7 +298,7 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
                                        /* We can return any write requests */
                                        int bitmap_end = 0;
                                        struct bio *wbi, *wbi2;
-@@ -2182,6 +2301,7 @@ static void handle_stripe6(struct stripe
+@@ -2181,6 +2315,7 @@
                                        spin_lock_irq(&conf->device_lock);
                                        wbi = dev->written;
                                        dev->written = NULL;
@@ -283,7 +306,23 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
                                        while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
                                                wbi2 = r5_next_bio(wbi, dev->sector);
                                                if (--wbi->bi_phys_segments == 0) {
-@@ -3450,6 +3570,9 @@ static int run(mddev_t *mddev)
+@@ -2532,6 +2667,15 @@
+                               set_bit(STRIPE_DEGRADED, &sh->state);
+                       PRINTK("skip op %ld on disc %d for sector %llu\n",
+                               bi->bi_rw, i, (unsigned long long)sh->sector);
++
++                      if (test_bit(R5_Direct, &sh->dev[i].flags)) {
++                              /* Restore the page pointer of req; otherwise
++                               * no read is permitted on this stripe, which is
++                               * not what we want. -jay */
++                              BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
++                              sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
++                      }
++
+                       clear_bit(R5_LOCKED, &sh->dev[i].flags);
+                       set_bit(STRIPE_HANDLE, &sh->state);
+               }
+@@ -3451,6 +3595,9 @@
        mddev->queue->max_phys_segments = conf->chunk_size * conf->previous_raid_disks >> PAGE_SHIFT;
        mddev->queue->max_hw_segments = conf->chunk_size * conf->previous_raid_disks >> PAGE_SHIFT;;
  
@@ -293,7 +332,7 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
        return 0;
  abort:
        if (conf) {
-@@ -3536,9 +3659,11 @@ static void status (struct seq_file *seq
+@@ -3537,9 +3684,11 @@
                        atomic_read(&conf->handled_in_raid5d),
                        atomic_read(&conf->out_of_stripes),
                        atomic_read(&conf->handle_called));
@@ -307,10 +346,11 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
        seq_printf (seq, "\n\t\t%u delayed, %u bit delayed, %u active, queues: %u in, %u out\n",
                        atomic_read(&conf->delayed), atomic_read(&conf->bit_delayed),
                        atomic_read(&conf->active_stripes),
-diff -pur linux-2.6.18-53.orig/include/linux/backing-dev.h linux-2.6.18-53/include/linux/backing-dev.h
---- linux-2.6.18-53.orig/include/linux/backing-dev.h   2007-12-28 14:49:26.000000000 +0800
-+++ linux-2.6.18-53/include/linux/backing-dev.h        2007-12-28 19:09:32.000000000 +0800
-@@ -48,6 +48,7 @@ struct backing_dev_info {
+Index: linux-2.6.18-128.1.6/include/linux/backing-dev.h
+===================================================================
+--- linux-2.6.18-128.1.6.orig/include/linux/backing-dev.h      2006-09-19 21:42:06.000000000 -0600
++++ linux-2.6.18-128.1.6/include/linux/backing-dev.h   2009-06-02 23:24:55.000000000 -0600
+@@ -48,6 +48,7 @@
  #define BDI_CAP_READ_MAP      0x00000010      /* Can be mapped for reading */
  #define BDI_CAP_WRITE_MAP     0x00000020      /* Can be mapped for writing */
  #define BDI_CAP_EXEC_MAP      0x00000040      /* Can be mapped for execution */
@@ -318,7 +358,7 @@ diff -pur linux-2.6.18-53.orig/include/linux/backing-dev.h linux-2.6.18-53/inclu
  #define BDI_CAP_VMFLAGS \
        (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
  
-@@ -94,11 +95,18 @@ static inline int bdi_rw_congested(struc
+@@ -94,11 +95,18 @@
  #define bdi_cap_account_dirty(bdi) \
        (!((bdi)->capabilities & BDI_CAP_NO_ACCT_DIRTY))
  
@@ -337,18 +377,20 @@ diff -pur linux-2.6.18-53.orig/include/linux/backing-dev.h linux-2.6.18-53/inclu
 +
  
  #endif                /* _LINUX_BACKING_DEV_H */
-diff -pur linux-2.6.18-53.orig/include/linux/page-flags.h linux-2.6.18-53/include/linux/page-flags.h
---- linux-2.6.18-53.orig/include/linux/page-flags.h    2007-12-28 14:49:26.000000000 +0800
-+++ linux-2.6.18-53/include/linux/page-flags.h 2007-12-28 19:09:32.000000000 +0800
+Index: linux-2.6.18-128.1.6/include/linux/page-flags.h
+===================================================================
+--- linux-2.6.18-128.1.6.orig/include/linux/page-flags.h       2009-04-14 21:05:24.000000000 -0600
++++ linux-2.6.18-128.1.6/include/linux/page-flags.h    2009-06-02 23:24:55.000000000 -0600
 @@ -86,6 +86,7 @@
  #define PG_reclaim            17      /* To be reclaimed asap */
  #define PG_nosave_free                18      /* Free, should not be written */
  #define PG_buddy              19      /* Page is free, on buddy lists */
-+#define PG_constant           20      /* To mark if the page is constant */
+ #define PG_gup                        20      /* Page pin may be because of gup */
++#define PG_constant           21      /* To mark if the page is constant */
+ #define PG_xpmem              27      /* Testing for xpmem. */
  
  /* PG_owner_priv_1 users should have descriptive aliases */
- #define PG_checked              PG_owner_priv_1 /* Used by some filesystems */
-@@ -252,6 +253,14 @@
+@@ -283,6 +284,14 @@
  
  struct page;  /* forward declaration */
  
@@ -363,10 +405,11 @@ diff -pur linux-2.6.18-53.orig/include/linux/page-flags.h linux-2.6.18-53/includ
  int test_clear_page_dirty(struct page *page);
  int test_clear_page_writeback(struct page *page);
  int test_set_page_writeback(struct page *page);
-diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/include/linux/raid/raid5.h
---- linux-2.6.18-53.orig/include/linux/raid/raid5.h    2007-12-28 18:55:24.000000000 +0800
-+++ linux-2.6.18-53/include/linux/raid/raid5.h 2007-12-28 19:09:32.000000000 +0800
-@@ -156,8 +156,9 @@ struct stripe_head {
+Index: linux-2.6.18-128.1.6/include/linux/raid/raid5.h
+===================================================================
+--- linux-2.6.18-128.1.6.orig/include/linux/raid/raid5.h       2009-06-02 23:24:50.000000000 -0600
++++ linux-2.6.18-128.1.6/include/linux/raid/raid5.h    2009-06-02 23:24:55.000000000 -0600
+@@ -156,8 +156,9 @@
  #define       R5_Overlap      7       /* There is a pending overlapping request on this block */
  #define       R5_ReadError    8       /* seen a read error here recently */
  #define       R5_ReWrite      9       /* have tried to over-write the readerror */
@@ -377,9 +420,10 @@ diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/includ
  /*
   * Write method
   */
-diff -pur linux-2.6.18-53.orig/mm/filemap.c linux-2.6.18-53/mm/filemap.c
---- linux-2.6.18-53.orig/mm/filemap.c  2007-12-28 14:49:26.000000000 +0800
-+++ linux-2.6.18-53/mm/filemap.c       2007-12-28 19:09:32.000000000 +0800
+Index: linux-2.6.18-128.1.6/mm/filemap.c
+===================================================================
+--- linux-2.6.18-128.1.6.orig/mm/filemap.c     2009-04-14 21:05:46.000000000 -0600
++++ linux-2.6.18-128.1.6/mm/filemap.c  2009-06-02 23:24:55.000000000 -0600
 @@ -30,6 +30,7 @@
  #include <linux/security.h>
  #include <linux/syscalls.h>
@@ -388,7 +432,7 @@ diff -pur linux-2.6.18-53.orig/mm/filemap.c linux-2.6.18-53/mm/filemap.c
  #include "filemap.h"
  #include "internal.h"
  
-@@ -566,11 +567,55 @@ void end_page_writeback(struct page *pag
+@@ -567,11 +568,55 @@
                if (!test_clear_page_writeback(page))
                        BUG();
        }