Whamcloud - gitweb
Fixed the raid5 patches.
author: jxiong <jxiong>
Tue, 25 Nov 2008 04:12:58 +0000 (04:12 +0000)
committer: jxiong <jxiong>
Tue, 25 Nov 2008 04:12:58 +0000 (04:12 +0000)
- rebuild policy patch for the rhel5 2.6.18-53.1.21 kernel
- fixed soft lockups in raid5 by adding cond_resched() after each handled stripe

b=17084
r=adilger,jay

lustre/kernel_patches/patches/md-rebuild-policy-rhel5.patch [new file with mode: 0644]
lustre/kernel_patches/patches/md-soft-lockups.patch [new file with mode: 0644]
lustre/kernel_patches/patches/raid5-zerocopy-rhel5.patch
lustre/kernel_patches/series/2.6-rhel5.series

diff --git a/lustre/kernel_patches/patches/md-rebuild-policy-rhel5.patch b/lustre/kernel_patches/patches/md-rebuild-policy-rhel5.patch
new file mode 100644 (file)
index 0000000..ca898ed
--- /dev/null
@@ -0,0 +1,137 @@
+diff -pur linux-2.6.18-53.1.21.orig/drivers/md/md.c linux-2.6.18-53.1.21/drivers/md/md.c
+--- linux-2.6.18-53.1.21.orig/drivers/md/md.c  2008-11-25 11:21:57.000000000 +0800
++++ linux-2.6.18-53.1.21/drivers/md/md.c       2008-11-25 11:26:49.000000000 +0800
+@@ -90,6 +90,8 @@ static void md_print_devices(void);
+ static int sysctl_speed_limit_min = 1000;
+ static int sysctl_speed_limit_max = 200000;
++static int sysctl_rebuild_window_size = 256;
++static int sysctl_disk_idle_size = 4096;
+ static inline int speed_min(mddev_t *mddev)
+ {
+       return mddev->sync_speed_min ?
+@@ -121,6 +123,22 @@ static ctl_table raid_table[] = {
+               .mode           = S_IRUGO|S_IWUSR,
+               .proc_handler   = &proc_dointvec,
+       },
++      {
++              .ctl_name       = DEV_RAID_REBUILD_WINDOW,
++              .procname       = "rebuild_window_size",
++              .data           = &sysctl_rebuild_window_size,
++              .maxlen         = sizeof(int),
++              .mode           = S_IRUGO|S_IWUSR,
++              .proc_handler   = &proc_dointvec,
++      },
++      {
++              .ctl_name       = DEV_RAID_DISK_IDLE_SIZE,
++              .procname       = "disk_idle_size",
++              .data           = &sysctl_disk_idle_size,
++              .maxlen         = sizeof(int),
++              .mode           = S_IRUGO|S_IWUSR,
++              .proc_handler   = &proc_dointvec,
++      },
+       { .ctl_name = 0 }
+ };
+@@ -4980,14 +4998,15 @@ static int is_mddev_idle(mddev_t *mddev)
+       mdk_rdev_t * rdev;
+       struct list_head *tmp;
+       int idle;
+-      unsigned long curr_events;
++      unsigned long rw, sync;
+       idle = 1;
+       ITERATE_RDEV(mddev,rdev,tmp) {
+               struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
+-              curr_events = disk_stat_read(disk, sectors[0]) + 
+-                              disk_stat_read(disk, sectors[1]) - 
+-                              atomic_read(&disk->sync_io);
++
++              rw = disk_stat_read(disk, sectors[READ])+disk_stat_read(disk, sectors[WRITE]);
++              sync = atomic_read(&disk->sync_io);
++
+               /* The difference between curr_events and last_events
+                * will be affected by any new non-sync IO (making
+                * curr_events bigger) and any difference in the amount of
+@@ -5001,9 +5020,9 @@ static int is_mddev_idle(mddev_t *mddev)
+                *
+                * Note: the following is an unsigned comparison.
+                */
+-              if ((curr_events - rdev->last_events + 4096) > 8192) {
+-                      rdev->last_events = curr_events;
++              if (rw - rdev->last_events > sync + sysctl_disk_idle_size) {
+                       idle = 0;
++                      rdev->last_events = rw - sync;
+               }
+       }
+       return idle;
+@@ -5069,8 +5088,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wa
+ void md_do_sync(mddev_t *mddev)
+ {
+       mddev_t *mddev2;
+-      unsigned int currspeed = 0,
+-               window;
++      unsigned int currspeed = 0;
+       sector_t max_sectors,j, io_sectors;
+       unsigned long mark[SYNC_MARKS];
+       sector_t mark_cnt[SYNC_MARKS];
+@@ -5190,9 +5208,8 @@ void md_do_sync(mddev_t *mddev)
+       /*
+        * Tune reconstruction:
+        */
+-      window = 32*(PAGE_SIZE/512);
+       printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n",
+-              window/2,(unsigned long long) max_sectors/2);
++              sysctl_rebuild_window_size/2,(unsigned long long) max_sectors/2);
+       atomic_set(&mddev->recovery_active, 0);
+       init_waitqueue_head(&mddev->recovery_wait);
+@@ -5230,7 +5247,7 @@ void md_do_sync(mddev_t *mddev)
+                        */
+                       md_new_event(mddev);
+-              if (last_check + window > io_sectors || j == max_sectors)
++              if (last_check + sysctl_rebuild_window_size > io_sectors || j == max_sectors)
+                       continue;
+               last_check = io_sectors;
+@@ -5251,7 +5268,6 @@ void md_do_sync(mddev_t *mddev)
+                       last_mark = next;
+               }
+-
+               if (kthread_should_stop()) {
+                       /*
+                        * got a signal, exit.
+@@ -5275,10 +5291,16 @@ void md_do_sync(mddev_t *mddev)
+               currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
+                       /((jiffies-mddev->resync_mark)/HZ +1) +1;
+-
+               if (currspeed > speed_min(mddev)) {
+                       if ((currspeed > speed_max(mddev)) ||
+                                       !is_mddev_idle(mddev)) {
++                              static unsigned long next_report;
++                              if (time_after(jiffies, next_report)) {
++                                      printk(KERN_INFO "md: rebuild %s throttled due to IO\n",
++                                              mdname(mddev));
++                                      /* once per 10 minutes */
++                                      next_report = jiffies + 600 * HZ;
++                              }
+                               msleep(500);
+                               goto repeat;
+                       }
+diff -pur linux-2.6.18-53.1.21.orig/include/linux/sysctl.h linux-2.6.18-53.1.21/include/linux/sysctl.h
+--- linux-2.6.18-53.1.21.orig/include/linux/sysctl.h   2008-11-25 11:21:59.000000000 +0800
++++ linux-2.6.18-53.1.21/include/linux/sysctl.h        2008-11-25 11:22:26.000000000 +0800
+@@ -903,7 +903,9 @@ enum {
+ /* /proc/sys/dev/raid */
+ enum {
+       DEV_RAID_SPEED_LIMIT_MIN=1,
+-      DEV_RAID_SPEED_LIMIT_MAX=2
++      DEV_RAID_SPEED_LIMIT_MAX=2,
++      DEV_RAID_REBUILD_WINDOW=3,
++      DEV_RAID_DISK_IDLE_SIZE=4
+ };
+ /* /proc/sys/dev/parport/default */
diff --git a/lustre/kernel_patches/patches/md-soft-lockups.patch b/lustre/kernel_patches/patches/md-soft-lockups.patch
new file mode 100644 (file)
index 0000000..cde9a34
--- /dev/null
@@ -0,0 +1,13 @@
+Index: linux-2.6.18-92.1.10/drivers/md/raid5.c
+===================================================================
+--- linux-2.6.18-92.1.10.orig/drivers/md/raid5.c       2008-11-10 11:00:51.000000000 +0900
++++ linux-2.6.18-92.1.10/drivers/md/raid5.c    2008-11-10 11:02:38.000000000 +0900
+@@ -3251,6 +3251,8 @@
+               handle_stripe(sh, conf->spare_page, NULL);
+               release_stripe(sh);
++              cond_resched();
++
+               spin_lock_irq(&conf->device_lock);
+       }
+       PRINTK("%d stripes handled\n", handled);
index dd80825..e66ae01 100644 (file)
@@ -1,6 +1,6 @@
-diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c
---- linux-2.6.18-53.orig/drivers/md/raid5.c    2007-12-28 19:09:20.000000000 +0800
-+++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-28 19:09:32.000000000 +0800
+diff -pur linux-2.6.18-53.1.21.orig/drivers/md/raid5.c linux-2.6.18-53.1.21/drivers/md/raid5.c
+--- linux-2.6.18-53.1.21.orig/drivers/md/raid5.c       2008-11-25 11:09:16.000000000 +0800
++++ linux-2.6.18-53.1.21/drivers/md/raid5.c    2008-11-25 11:09:48.000000000 +0800
 @@ -633,6 +633,7 @@ static int raid5_end_read_request(struct
                clear_buffer_uptodate(bh);
        }
@@ -307,9 +307,9 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai
        seq_printf (seq, "\n\t\t%u delayed, %u bit delayed, %u active, queues: %u in, %u out\n",
                        atomic_read(&conf->delayed), atomic_read(&conf->bit_delayed),
                        atomic_read(&conf->active_stripes),
-diff -pur linux-2.6.18-53.orig/include/linux/backing-dev.h linux-2.6.18-53/include/linux/backing-dev.h
---- linux-2.6.18-53.orig/include/linux/backing-dev.h   2007-12-28 14:49:26.000000000 +0800
-+++ linux-2.6.18-53/include/linux/backing-dev.h        2007-12-28 19:09:32.000000000 +0800
+diff -pur linux-2.6.18-53.1.21.orig/include/linux/backing-dev.h linux-2.6.18-53.1.21/include/linux/backing-dev.h
+--- linux-2.6.18-53.1.21.orig/include/linux/backing-dev.h      2008-11-25 11:09:20.000000000 +0800
++++ linux-2.6.18-53.1.21/include/linux/backing-dev.h   2008-11-25 11:09:48.000000000 +0800
 @@ -48,6 +48,7 @@ struct backing_dev_info {
  #define BDI_CAP_READ_MAP      0x00000010      /* Can be mapped for reading */
  #define BDI_CAP_WRITE_MAP     0x00000020      /* Can be mapped for writing */
@@ -337,17 +337,17 @@ diff -pur linux-2.6.18-53.orig/include/linux/backing-dev.h linux-2.6.18-53/inclu
 +
  
  #endif                /* _LINUX_BACKING_DEV_H */
-diff -pur linux-2.6.18-53.orig/include/linux/page-flags.h linux-2.6.18-53/include/linux/page-flags.h
---- linux-2.6.18-53.orig/include/linux/page-flags.h    2007-12-28 14:49:26.000000000 +0800
-+++ linux-2.6.18-53/include/linux/page-flags.h 2007-12-28 19:09:32.000000000 +0800
+diff -pur linux-2.6.18-53.1.21.orig/include/linux/page-flags.h linux-2.6.18-53.1.21/include/linux/page-flags.h
+--- linux-2.6.18-53.1.21.orig/include/linux/page-flags.h       2008-11-25 11:09:20.000000000 +0800
++++ linux-2.6.18-53.1.21/include/linux/page-flags.h    2008-11-25 11:12:08.000000000 +0800
 @@ -86,6 +86,7 @@
  #define PG_reclaim            17      /* To be reclaimed asap */
  #define PG_nosave_free                18      /* Free, should not be written */
  #define PG_buddy              19      /* Page is free, on buddy lists */
-+#define PG_constant           20      /* To mark if the page is constant */
- #define PG_xpmem              27      /* Testing for xpmem. */
++#define PG_constant     20  /* To mark if the page is constant */
  
  /* PG_owner_priv_1 users should have descriptive aliases */
+ #define PG_checked              PG_owner_priv_1 /* Used by some filesystems */
 @@ -252,6 +253,14 @@
  
  struct page;  /* forward declaration */
@@ -363,9 +363,9 @@ diff -pur linux-2.6.18-53.orig/include/linux/page-flags.h linux-2.6.18-53/includ
  int test_clear_page_dirty(struct page *page);
  int test_clear_page_writeback(struct page *page);
  int test_set_page_writeback(struct page *page);
-diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/include/linux/raid/raid5.h
---- linux-2.6.18-53.orig/include/linux/raid/raid5.h    2007-12-28 18:55:24.000000000 +0800
-+++ linux-2.6.18-53/include/linux/raid/raid5.h 2007-12-28 19:09:32.000000000 +0800
+diff -pur linux-2.6.18-53.1.21.orig/include/linux/raid/raid5.h linux-2.6.18-53.1.21/include/linux/raid/raid5.h
+--- linux-2.6.18-53.1.21.orig/include/linux/raid/raid5.h       2008-11-25 11:09:20.000000000 +0800
++++ linux-2.6.18-53.1.21/include/linux/raid/raid5.h    2008-11-25 11:09:48.000000000 +0800
 @@ -156,8 +156,9 @@ struct stripe_head {
  #define       R5_Overlap      7       /* There is a pending overlapping request on this block */
  #define       R5_ReadError    8       /* seen a read error here recently */
@@ -377,9 +377,9 @@ diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/includ
  /*
   * Write method
   */
-diff -pur linux-2.6.18-53.orig/mm/filemap.c linux-2.6.18-53/mm/filemap.c
---- linux-2.6.18-53.orig/mm/filemap.c  2007-12-28 14:49:26.000000000 +0800
-+++ linux-2.6.18-53/mm/filemap.c       2007-12-28 19:09:32.000000000 +0800
+diff -pur linux-2.6.18-53.1.21.orig/mm/filemap.c linux-2.6.18-53.1.21/mm/filemap.c
+--- linux-2.6.18-53.1.21.orig/mm/filemap.c     2008-11-25 11:09:15.000000000 +0800
++++ linux-2.6.18-53.1.21/mm/filemap.c  2008-11-25 11:09:48.000000000 +0800
 @@ -30,6 +30,7 @@
  #include <linux/security.h>
  #include <linux/syscalls.h>
index 8495289..43302ff 100644 (file)
@@ -17,6 +17,7 @@ raid5-large-io-rhel5.patch
 raid5-stripe-by-stripe-handling-rhel5.patch
 raid5-merge-ios-rhel5.patch
 raid5-zerocopy-rhel5.patch
-md-rebuild-policy.patch
+md-rebuild-policy-rhel5.patch
+md-soft-lockups.patch
 jbd-journal-chksum-2.6.18-vanilla.patch
 quota-large-limits-rhel5.patch