Whamcloud - gitweb
Porting raid5 improvements to rhel5 kernels.
[fs/lustre-release.git] / lustre / kernel_patches / patches / md-rebuild-policy.patch
1 diff -pur linux-2.6.18-53.orig/drivers/md/md.c linux-2.6.18-53/drivers/md/md.c
2 --- linux-2.6.18-53.orig/drivers/md/md.c        2008-02-13 17:34:25.000000000 +0800
3 +++ linux-2.6.18-53/drivers/md/md.c     2008-02-13 17:39:28.000000000 +0800
4 @@ -90,6 +90,8 @@ static void md_print_devices(void);
5  
6  static int sysctl_speed_limit_min = 1000;
7  static int sysctl_speed_limit_max = 200000;
8 +static int sysctl_rebuild_window_size = 256;
9 +static int sysctl_disk_idle_size = 4096;
10  static inline int speed_min(mddev_t *mddev)
11  {
12         return mddev->sync_speed_min ?
13 @@ -121,6 +123,22 @@ static ctl_table raid_table[] = {
14                 .mode           = S_IRUGO|S_IWUSR,
15                 .proc_handler   = &proc_dointvec,
16         },
17 +       {
18 +               .ctl_name       = DEV_RAID_REBUILD_WINDOW,
19 +               .procname       = "rebuild_window_size",
20 +               .data           = &sysctl_rebuild_window_size,
21 +               .maxlen         = sizeof(int),
22 +               .mode           = S_IRUGO|S_IWUSR,
23 +               .proc_handler   = &proc_dointvec,
24 +       },
25 +       {
26 +               .ctl_name       = DEV_RAID_DISK_IDLE_SIZE,
27 +               .procname       = "disk_idle_size",
28 +               .data           = &sysctl_disk_idle_size,
29 +               .maxlen         = sizeof(int),
30 +               .mode           = S_IRUGO|S_IWUSR,
31 +               .proc_handler   = &proc_dointvec,
32 +       },
33         { .ctl_name = 0 }
34  };
35  
36 @@ -4980,14 +4998,15 @@ static int is_mddev_idle(mddev_t *mddev)
37         mdk_rdev_t * rdev;
38         struct list_head *tmp;
39         int idle;
40 -       unsigned long curr_events;
41 +       unsigned long rw, sync;
42  
43         idle = 1;
44         ITERATE_RDEV(mddev,rdev,tmp) {
45                 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
46 -               curr_events = disk_stat_read(disk, sectors[0]) + 
47 -                               disk_stat_read(disk, sectors[1]) - 
48 -                               atomic_read(&disk->sync_io);
49 +
50 +               rw = disk_stat_read(disk, sectors[READ])+disk_stat_read(disk, sectors[WRITE]);
51 +               sync = atomic_read(&disk->sync_io);
52 +
53                 /* The difference between curr_events and last_events
54                  * will be affected by any new non-sync IO (making
55                  * curr_events bigger) and any difference in the amount of
56 @@ -5001,9 +5020,9 @@ static int is_mddev_idle(mddev_t *mddev)
57                  *
58                  * Note: the following is an unsigned comparison.
59                  */
60 -               if ((curr_events - rdev->last_events + 4096) > 8192) {
61 -                       rdev->last_events = curr_events;
62 +               if (rw - rdev->last_events > sync + sysctl_disk_idle_size) {
63                         idle = 0;
64 +                       rdev->last_events = rw - sync;
65                 }
66         }
67         return idle;
68 @@ -5069,8 +5088,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wa
69  void md_do_sync(mddev_t *mddev)
70  {
71         mddev_t *mddev2;
72 -       unsigned int currspeed = 0,
73 -                window;
74 +       unsigned int currspeed = 0;
75         sector_t max_sectors,j, io_sectors;
76         unsigned long mark[SYNC_MARKS];
77         sector_t mark_cnt[SYNC_MARKS];
78 @@ -5190,9 +5208,8 @@ void md_do_sync(mddev_t *mddev)
79         /*
80          * Tune reconstruction:
81          */
82 -       window = 32*(PAGE_SIZE/512);
83         printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n",
84 -               window/2,(unsigned long long) max_sectors/2);
85 +               sysctl_rebuild_window_size/2,(unsigned long long) max_sectors/2);
86  
87         atomic_set(&mddev->recovery_active, 0);
88         init_waitqueue_head(&mddev->recovery_wait);
89 @@ -5230,7 +5247,7 @@ void md_do_sync(mddev_t *mddev)
90                          */
91                         md_new_event(mddev);
92  
93 -               if (last_check + window > io_sectors || j == max_sectors)
94 +               if (last_check + sysctl_rebuild_window_size > io_sectors || j == max_sectors)
95                         continue;
96  
97                 last_check = io_sectors;
98 @@ -5251,7 +5268,6 @@ void md_do_sync(mddev_t *mddev)
99                         last_mark = next;
100                 }
101  
102 -
103                 if (kthread_should_stop()) {
104                         /*
105                          * got a signal, exit.
106 @@ -5275,10 +5291,16 @@ void md_do_sync(mddev_t *mddev)
107  
108                 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
109                         /((jiffies-mddev->resync_mark)/HZ +1) +1;
110 -
111                 if (currspeed > speed_min(mddev)) {
112                         if ((currspeed > speed_max(mddev)) ||
113                                         !is_mddev_idle(mddev)) {
114 +                               static unsigned long next_report;
115 +                               if (time_after(jiffies, next_report)) {
116 +                                       printk(KERN_INFO "md: rebuild %s throttled due to IO\n",
117 +                                               mdname(mddev));
118 +                                       /* rate-limit: log the throttle message at most once per 10 minutes */
119 +                                       next_report = jiffies + 600 * HZ;
120 +                               }
121                                 msleep(500);
122                                 goto repeat;
123                         }
124 diff -pur linux-2.6.18-53.orig/include/linux/sysctl.h linux-2.6.18-53/include/linux/sysctl.h
125 --- linux-2.6.18-53.orig/include/linux/sysctl.h 2008-02-13 17:35:25.000000000 +0800
126 +++ linux-2.6.18-53/include/linux/sysctl.h      2008-02-13 17:36:22.000000000 +0800
127 @@ -903,7 +903,9 @@ enum {
128  /* /proc/sys/dev/raid */
129  enum {
130         DEV_RAID_SPEED_LIMIT_MIN=1,
131 -       DEV_RAID_SPEED_LIMIT_MAX=2
132 +       DEV_RAID_SPEED_LIMIT_MAX=2,
133 +       DEV_RAID_REBUILD_WINDOW=3,
134 +       DEV_RAID_DISK_IDLE_SIZE=4
135  };
136  
137  /* /proc/sys/dev/parport/default */