Whamcloud - gitweb
Fixed the raid5 patches.
[fs/lustre-release.git] / lustre / kernel_patches / patches / md-rebuild-policy.patch
1 diff -pur linux-2.6.18-53.orig/drivers/md/md.c linux-2.6.18-53/drivers/md/md.c
2 --- linux-2.6.18-53.orig/drivers/md/md.c        2008-02-13 17:34:25.000000000 +0800
3 +++ linux-2.6.18-53/drivers/md/md.c     2008-02-13 17:39:28.000000000 +0800
4 @@ -90,6 +90,8 @@ static void md_print_devices(void);
5  
6  static int sysctl_speed_limit_min = 1000;
7  static int sysctl_speed_limit_max = 200000;
8 +static int sysctl_rebuild_window_size = 256; /* resync check window in 512-byte sectors (printed as size/2 KB); 256 equals the former 32*(PAGE_SIZE/512) when PAGE_SIZE is 4096 */
9 +static int sysctl_disk_idle_size = 4096; /* threshold, in the units of the disk sectors[] counters, that is_mddev_idle() adds to sync I/O before treating a member disk as busy */
10  static inline int speed_min(mddev_t *mddev)
11  {
12         return mddev->sync_speed_min ?
13 @@ -121,6 +123,22 @@ static ctl_table raid_table[] = {
14                 .mode           = S_IRUGO|S_IWUSR,
15                 .proc_handler   = &proc_dointvec,
16         },
17 +       {
18 +               .ctl_name       = DEV_RAID_REBUILD_WINDOW,
19 +               .procname       = "rebuild_window_size",
20 +               .data           = &sysctl_rebuild_window_size, /* read by md_do_sync() as its speed-check window */
21 +               .maxlen         = sizeof(int),
22 +               .mode           = S_IRUGO|S_IWUSR,
23 +               .proc_handler   = &proc_dointvec, /* NOTE(review): no min/max bound is applied here; confirm a negative value is harmless in md_do_sync()'s sector arithmetic, else use proc_dointvec_minmax */
24 +       },
25 +       {
26 +               .ctl_name       = DEV_RAID_DISK_IDLE_SIZE,
27 +               .procname       = "disk_idle_size",
28 +               .data           = &sysctl_disk_idle_size, /* read by is_mddev_idle() as its busy threshold */
29 +               .maxlen         = sizeof(int),
30 +               .mode           = S_IRUGO|S_IWUSR,
31 +               .proc_handler   = &proc_dointvec, /* NOTE(review): unbounded as well; the value is added to an unsigned long in is_mddev_idle() -- confirm negative input is acceptable */
32 +       },
33         { .ctl_name = 0 }
34  };
35  
36 @@ -4980,15 +4998,16 @@ static int is_mddev_idle(mddev_t *mddev)
37  {
38         mdk_rdev_t * rdev;
39         int idle;
40 -       unsigned long curr_events;
41 +       unsigned long rw, sync;
42  
43         idle = 1;
44         rcu_read_lock();
45         rdev_for_each_rcu(rdev, mddev) {
46                 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
47 -               curr_events = disk_stat_read(disk, sectors[0]) + 
48 -                               disk_stat_read(disk, sectors[1]) - 
49 -                               atomic_read(&disk->sync_io);
50 +
51 +               rw = disk_stat_read(disk, sectors[READ])+disk_stat_read(disk, sectors[WRITE]);
52 +               sync = atomic_read(&disk->sync_io);
53 +
54                 /* The difference between curr_events and last_events
55                  * will be affected by any new non-sync IO (making
56                  * curr_events bigger) and any difference in the amount of
57 @@ -5001,9 +5020,9 @@ static int is_mddev_idle(mddev_t *mddev)
58                  *
59                  * Note: the following is an unsigned comparison.
60                  */
61 -               if ((curr_events - rdev->last_events + 4096) > 8192) {
62 -                       rdev->last_events = curr_events;
63 +               if (rw - rdev->last_events > sync + sysctl_disk_idle_size) {
64                         idle = 0;
65 +                       rdev->last_events = rw - sync;
66                 }
67         }
68         return idle;
69 @@ -5069,8 +5088,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wa
70  void md_do_sync(mddev_t *mddev)
71  {
72         mddev_t *mddev2;
73 -       unsigned int currspeed = 0,
74 -                window;
75 +       unsigned int currspeed = 0;
76         sector_t max_sectors,j, io_sectors;
77         unsigned long mark[SYNC_MARKS];
78         sector_t mark_cnt[SYNC_MARKS];
79 @@ -5190,9 +5208,8 @@ void md_do_sync(mddev_t *mddev)
80         /*
81          * Tune reconstruction:
82          */
83 -       window = 32*(PAGE_SIZE/512);
84         printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n",
85 -               window/2,(unsigned long long) max_sectors/2);
86 +               sysctl_rebuild_window_size/2,(unsigned long long) max_sectors/2);
87  
88         atomic_set(&mddev->recovery_active, 0);
89         init_waitqueue_head(&mddev->recovery_wait);
90 @@ -5230,7 +5247,7 @@ void md_do_sync(mddev_t *mddev)
91                          */
92                         md_new_event(mddev);
93  
94 -               if (last_check + window > io_sectors || j == max_sectors)
95 +               if (last_check + sysctl_rebuild_window_size > io_sectors || j == max_sectors)
96                         continue;
97  
98                 last_check = io_sectors;
99 @@ -5251,7 +5268,6 @@ void md_do_sync(mddev_t *mddev)
100                         last_mark = next;
101                 }
102  
103 -
104                 if (kthread_should_stop()) {
105                         /*
106                          * got a signal, exit.
107 @@ -5275,10 +5291,16 @@ void md_do_sync(mddev_t *mddev)
108  
109                 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
110                         /((jiffies-mddev->resync_mark)/HZ +1) +1;
111 -
112                 if (currspeed > speed_min(mddev)) {
113                         if ((currspeed > speed_max(mddev)) ||
114                                         !is_mddev_idle(mddev)) {
115 +                               static unsigned long next_report;
116 +                               if (time_after(jiffies, next_report)) {
117 +                                       printk(KERN_INFO "md: rebuild %s throttled due to IO\n",
118 +                                               mdname(mddev));
119 +                                       /* once per 10 minutes */
120 +                                       next_report = jiffies + 600 * HZ;
121 +                               }
122                                 msleep(500);
123                                 goto repeat;
124                         }
125 diff -pur linux-2.6.18-53.orig/include/linux/sysctl.h linux-2.6.18-53/include/linux/sysctl.h
126 --- linux-2.6.18-53.orig/include/linux/sysctl.h 2008-02-13 17:35:25.000000000 +0800
127 +++ linux-2.6.18-53/include/linux/sysctl.h      2008-02-13 17:36:22.000000000 +0800
128 @@ -903,7 +903,9 @@ enum {
129  /* /proc/sys/dev/raid */
130  enum {
131         DEV_RAID_SPEED_LIMIT_MIN=1,
132 -       DEV_RAID_SPEED_LIMIT_MAX=2
133 +       DEV_RAID_SPEED_LIMIT_MAX=2,
134 +       DEV_RAID_REBUILD_WINDOW=3, /* ctl id of the "rebuild_window_size" entry in md.c's raid_table[] */
135 +       DEV_RAID_DISK_IDLE_SIZE=4 /* ctl id of the "disk_idle_size" entry in md.c's raid_table[] */
136  };
137  
138  /* /proc/sys/dev/parport/default */