Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / md-rebuild-policy.patch
1 Index: linux-2.6.18-128.1.6/drivers/md/md.c
2 ===================================================================
3 --- linux-2.6.18-128.1.6.orig/drivers/md/md.c   2009-04-14 21:05:26.000000000 -0600
4 +++ linux-2.6.18-128.1.6/drivers/md/md.c        2009-06-02 23:25:31.000000000 -0600
5 @@ -90,6 +90,8 @@
6  
7  static int sysctl_speed_limit_min = 1000;
8  static int sysctl_speed_limit_max = 200000;
9 +static int sysctl_rebuild_window_size = 256;  /* sectors of resync I/O between throttle checks in md_do_sync() */
10 +static int sysctl_disk_idle_size = 4096;  /* sectors of non-sync I/O above which is_mddev_idle() reports busy */
11  static inline int speed_min(mddev_t *mddev)
12  {
13         return mddev->sync_speed_min ?
14 @@ -121,6 +123,22 @@
15                 .mode           = S_IRUGO|S_IWUSR,
16                 .proc_handler   = &proc_dointvec,
17         },
18 +       {
19 +               .ctl_name       = DEV_RAID_REBUILD_WINDOW,
20 +               .procname       = "rebuild_window_size",
21 +               .data           = &sysctl_rebuild_window_size,
22 +               .maxlen         = sizeof(int),
23 +               .mode           = S_IRUGO|S_IWUSR,
24 +               .proc_handler   = &proc_dointvec,
25 +       },
26 +       {
27 +               .ctl_name       = DEV_RAID_DISK_IDLE_SIZE,
28 +               .procname       = "disk_idle_size",
29 +               .data           = &sysctl_disk_idle_size,
30 +               .maxlen         = sizeof(int),
31 +               .mode           = S_IRUGO|S_IWUSR,
32 +               .proc_handler   = &proc_dointvec,
33 +       },
34         { .ctl_name = 0 }
35  };
36  
37 @@ -5009,15 +5027,16 @@
38  {
39         mdk_rdev_t * rdev;
40         int idle;
41 -       unsigned long curr_events;
42 +       unsigned long rw, sync;
43  
44         idle = 1;
45         rcu_read_lock();
46         rdev_for_each_rcu(rdev, mddev) {
47                 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
48 -               curr_events = disk_stat_read(disk, sectors[0]) + 
49 -                               disk_stat_read(disk, sectors[1]) - 
50 -                               atomic_read(&disk->sync_io);
51 +
52 +               rw = disk_stat_read(disk, sectors[READ])+disk_stat_read(disk, sectors[WRITE]);
53 +               sync = atomic_read(&disk->sync_io);
54 +
55                 /* The difference between curr_events and last_events
56                  * will be affected by any new non-sync IO (making
57                  * curr_events bigger) and any difference in the amount of
58 @@ -5031,9 +5050,9 @@
59                  *
60                  * Note: the following is an unsigned comparison.
61                  */
62 -               if ((curr_events - rdev->last_events + 4096) > 8192) {
63 -                       rdev->last_events = curr_events;
64 +               if (rw - rdev->last_events > sync + sysctl_disk_idle_size) {
65                         idle = 0;
66 +                       rdev->last_events = rw - sync;
67                 }
68         }
69         rcu_read_unlock();
70 @@ -5100,8 +5119,7 @@
71  void md_do_sync(mddev_t *mddev)
72  {
73         mddev_t *mddev2;
74 -       unsigned int currspeed = 0,
75 -                window;
76 +       unsigned int currspeed = 0;
77         sector_t max_sectors,j, io_sectors;
78         unsigned long mark[SYNC_MARKS];
79         sector_t mark_cnt[SYNC_MARKS];
80 @@ -5221,9 +5239,8 @@
81         /*
82          * Tune reconstruction:
83          */
84 -       window = 32*(PAGE_SIZE/512);
85         printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n",
86 -               window/2,(unsigned long long) max_sectors/2);
87 +               sysctl_rebuild_window_size/2,(unsigned long long) max_sectors/2);
88  
89         atomic_set(&mddev->recovery_active, 0);
90         init_waitqueue_head(&mddev->recovery_wait);
91 @@ -5261,7 +5278,7 @@
92                          */
93                         md_new_event(mddev);
94  
95 -               if (last_check + window > io_sectors || j == max_sectors)
96 +               if (last_check + sysctl_rebuild_window_size > io_sectors || j == max_sectors)
97                         continue;
98  
99                 last_check = io_sectors;
100 @@ -5282,7 +5299,6 @@
101                         last_mark = next;
102                 }
103  
104 -
105                 if (kthread_should_stop()) {
106                         /*
107                          * got a signal, exit.
108 @@ -5306,10 +5322,16 @@
109  
110                 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
111                         /((jiffies-mddev->resync_mark)/HZ +1) +1;
112 -
113                 if (currspeed > speed_min(mddev)) {
114                         if ((currspeed > speed_max(mddev)) ||
115                                         !is_mddev_idle(mddev)) {
116 +                               static unsigned long next_report;
117 +                               if (time_after(jiffies, next_report)) {
118 +                                       printk(KERN_INFO "md: rebuild %s throttled due to IO\n",
119 +                                               mdname(mddev));
120 +                                       /* log at most once per 10 minutes; next_report is static, so the limit is shared by all arrays */
121 +                                       next_report = jiffies + 600 * HZ;
122 +                               }
123                                 msleep(500);
124                                 goto repeat;
125                         }
126 Index: linux-2.6.18-128.1.6/include/linux/sysctl.h
127 ===================================================================
128 --- linux-2.6.18-128.1.6.orig/include/linux/sysctl.h    2009-04-14 21:05:41.000000000 -0600
129 +++ linux-2.6.18-128.1.6/include/linux/sysctl.h 2009-06-02 23:25:31.000000000 -0600
130 @@ -928,7 +928,9 @@
131  /* /proc/sys/dev/raid */
132  enum {
133         DEV_RAID_SPEED_LIMIT_MIN=1,
134 -       DEV_RAID_SPEED_LIMIT_MAX=2
135 +       DEV_RAID_SPEED_LIMIT_MAX=2,
136 +       DEV_RAID_REBUILD_WINDOW=3,      /* procname "rebuild_window_size" */
137 +       DEV_RAID_DISK_IDLE_SIZE=4       /* procname "disk_idle_size" */
138  };
139  
140  /* /proc/sys/dev/parport/default */