Index: linux-2.6.18-128.1.6/drivers/md/md.c =================================================================== --- linux-2.6.18-128.1.6.orig/drivers/md/md.c 2009-04-14 21:05:26.000000000 -0600 +++ linux-2.6.18-128.1.6/drivers/md/md.c 2009-06-02 23:25:31.000000000 -0600 @@ -90,6 +90,8 @@ static int sysctl_speed_limit_min = 1000; static int sysctl_speed_limit_max = 200000; +static int sysctl_rebuild_window_size = 256; +static int sysctl_disk_idle_size = 4096; static inline int speed_min(mddev_t *mddev) { return mddev->sync_speed_min ? @@ -121,6 +123,22 @@ .mode = S_IRUGO|S_IWUSR, .proc_handler = &proc_dointvec, }, + { + .ctl_name = DEV_RAID_REBUILD_WINDOW, + .procname = "rebuild_window_size", + .data = &sysctl_rebuild_window_size, + .maxlen = sizeof(int), + .mode = S_IRUGO|S_IWUSR, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = DEV_RAID_DISK_IDLE_SIZE, + .procname = "disk_idle_size", + .data = &sysctl_disk_idle_size, + .maxlen = sizeof(int), + .mode = S_IRUGO|S_IWUSR, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; @@ -5009,15 +5027,16 @@ { mdk_rdev_t * rdev; int idle; - unsigned long curr_events; + unsigned long rw, sync; idle = 1; rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = disk_stat_read(disk, sectors[0]) + - disk_stat_read(disk, sectors[1]) - - atomic_read(&disk->sync_io); + + rw = disk_stat_read(disk, sectors[READ])+disk_stat_read(disk, sectors[WRITE]); + sync = atomic_read(&disk->sync_io); + /* The difference between curr_events and last_events * will be affected by any new non-sync IO (making * curr_events bigger) and any difference in the amount of @@ -5031,9 +5050,9 @@ * * Note: the following is an unsigned comparison. */ - if ((curr_events - rdev->last_events + 4096) > 8192) { - rdev->last_events = curr_events; + if (rw - rdev->last_events > sync + sysctl_disk_idle_size) { idle = 0; + rdev->last_events = rw - sync; } } rcu_read_unlock(); @@ -5100,8 +5119,7 @@ void md_do_sync(mddev_t *mddev) { mddev_t *mddev2; - unsigned int currspeed = 0, - window; + unsigned int currspeed = 0; sector_t max_sectors,j, io_sectors; unsigned long mark[SYNC_MARKS]; sector_t mark_cnt[SYNC_MARKS]; @@ -5221,9 +5239,8 @@ /* * Tune reconstruction: */ - window = 32*(PAGE_SIZE/512); printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n", - window/2,(unsigned long long) max_sectors/2); + sysctl_rebuild_window_size/2,(unsigned long long) max_sectors/2); atomic_set(&mddev->recovery_active, 0); init_waitqueue_head(&mddev->recovery_wait); @@ -5261,7 +5278,7 @@ */ md_new_event(mddev); - if (last_check + window > io_sectors || j == max_sectors) + if (last_check + sysctl_rebuild_window_size > io_sectors || j == max_sectors) continue; last_check = io_sectors; @@ -5282,7 +5299,6 @@ last_mark = next; } - if (kthread_should_stop()) { /* * got a signal, exit. @@ -5306,10 +5322,16 @@ currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 /((jiffies-mddev->resync_mark)/HZ +1) +1; - if (currspeed > speed_min(mddev)) { if ((currspeed > speed_max(mddev)) || !is_mddev_idle(mddev)) { + static unsigned long next_report; + if (time_after(jiffies, next_report)) { + printk(KERN_INFO "md: rebuild %s throttled due to IO\n", + mdname(mddev)); + /* once per 10 minutes */ + next_report = jiffies + 600 * HZ; + } msleep(500); goto repeat; } Index: linux-2.6.18-128.1.6/include/linux/sysctl.h =================================================================== --- linux-2.6.18-128.1.6.orig/include/linux/sysctl.h 2009-04-14 21:05:41.000000000 -0600 +++ linux-2.6.18-128.1.6/include/linux/sysctl.h 2009-06-02 23:25:31.000000000 -0600 @@ -928,7 +928,9 @@ /* /proc/sys/dev/raid */ enum { DEV_RAID_SPEED_LIMIT_MIN=1, - DEV_RAID_SPEED_LIMIT_MAX=2 + DEV_RAID_SPEED_LIMIT_MAX=2, + DEV_RAID_REBUILD_WINDOW=3, + DEV_RAID_DISK_IDLE_SIZE=4 }; /* /proc/sys/dev/parport/default */