From: jxiong Date: Tue, 25 Nov 2008 04:12:58 +0000 (+0000) Subject: Fixed the raid5 patches. X-Git-Tag: v1_9_120~75 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=bdebf22720730b2438b658dcd4e17597299b3074 Fixed the raid5 patches. - rebuild policy for rhel5 .21 kernel - soft lockups fixed b=17084 r=adilger,jay --- diff --git a/lustre/kernel_patches/patches/md-rebuild-policy-rhel5.patch b/lustre/kernel_patches/patches/md-rebuild-policy-rhel5.patch new file mode 100644 index 0000000..ca898ed --- /dev/null +++ b/lustre/kernel_patches/patches/md-rebuild-policy-rhel5.patch @@ -0,0 +1,137 @@ +diff -pur linux-2.6.18-53.1.21.orig/drivers/md/md.c linux-2.6.18-53.1.21/drivers/md/md.c +--- linux-2.6.18-53.1.21.orig/drivers/md/md.c 2008-11-25 11:21:57.000000000 +0800 ++++ linux-2.6.18-53.1.21/drivers/md/md.c 2008-11-25 11:26:49.000000000 +0800 +@@ -90,6 +90,8 @@ static void md_print_devices(void); + + static int sysctl_speed_limit_min = 1000; + static int sysctl_speed_limit_max = 200000; ++static int sysctl_rebuild_window_size = 256; ++static int sysctl_disk_idle_size = 4096; + static inline int speed_min(mddev_t *mddev) + { + return mddev->sync_speed_min ? +@@ -121,6 +123,22 @@ static ctl_table raid_table[] = { + .mode = S_IRUGO|S_IWUSR, + .proc_handler = &proc_dointvec, + }, ++ { ++ .ctl_name = DEV_RAID_REBUILD_WINDOW, ++ .procname = "rebuild_window_size", ++ .data = &sysctl_rebuild_window_size, ++ .maxlen = sizeof(int), ++ .mode = S_IRUGO|S_IWUSR, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = DEV_RAID_DISK_IDLE_SIZE, ++ .procname = "disk_idle_size", ++ .data = &sysctl_disk_idle_size, ++ .maxlen = sizeof(int), ++ .mode = S_IRUGO|S_IWUSR, ++ .proc_handler = &proc_dointvec, ++ }, + { .ctl_name = 0 } + }; + +@@ -4980,14 +4998,15 @@ static int is_mddev_idle(mddev_t *mddev) + mdk_rdev_t * rdev; + struct list_head *tmp; + int idle; +- unsigned long curr_events; ++ unsigned long rw, sync; + + idle = 1; + ITERATE_RDEV(mddev,rdev,tmp) { + struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; +- curr_events = disk_stat_read(disk, sectors[0]) + +- disk_stat_read(disk, sectors[1]) - +- atomic_read(&disk->sync_io); ++ ++ rw = disk_stat_read(disk, sectors[READ])+disk_stat_read(disk, sectors[WRITE]); ++ sync = atomic_read(&disk->sync_io); ++ + /* The difference between curr_events and last_events + * will be affected by any new non-sync IO (making + * curr_events bigger) and any difference in the amount of +@@ -5001,9 +5020,9 @@ static int is_mddev_idle(mddev_t *mddev) + * + * Note: the following is an unsigned comparison. + */ +- if ((curr_events - rdev->last_events + 4096) > 8192) { +- rdev->last_events = curr_events; ++ if (rw - rdev->last_events > sync + sysctl_disk_idle_size) { + idle = 0; ++ rdev->last_events = rw - sync; + } + } + return idle; +@@ -5069,8 +5088,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wa + void md_do_sync(mddev_t *mddev) + { + mddev_t *mddev2; +- unsigned int currspeed = 0, +- window; ++ unsigned int currspeed = 0; + sector_t max_sectors,j, io_sectors; + unsigned long mark[SYNC_MARKS]; + sector_t mark_cnt[SYNC_MARKS]; +@@ -5190,9 +5208,8 @@ void md_do_sync(mddev_t *mddev) + /* + * Tune reconstruction: + */ +- window = 32*(PAGE_SIZE/512); + printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n", +- window/2,(unsigned long long) max_sectors/2); ++ sysctl_rebuild_window_size/2,(unsigned long long) max_sectors/2); + + atomic_set(&mddev->recovery_active, 0); + init_waitqueue_head(&mddev->recovery_wait); +@@ -5230,7 +5247,7 @@ void md_do_sync(mddev_t *mddev) + */ + md_new_event(mddev); + +- if (last_check + window > io_sectors || j == max_sectors) ++ if (last_check + sysctl_rebuild_window_size > io_sectors || j == max_sectors) + continue; + + last_check = io_sectors; +@@ -5251,7 +5268,6 @@ void md_do_sync(mddev_t *mddev) + last_mark = next; + } + +- + if (kthread_should_stop()) { + /* + * got a signal, exit. +@@ -5275,10 +5291,16 @@ void md_do_sync(mddev_t *mddev) + + currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 + /((jiffies-mddev->resync_mark)/HZ +1) +1; +- + if (currspeed > speed_min(mddev)) { + if ((currspeed > speed_max(mddev)) || + !is_mddev_idle(mddev)) { ++ static unsigned long next_report; ++ if (time_after(jiffies, next_report)) { ++ printk(KERN_INFO "md: rebuild %s throttled due to IO\n", ++ mdname(mddev)); ++ /* once per 10 minutes */ ++ next_report = jiffies + 600 * HZ; ++ } + msleep(500); + goto repeat; + } +diff -pur linux-2.6.18-53.1.21.orig/include/linux/sysctl.h linux-2.6.18-53.1.21/include/linux/sysctl.h +--- linux-2.6.18-53.1.21.orig/include/linux/sysctl.h 2008-11-25 11:21:59.000000000 +0800 ++++ linux-2.6.18-53.1.21/include/linux/sysctl.h 2008-11-25 11:22:26.000000000 +0800 +@@ -903,7 +903,9 @@ enum { + /* /proc/sys/dev/raid */ + enum { + DEV_RAID_SPEED_LIMIT_MIN=1, +- DEV_RAID_SPEED_LIMIT_MAX=2 ++ DEV_RAID_SPEED_LIMIT_MAX=2, ++ DEV_RAID_REBUILD_WINDOW=3, ++ DEV_RAID_DISK_IDLE_SIZE=4 + }; + + /* /proc/sys/dev/parport/default */ diff --git a/lustre/kernel_patches/patches/md-soft-lockups.patch b/lustre/kernel_patches/patches/md-soft-lockups.patch new file mode 100644 index 0000000..cde9a34 --- /dev/null +++ b/lustre/kernel_patches/patches/md-soft-lockups.patch @@ -0,0 +1,13 @@ +Index: linux-2.6.18-92.1.10/drivers/md/raid5.c +=================================================================== +--- linux-2.6.18-92.1.10.orig/drivers/md/raid5.c 2008-11-10 11:00:51.000000000 +0900 ++++ linux-2.6.18-92.1.10/drivers/md/raid5.c 2008-11-10 11:02:38.000000000 +0900 +@@ -3251,6 +3251,8 @@ + handle_stripe(sh, conf->spare_page, NULL); + release_stripe(sh); + ++ cond_resched(); ++ + spin_lock_irq(&conf->device_lock); + } + PRINTK("%d stripes handled\n", handled); diff --git a/lustre/kernel_patches/patches/raid5-zerocopy-rhel5.patch b/lustre/kernel_patches/patches/raid5-zerocopy-rhel5.patch index dd80825..e66ae01 100644 --- a/lustre/kernel_patches/patches/raid5-zerocopy-rhel5.patch +++ b/lustre/kernel_patches/patches/raid5-zerocopy-rhel5.patch @@ -1,6 +1,6 @@ -diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c ---- linux-2.6.18-53.orig/drivers/md/raid5.c 2007-12-28 19:09:20.000000000 +0800 -+++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-28 19:09:32.000000000 +0800 +diff -pur linux-2.6.18-53.1.21.orig/drivers/md/raid5.c linux-2.6.18-53.1.21/drivers/md/raid5.c +--- linux-2.6.18-53.1.21.orig/drivers/md/raid5.c 2008-11-25 11:09:16.000000000 +0800 ++++ linux-2.6.18-53.1.21/drivers/md/raid5.c 2008-11-25 11:09:48.000000000 +0800 @@ -633,6 +633,7 @@ static int raid5_end_read_request(struct clear_buffer_uptodate(bh); } @@ -307,9 +307,9 @@ diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/rai seq_printf (seq, "\n\t\t%u delayed, %u bit delayed, %u active, queues: %u in, %u out\n", atomic_read(&conf->delayed), atomic_read(&conf->bit_delayed), atomic_read(&conf->active_stripes), -diff -pur linux-2.6.18-53.orig/include/linux/backing-dev.h linux-2.6.18-53/include/linux/backing-dev.h ---- linux-2.6.18-53.orig/include/linux/backing-dev.h 2007-12-28 14:49:26.000000000 +0800 -+++ linux-2.6.18-53/include/linux/backing-dev.h 2007-12-28 19:09:32.000000000 +0800 +diff -pur linux-2.6.18-53.1.21.orig/include/linux/backing-dev.h linux-2.6.18-53.1.21/include/linux/backing-dev.h +--- linux-2.6.18-53.1.21.orig/include/linux/backing-dev.h 2008-11-25 11:09:20.000000000 +0800 ++++ linux-2.6.18-53.1.21/include/linux/backing-dev.h 2008-11-25 11:09:48.000000000 +0800 @@ -48,6 +48,7 @@ struct backing_dev_info { #define BDI_CAP_READ_MAP 0x00000010 /* Can be mapped for reading */ #define BDI_CAP_WRITE_MAP 0x00000020 /* Can be mapped for writing */ @@ -337,17 +337,17 @@ diff -pur linux-2.6.18-53.orig/include/linux/backing-dev.h linux-2.6.18-53/inclu + #endif /* _LINUX_BACKING_DEV_H */ -diff -pur linux-2.6.18-53.orig/include/linux/page-flags.h linux-2.6.18-53/include/linux/page-flags.h ---- linux-2.6.18-53.orig/include/linux/page-flags.h 2007-12-28 14:49:26.000000000 +0800 -+++ linux-2.6.18-53/include/linux/page-flags.h 2007-12-28 19:09:32.000000000 +0800 +diff -pur linux-2.6.18-53.1.21.orig/include/linux/page-flags.h linux-2.6.18-53.1.21/include/linux/page-flags.h +--- linux-2.6.18-53.1.21.orig/include/linux/page-flags.h 2008-11-25 11:09:20.000000000 +0800 ++++ linux-2.6.18-53.1.21/include/linux/page-flags.h 2008-11-25 11:12:08.000000000 +0800 @@ -86,6 +86,7 @@ #define PG_reclaim 17 /* To be reclaimed asap */ #define PG_nosave_free 18 /* Free, should not be written */ #define PG_buddy 19 /* Page is free, on buddy lists */ -+#define PG_constant 20 /* To mark if the page is constant */ - #define PG_xpmem 27 /* Testing for xpmem. */ ++#define PG_constant 20 /* To mark if the page is constant */ /* PG_owner_priv_1 users should have descriptive aliases */ + #define PG_checked PG_owner_priv_1 /* Used by some filesystems */ @@ -252,6 +253,14 @@ struct page; /* forward declaration */ @@ -363,9 +363,9 @@ diff -pur linux-2.6.18-53.orig/include/linux/page-flags.h linux-2.6.18-53/includ int test_clear_page_dirty(struct page *page); int test_clear_page_writeback(struct page *page); int test_set_page_writeback(struct page *page); -diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/include/linux/raid/raid5.h ---- linux-2.6.18-53.orig/include/linux/raid/raid5.h 2007-12-28 18:55:24.000000000 +0800 -+++ linux-2.6.18-53/include/linux/raid/raid5.h 2007-12-28 19:09:32.000000000 +0800 +diff -pur linux-2.6.18-53.1.21.orig/include/linux/raid/raid5.h linux-2.6.18-53.1.21/include/linux/raid/raid5.h +--- linux-2.6.18-53.1.21.orig/include/linux/raid/raid5.h 2008-11-25 11:09:20.000000000 +0800 ++++ linux-2.6.18-53.1.21/include/linux/raid/raid5.h 2008-11-25 11:09:48.000000000 +0800 @@ -156,8 +156,9 @@ struct stripe_head { #define R5_Overlap 7 /* There is a pending overlapping request on this block */ #define R5_ReadError 8 /* seen a read error here recently */ @@ -377,9 +377,9 @@ diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/includ /* * Write method */ -diff -pur linux-2.6.18-53.orig/mm/filemap.c linux-2.6.18-53/mm/filemap.c ---- linux-2.6.18-53.orig/mm/filemap.c 2007-12-28 14:49:26.000000000 +0800 -+++ linux-2.6.18-53/mm/filemap.c 2007-12-28 19:09:32.000000000 +0800 +diff -pur linux-2.6.18-53.1.21.orig/mm/filemap.c linux-2.6.18-53.1.21/mm/filemap.c +--- linux-2.6.18-53.1.21.orig/mm/filemap.c 2008-11-25 11:09:15.000000000 +0800 ++++ linux-2.6.18-53.1.21/mm/filemap.c 2008-11-25 11:09:48.000000000 +0800 @@ -30,6 +30,7 @@ #include #include diff --git a/lustre/kernel_patches/series/2.6-rhel5.series b/lustre/kernel_patches/series/2.6-rhel5.series index 8495289..43302ff 100644 --- a/lustre/kernel_patches/series/2.6-rhel5.series +++ b/lustre/kernel_patches/series/2.6-rhel5.series @@ -17,6 +17,7 @@ raid5-large-io-rhel5.patch raid5-stripe-by-stripe-handling-rhel5.patch raid5-merge-ios-rhel5.patch raid5-zerocopy-rhel5.patch -md-rebuild-policy.patch +md-rebuild-policy-rhel5.patch +md-soft-lockups.patch jbd-journal-chksum-2.6.18-vanilla.patch quota-large-limits-rhel5.patch