--- /dev/null
+--- linux-2.6.9.orig/drivers/md/raid6main.c 2006-09-07 23:10:43.000000000 +0800
++++ linux-2.6.9/drivers/md/raid6main.c 2006-09-07 23:11:25.000000000 +0800
+@@ -33,7 +33,7 @@
+ * Stripe cache
+ */
+
+-#define NR_STRIPES 256
++static int raid6_nr_stripes = 256 * 8;
+ #define STRIPE_SIZE PAGE_SIZE
+ #define STRIPE_SHIFT (PAGE_SHIFT - 9)
+ #define STRIPE_SECTORS (STRIPE_SIZE>>9)
+@@ -111,7 +111,7 @@ static inline void __release_stripe(raid
+ list_add_tail(&sh->lru, &conf->inactive_list);
+ atomic_dec(&conf->active_stripes);
+ if (!conf->inactive_blocked ||
+- atomic_read(&conf->active_stripes) < (NR_STRIPES*3/4))
++ atomic_read(&conf->active_stripes) < (raid6_nr_stripes*3/4))
+ wake_up(&conf->wait_for_stripe);
+ }
+ }
+@@ -274,7 +274,7 @@ static struct stripe_head *get_active_st
+ conf->inactive_blocked = 1;
+ wait_event_lock_irq(conf->wait_for_stripe,
+ !list_empty(&conf->inactive_list) &&
+- (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
++ (atomic_read(&conf->active_stripes) < (raid6_nr_stripes *3/4)
+ || !conf->inactive_blocked),
+ conf->device_lock,
+ unplug_slaves(conf->mddev);
+@@ -1805,7 +1805,7 @@ static int run (mddev_t *mddev)
+ conf->chunk_size = mddev->chunk_size;
+ conf->level = mddev->level;
+ conf->algorithm = mddev->layout;
+- conf->max_nr_stripes = NR_STRIPES;
++ conf->max_nr_stripes = raid6_nr_stripes;
+
+ /* device size must be a multiple of chunk size */
+ mddev->size &= ~(mddev->chunk_size/1024 -1);
+@@ -2139,5 +2139,6 @@ static void raid6_exit (void)
+
+ module_init(raid6_init);
+ module_exit(raid6_exit);
++module_param(raid6_nr_stripes, int, 0644);
+ MODULE_LICENSE("GPL");
+ MODULE_ALIAS("md-personality-8"); /* RAID6 */
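
The patch above is raid6-configurable-cachesize.patch from the series
below: it replaces the compile-time NR_STRIPES constant with the
raid6_nr_stripes variable, raises the default from 256 to 2048 stripe
heads, and exposes it as a load-time module parameter (each cached
stripe head pins roughly one page per member disk, so the larger
default costs memory in proportion). A minimal standalone sketch of
the module_param pattern it relies on; the demo_* names are
illustrative, not part of the patch:

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    /* default matches the patch: 256 * 8 cached stripe heads */
    static int demo_nr_stripes = 256 * 8;
    module_param(demo_nr_stripes, int, 0644);
    MODULE_PARM_DESC(demo_nr_stripes, "number of stripe heads to cache");

    static int __init demo_init(void)
    {
            printk(KERN_INFO "demo: caching %d stripes\n", demo_nr_stripes);
            return 0;
    }
    module_init(demo_init);
    MODULE_LICENSE("GPL");

With the real patch applied, something like "modprobe raid6
raid6_nr_stripes=4096" sizes the stripe cache at load time instead of
requiring a rebuild.
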
--- /dev/null
+--- linux-2.6.9.orig/drivers/md/raid6main.c 2006-09-07 23:12:09.000000000 +0800
++++ linux-2.6.9/drivers/md/raid6main.c 2006-09-07 23:12:44.000000000 +0800
+@@ -1775,6 +1775,11 @@ static int run (mddev_t *mddev)
+ mddev->queue->unplug_fn = raid6_unplug_device;
+ mddev->queue->issue_flush_fn = raid6_issue_flush;
+
++ /* in order to support large I/Os */
++ blk_queue_max_sectors(mddev->queue, mddev->chunk_size * mddev->raid_disks >> 9);
++ mddev->queue->max_phys_segments = mddev->chunk_size * mddev->raid_disks >> PAGE_SHIFT;
++ mddev->queue->max_hw_segments = mddev->chunk_size * mddev->raid_disks >> PAGE_SHIFT;
++
+ PRINTK("raid6: run(%s) called.\n", mdname(mddev));
+
+ ITERATE_RDEV(mddev,rdev,tmp) {
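
raid6-large-io.patch: without it the md queue keeps the block layer's
modest default limits, so large requests are split before they reach
raid6. The hunk raises the cap to one full stripe row (chunk_size *
raid_disks bytes). A sketch of the same setup using the 2.6-era
helper functions; setup_large_io() is a hypothetical wrapper, and the
patch itself pokes the max_phys_segments/max_hw_segments fields
directly:

    #include <linux/blkdev.h>

    static void setup_large_io(request_queue_t *q,
                               int chunk_size, int raid_disks)
    {
            unsigned int stripe_bytes = chunk_size * raid_disks;

            /* largest single request, in 512-byte sectors */
            blk_queue_max_sectors(q, stripe_bytes >> 9);
            /* allow one segment per page of payload */
            blk_queue_max_phys_segments(q, stripe_bytes >> PAGE_SHIFT);
            blk_queue_max_hw_segments(q, stripe_bytes >> PAGE_SHIFT);
    }
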
--- /dev/null
+diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6main.c
+--- linux-2.6.9.orig/drivers/md/raid6main.c 2008-01-10 13:51:32.000000000 +0800
++++ linux-2.6.9/drivers/md/raid6main.c 2008-01-10 13:52:20.000000000 +0800
+@@ -956,6 +956,26 @@ static void add_stripe_bio (struct strip
+ }
+ }
+
++/*
++ * The whole idea is to collect all bio's and then issue them
++ * disk by disk to assist merging a bit -bzzz
++ */
++static void raid6_flush_bios(raid6_conf_t *conf, struct bio *bios[], int raid_disks)
++{
++ struct bio *bio, *nbio;
++ int i;
++
++ for (i = 0; i < raid_disks; i++) {
++ bio = bios[i];
++ while (bio) {
++ nbio = bio->bi_next;
++ bio->bi_next = NULL;
++ generic_make_request(bio);
++ bio = nbio;
++ }
++ bios[i] = NULL;
++ }
++}
+
+ /*
+ * handle_stripe - do things to a stripe.
+@@ -975,7 +995,7 @@ static void add_stripe_bio (struct strip
+ *
+ */
+
+-static void handle_stripe(struct stripe_head *sh)
++static void handle_stripe(struct stripe_head *sh, struct bio *bios[])
+ {
+ raid6_conf_t *conf = sh->raid_conf;
+ int disks = conf->raid_disks;
+@@ -1452,7 +1472,11 @@ static void handle_stripe(struct stripe_
+ bi->bi_size = STRIPE_SIZE;
+ bi->bi_next = NULL;
+ atomic_inc(&conf->out_reqs_in_queue);
+- generic_make_request(bi);
++ if (bios) {
++ bi->bi_next = bios[i];
++ bios[i] = bi;
++ } else
++ generic_make_request(bi);
+ } else {
+ PRINTK("skip op %ld on disc %d for sector %llu\n",
+ bi->bi_rw, i, (unsigned long long)sh->sector);
+@@ -1575,6 +1599,7 @@ static int make_request (request_queue_t
+ int sectors_per_chunk;
+ int stripes_per_chunk, sectors_per_block;
+ int sectors_per_stripe;
++ struct bio *bios[MD_SB_DISKS];
+ int i, j;
+
+ atomic_inc(&conf->in_reqs_in_queue);
+@@ -1611,6 +1636,7 @@ static int make_request (request_queue_t
+ sector_div(block, sectors_per_block);
+ sectors = bi->bi_size >> 9;
+
++ memset(bios, 0, sizeof(bios));
+ repeat:
+ stripe = block * (sectors_per_block / data_disks);
+ b_sector = stripe * data_disks;
+@@ -1630,9 +1656,17 @@ static int make_request (request_queue_t
+ new_sector = raid6_compute_sector(r_sector, raid_disks,
+ data_disks, &dd_idx,
+ &pd_idx, conf);
+- if (sh == NULL)
+- sh = get_active_stripe(conf, new_sector, pd_idx,
+- (bi->bi_rw&RWA_MASK));
++ if (sh == NULL) {
++ /* first, try to get stripe w/o blocking
++ * if we can't, then it's time to submit
++ * all collected bio's in order to free
++ * some space in the cache -bzzz */
++ sh = get_active_stripe(conf, new_sector, pd_idx, 1);
++ if (!sh && !(bi->bi_rw&RWA_MASK)) {
++ raid6_flush_bios(conf, bios, raid_disks);
++ sh = get_active_stripe(conf, new_sector, pd_idx, 0);
++ }
++ }
+ if (sh) {
+ add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK));
+ } else {
+@@ -1653,7 +1687,7 @@ static int make_request (request_queue_t
+
+ if (sh) {
+ raid6_plug_device(conf);
+- handle_stripe(sh);
++ handle_stripe(sh, bios);
+ release_stripe(sh);
+ sh = NULL;
+ }
+@@ -1664,6 +1698,9 @@ static int make_request (request_queue_t
+ if (sectors > 0)
+ goto repeat;
+
++ /* now flush all bio's */
++ raid6_flush_bios(conf, bios, raid_disks);
++
+ spin_lock_irq(&conf->device_lock);
+ if (--bi->bi_phys_segments == 0) {
+ int bytes = bi->bi_size;
+@@ -1719,7 +1756,7 @@ static int sync_request (mddev_t *mddev,
+ clear_bit(STRIPE_INSYNC, &sh->state);
+ spin_unlock(&sh->lock);
+
+- handle_stripe(sh);
++ handle_stripe(sh, NULL);
+ release_stripe(sh);
+
+ return STRIPE_SECTORS;
+@@ -1769,7 +1806,7 @@ static void raid6d (mddev_t *mddev)
+ handled++;
+
+ atomic_inc(&conf->handled_in_raid5d);
+- handle_stripe(sh);
++ handle_stripe(sh, NULL);
+ release_stripe(sh);
+
+ spin_lock_irq(&conf->device_lock);
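
raid6-merge-ios.patch: instead of issuing each stripe's bios with
generic_make_request() as soon as handle_stripe() builds them,
make_request() now collects them on a per-disk list threaded through
bi_next and submits them in one pass via raid6_flush_bios(), so
requests for the same member disk reach the elevator back to back and
can be merged. The list is also flushed when the non-blocking
get_active_stripe() fails, so collected bios are not held back while
the caller sleeps waiting for cache space. The pattern in isolation,
with a hypothetical submit_batched():

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    /* enqueue side (instead of submitting immediately):
     *         bio->bi_next = pending[disk];
     *         pending[disk] = bio;
     */
    static void submit_batched(struct bio **pending, int ndisks)
    {
            int i;

            for (i = 0; i < ndisks; i++) {
                    struct bio *bio = pending[i];

                    pending[i] = NULL;
                    while (bio) {
                            struct bio *next = bio->bi_next;

                            bio->bi_next = NULL; /* the chain was ours */
                            generic_make_request(bio);
                            bio = next;
                    }
            }
    }

Pushing at the head makes each per-disk list LIFO; that is harmless
for merging because the elevator sorts requests by sector anyway.
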
--- /dev/null
+diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6main.c
+--- linux-2.6.9.orig/drivers/md/raid6main.c 2008-01-10 13:55:37.000000000 +0800
++++ linux-2.6.9/drivers/md/raid6main.c 2008-01-10 13:55:56.000000000 +0800
+@@ -749,6 +749,10 @@ static void compute_parity(struct stripe
+ if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
+ chosen = sh->dev[i].towrite;
+ sh->dev[i].towrite = NULL;
++
++ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
++ wake_up(&conf->wait_for_overlap);
++
+ if (sh->dev[i].written) BUG();
+ sh->dev[i].written = chosen;
+ }
+@@ -907,7 +911,7 @@ static void compute_block_2(struct strip
+ * toread/towrite point to the first in a chain.
+ * The bi_next chain must be in order.
+ */
+-static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
++static int add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
+ {
+ struct bio **bip;
+ raid6_conf_t *conf = sh->raid_conf;
+@@ -924,10 +928,13 @@ static void add_stripe_bio (struct strip
+ else
+ bip = &sh->dev[dd_idx].toread;
+ while (*bip && (*bip)->bi_sector < bi->bi_sector) {
+- BUG_ON((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector);
++ if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector)
++ goto overlap;
+ bip = & (*bip)->bi_next;
+ }
+-/* FIXME do I need to worry about overlapping bion */
++ if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9))
++ goto overlap;
++
+ if (*bip && bi->bi_next && (*bip) != bi->bi_next)
+ BUG();
+ if (*bip)
+@@ -954,6 +961,14 @@ static void add_stripe_bio (struct strip
+ if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
+ set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
+ }
++
++ return 1;
++
++overlap:
++ set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
++ spin_unlock_irq(&conf->device_lock);
++ spin_unlock(&sh->lock);
++ return 0;
+ }
+
+ /*
+@@ -1038,6 +1053,9 @@ static void handle_stripe(struct stripe_
+ spin_lock_irq(&conf->device_lock);
+ rbi = dev->toread;
+ dev->toread = NULL;
++
++ if (test_and_clear_bit(R5_Overlap, &dev->flags))
++ wake_up(&conf->wait_for_overlap);
+ spin_unlock_irq(&conf->device_lock);
+ while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
+ copy_data(0, rbi, dev->page, dev->sector);
+@@ -1087,6 +1105,9 @@ static void handle_stripe(struct stripe_
+ sh->dev[i].towrite = NULL;
+ if (bi) to_write--;
+
++ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
++ wake_up(&conf->wait_for_overlap);
++
+ while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
+ struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+ clear_bit(BIO_UPTODATE, &bi->bi_flags);
+@@ -1115,6 +1136,8 @@ static void handle_stripe(struct stripe_
+ if (!test_bit(R5_Insync, &sh->dev[i].flags)) {
+ bi = sh->dev[i].toread;
+ sh->dev[i].toread = NULL;
++ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
++ wake_up(&conf->wait_for_overlap);
+ if (bi) to_read--;
+ while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
+ struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+@@ -1648,6 +1671,8 @@ static int make_request (request_queue_t
+ sh = NULL;
+ /* iterate through all pages in the stripe */
+ for (j = 0; j < data_disks && sectors > 0; j++) {
++ DEFINE_WAIT(w);
++
+ if (r_sector + STRIPE_SECTORS <= bi->bi_sector ||
+ r_sector >= last_sector) {
+ r_sector += sectors_per_chunk;
+@@ -1656,6 +1681,9 @@ static int make_request (request_queue_t
+ new_sector = raid6_compute_sector(r_sector, raid_disks,
+ data_disks, &dd_idx,
+ &pd_idx, conf);
++
++retry:
++ prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
+ if (sh == NULL) {
+ /* first, try to get stripe w/o blocking
+ * if we can't, then it's time to submit
+@@ -1668,10 +1696,18 @@ static int make_request (request_queue_t
+ }
+ }
+ if (sh) {
+- add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK));
++ if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
++ /* overlaps a queued bio: wait for it to complete, then retry */
++ raid6_unplug_device(mddev->queue);
++ release_stripe(sh); sh = NULL;
++ schedule();
++ goto retry;
++ }
++ finish_wait(&conf->wait_for_overlap, &w);
+ } else {
+ /* cannot get stripe for read-ahead, just give-up */
+ clear_bit(BIO_UPTODATE, &bi->bi_flags);
++ finish_wait(&conf->wait_for_overlap, &w);
+ sectors = 0;
+ break;
+ }
+@@ -1847,6 +1883,7 @@ static int run (mddev_t *mddev)
+
+ conf->device_lock = SPIN_LOCK_UNLOCKED;
+ init_waitqueue_head(&conf->wait_for_stripe);
++ init_waitqueue_head(&conf->wait_for_overlap);
+ INIT_LIST_HEAD(&conf->handle_list);
+ INIT_LIST_HEAD(&conf->delayed_list);
+ INIT_LIST_HEAD(&conf->inactive_list);
+diff -pur linux-2.6.9.orig/include/linux/raid/raid5.h linux-2.6.9/include/linux/raid/raid5.h
+--- linux-2.6.9.orig/include/linux/raid/raid5.h 2008-01-10 13:46:05.000000000 +0800
++++ linux-2.6.9/include/linux/raid/raid5.h 2008-01-10 13:55:56.000000000 +0800
+@@ -154,6 +154,8 @@ struct stripe_head {
+ #define R5_Wantwrite 5
+ #define R5_Syncio 6 /* this io need to be accounted as resync io */
+ #define R5_Direct 7 /* use page from passed bio to avoid memcpy */
++#define R5_Overlap 8 /* There is a pending overlapping request
++ * on this block */
+
+ /*
+ * Write method
+@@ -221,6 +223,7 @@ struct raid5_private_data {
+ atomic_t active_stripes;
+ struct list_head inactive_list;
+ wait_queue_head_t wait_for_stripe;
++ wait_queue_head_t wait_for_overlap;
+ int inactive_blocked; /* release of inactive stripes blocked,
+ * waiting for 25% to be free
+ */
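
raid6-serialize-ovelapping-reqs.patch replaces the BUG_ON()/FIXME in
add_stripe_bio() with real handling of overlapping requests: if a new
bio overlaps one already queued on the same stripe block, the block
is flagged R5_Overlap and add_stripe_bio() fails; the submitter then
sleeps on the new wait_for_overlap queue and retries, and every path
that retires a toread/towrite chain clears the flag and wakes the
queue. Note the prepare-then-test idiom in make_request(): the task
registers on the waitqueue before testing the condition, so a
wake_up() arriving between the failed add and schedule() cannot be
lost. A standalone sketch of that idiom, assuming the 2.6 wait API;
try_queue_bio() is a hypothetical stand-in for add_stripe_bio():

    #include <linux/wait.h>
    #include <linux/sched.h>

    static DECLARE_WAIT_QUEUE_HEAD(overlap_wq);

    /* returns 0 while an overlapping request is still queued */
    extern int try_queue_bio(void);

    static void queue_bio_serialized(void)
    {
            DEFINE_WAIT(w);

            for (;;) {
                    prepare_to_wait(&overlap_wq, &w, TASK_UNINTERRUPTIBLE);
                    if (try_queue_bio())
                            break;
                    schedule(); /* completer calls wake_up(&overlap_wq) */
            }
            finish_wait(&overlap_wq, &w);
    }
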
--- /dev/null
+diff -pur linux-2.6.9-55.0.9.orig/drivers/md/raid6main.c linux-2.6.9-55.0.9/drivers/md/raid6main.c
+--- linux-2.6.9-55.0.9.orig/drivers/md/raid6main.c 2007-09-28 17:53:55.000000000 +0800
++++ linux-2.6.9-55.0.9/drivers/md/raid6main.c 2007-12-13 20:19:11.000000000 +0800
+@@ -96,9 +96,10 @@ static inline void __release_stripe(raid
+ if (atomic_read(&conf->active_stripes)==0)
+ BUG();
+ if (test_bit(STRIPE_HANDLE, &sh->state)) {
+- if (test_bit(STRIPE_DELAYED, &sh->state))
++ if (test_bit(STRIPE_DELAYED, &sh->state)) {
+ list_add_tail(&sh->lru, &conf->delayed_list);
+- else
++ atomic_inc(&conf->delayed);
++ } else
+ list_add_tail(&sh->lru, &conf->handle_list);
+ md_wakeup_thread(conf->mddev->thread);
+ } else {
+@@ -269,6 +270,7 @@ static struct stripe_head *get_active_st
+ if (noblock && sh == NULL)
+ break;
+ if (!sh) {
++ atomic_inc(&conf->out_of_stripes);
+ conf->inactive_blocked = 1;
+ wait_event_lock_irq(conf->wait_for_stripe,
+ !list_empty(&conf->inactive_list) &&
+@@ -290,6 +292,9 @@ static struct stripe_head *get_active_st
+ if (list_empty(&sh->lru))
+ BUG();
+ list_del_init(&sh->lru);
++ if (test_bit(STRIPE_DELAYED, &sh->state))
++ atomic_dec(&conf->delayed);
++
+ }
+ }
+ } while (sh == NULL);
+@@ -368,6 +373,8 @@ static int raid6_end_read_request (struc
+ if (bi->bi_size)
+ return 1;
+
++ atomic_dec(&conf->out_reqs_in_queue);
++
+ for (i=0 ; i<disks; i++)
+ if (bi == &sh->dev[i].req)
+ break;
+@@ -445,6 +452,8 @@ static int raid6_end_write_request (stru
+ if (bi == &sh->dev[i].req)
+ break;
+
++ atomic_dec(&conf->out_reqs_in_queue);
++
+ PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n",
+ (unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ uptodate);
+@@ -989,6 +998,7 @@ static void handle_stripe(struct stripe_
+ spin_lock(&sh->lock);
+ clear_bit(STRIPE_HANDLE, &sh->state);
+ clear_bit(STRIPE_DELAYED, &sh->state);
++ atomic_inc(&conf->handle_called);
+
+ syncing = test_bit(STRIPE_SYNCING, &sh->state);
+ /* Now to look around and see what can be done */
+@@ -1257,6 +1267,7 @@ static void handle_stripe(struct stripe_
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantread, &dev->flags);
+ locked++;
++ atomic_inc(&conf->reads_for_rcw);
+ } else {
+ PRINTK("Request delayed stripe %llu block %d for Reconstruct\n",
+ (unsigned long long)sh->sector, i);
+@@ -1390,6 +1401,7 @@ static void handle_stripe(struct stripe_
+ bi->bi_next = NULL;
+ bi->bi_size = 0;
+ bi->bi_end_io(bi, bytes, 0);
++ atomic_dec(&conf->in_reqs_in_queue);
+ }
+ for (i=disks; i-- ;) {
+ int rw;
+@@ -1405,10 +1417,13 @@ static void handle_stripe(struct stripe_
+ bi = &sh->dev[i].req;
+
+ bi->bi_rw = rw;
+- if (rw)
++ if (rw) {
++ atomic_inc(&conf->writes_out);
+ bi->bi_end_io = raid6_end_write_request;
+- else
++ } else {
++ atomic_inc(&conf->reads_out);
+ bi->bi_end_io = raid6_end_read_request;
++ }
+
+ spin_lock_irq(&conf->device_lock);
+ rdev = conf->disks[i].rdev;
+@@ -1436,12 +1451,14 @@ static void handle_stripe(struct stripe_
+ bi->bi_io_vec[0].bv_offset = 0;
+ bi->bi_size = STRIPE_SIZE;
+ bi->bi_next = NULL;
++ atomic_inc(&conf->out_reqs_in_queue);
+ generic_make_request(bi);
+ } else {
+ PRINTK("skip op %ld on disc %d for sector %llu\n",
+ bi->bi_rw, i, (unsigned long long)sh->sector);
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ set_bit(STRIPE_HANDLE, &sh->state);
++ atomic_dec(&conf->delayed);
+ }
+ }
+ }
+@@ -1555,6 +1572,8 @@ static int make_request (request_queue_t
+ sector_t logical_sector, last_sector;
+ struct stripe_head *sh;
+
++ atomic_inc(&conf->in_reqs_in_queue);
++
+ if (unlikely(bio_barrier(bi))) {
+ bio_endio(bi, bi->bi_size, -EOPNOTSUPP);
+ return 0;
+@@ -1563,9 +1582,11 @@ static int make_request (request_queue_t
+ if (bio_data_dir(bi)==WRITE) {
+ disk_stat_inc(mddev->gendisk, writes);
+ disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi));
++ atomic_inc(&conf->writes_in);
+ } else {
+ disk_stat_inc(mddev->gendisk, reads);
+ disk_stat_add(mddev->gendisk, read_sectors, bio_sectors(bi));
++ atomic_inc(&conf->reads_in);
+ }
+
+ logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+@@ -1605,6 +1626,7 @@ static int make_request (request_queue_t
+
+ if ( bio_data_dir(bi) == WRITE )
+ md_write_end(mddev);
++ atomic_dec(&conf->in_reqs_in_queue);
+ bi->bi_size = 0;
+ bi->bi_end_io(bi, bytes, 0);
+ }
+@@ -1701,6 +1723,8 @@ static void raid6d (mddev_t *mddev)
+ spin_unlock_irq(&conf->device_lock);
+
+ handled++;
++
++ atomic_inc(&conf->handled_in_raid5d);
+ handle_stripe(sh);
+ release_stripe(sh);
+
+@@ -1940,6 +1964,23 @@ static void status (struct seq_file *seq
+ conf->disks[i].rdev &&
+ conf->disks[i].rdev->in_sync ? "U" : "_");
+ seq_printf (seq, "]");
++
++ seq_printf (seq, "\n\t\tin: %u reads, %u writes; out: %u reads, %u writes",
++ atomic_read(&conf->reads_in), atomic_read(&conf->writes_in),
++ atomic_read(&conf->reads_out), atomic_read(&conf->writes_out));
++ seq_printf (seq, "\n\t\t%u in raid5d, %u out of stripes, %u handle called",
++ atomic_read(&conf->handled_in_raid5d),
++ atomic_read(&conf->out_of_stripes),
++ atomic_read(&conf->handle_called));
++ seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw",
++ atomic_read(&conf->reads_for_rmw),
++ atomic_read(&conf->reads_for_rcw));
++ seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n",
++ atomic_read(&conf->delayed),
++ atomic_read(&conf->active_stripes),
++ atomic_read(&conf->in_reqs_in_queue),
++ atomic_read(&conf->out_reqs_in_queue));
++
+ #if RAID6_DUMPSTATE
+ seq_printf (seq, "\n");
+ printall(seq, conf);
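
raid6-stats.patch (generated against a RHEL4 2.6.9-55.0.9 tree, hence
the different dates) sprinkles lockless atomic_t counters over the
request, stripe-cache and raid6d paths and dumps them from status(),
so /proc/mdstat shows requests in flight, cache exhaustion ("out of
stripes") and delayed-stripe counts. No raid5.h hunk appears here, so
the conf->reads_in, out_reqs_in_queue, etc. fields are presumably
introduced by an earlier patch in the series. The instrumentation
pattern in miniature, with hypothetical demo_* names:

    #include <linux/seq_file.h>
    #include <asm/atomic.h>

    static struct {
            atomic_t reads_in, writes_in;
    } demo_stats;

    /* hot path: one atomic increment, no locking */
    static inline void count_read(void)
    {
            atomic_inc(&demo_stats.reads_in);
    }

    /* reporting path, e.g. an md status() callback */
    static void show_stats(struct seq_file *seq)
    {
            seq_printf(seq, "in: %u reads, %u writes\n",
                       atomic_read(&demo_stats.reads_in),
                       atomic_read(&demo_stats.writes_in));
    }
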
--- /dev/null
+diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6main.c
+--- linux-2.6.9.orig/drivers/md/raid6main.c 2008-01-10 13:47:18.000000000 +0800
++++ linux-2.6.9/drivers/md/raid6main.c 2008-01-10 13:49:06.000000000 +0800
+@@ -1571,6 +1571,11 @@ static int make_request (request_queue_t
+ sector_t new_sector;
+ sector_t logical_sector, last_sector;
+ struct stripe_head *sh;
++ sector_t stripe, sectors, block, r_sector, b_sector;
++ int sectors_per_chunk;
++ int stripes_per_chunk, sectors_per_block;
++ int sectors_per_stripe;
++ int i, j;
+
+ atomic_inc(&conf->in_reqs_in_queue);
+
+@@ -1596,30 +1601,69 @@ static int make_request (request_queue_t
+ bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
+ if ( bio_data_dir(bi) == WRITE )
+ md_write_start(mddev);
+- for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
+
+- new_sector = raid6_compute_sector(logical_sector,
+- raid_disks, data_disks, &dd_idx, &pd_idx, conf);
+-
+- PRINTK("raid6: make_request, sector %Lu logical %Lu\n",
+- (unsigned long long)new_sector,
+- (unsigned long long)logical_sector);
++ sectors_per_chunk = conf->chunk_size >> 9;
++ stripes_per_chunk = conf->chunk_size / STRIPE_SIZE;
++ sectors_per_stripe = STRIPE_SECTORS * data_disks;
++ sectors_per_block = stripes_per_chunk * sectors_per_stripe;
++
++ block = logical_sector & ~((sector_t)sectors_per_block - 1);
++ sector_div(block, sectors_per_block);
++ sectors = bi->bi_size >> 9;
++
++ repeat:
++ stripe = block * (sectors_per_block / data_disks);
++ b_sector = stripe * data_disks;
++ /* iterate through all stripes in this block,
++ * where a block is the set of cache stripes
++ * covering one full chunk row */
++ for (i = 0; i < stripes_per_chunk && sectors > 0; i++) {
++ r_sector = b_sector + (i * STRIPE_SECTORS);
++ sh = NULL;
++ /* iterate through all pages in the stripe */
++ for (j = 0; j < data_disks && sectors > 0; j++) {
++ if (r_sector + STRIPE_SECTORS <= bi->bi_sector ||
++ r_sector >= last_sector) {
++ r_sector += sectors_per_chunk;
++ continue;
++ }
++ new_sector = raid6_compute_sector(r_sector, raid_disks,
++ data_disks, &dd_idx,
++ &pd_idx, conf);
++ if (sh == NULL)
++ sh = get_active_stripe(conf, new_sector, pd_idx,
++ (bi->bi_rw&RWA_MASK));
++ if (sh) {
++ add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK));
++ } else {
++ /* cannot get stripe for read-ahead, just give-up */
++ clear_bit(BIO_UPTODATE, &bi->bi_flags);
++ sectors = 0;
++ break;
++ }
++
++ BUG_ON (new_sector != stripe);
++ sectors -= STRIPE_SECTORS;
++ if (bi->bi_sector > r_sector)
++ sectors += bi->bi_sector - r_sector;
++ if (r_sector + STRIPE_SECTORS > last_sector)
++ sectors += r_sector + STRIPE_SECTORS - last_sector;
++ r_sector += sectors_per_chunk;
++ }
+
+- sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK));
+ if (sh) {
+-
+- add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK));
+-
+ raid6_plug_device(conf);
+ handle_stripe(sh);
+ release_stripe(sh);
+- } else {
+- /* cannot get stripe for read-ahead, just give-up */
+- clear_bit(BIO_UPTODATE, &bi->bi_flags);
+- break;
++ sh = NULL;
+ }
+
++ stripe += STRIPE_SECTORS;
+ }
++ block++;
++ if (sectors > 0)
++ goto repeat;
++
+ spin_lock_irq(&conf->device_lock);
+ if (--bi->bi_phys_segments == 0) {
+ int bytes = bi->bi_size;
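
raid6-stripe-by-stripe-handling.patch rewrites the make_request()
loop: the old code walked the bio in STRIPE_SECTORS steps and ran
get_active_stripe()/handle_stripe() once per page, while the new code
walks chunk-sized "blocks", attaching every page a stripe head will
receive before handling it once. Worked numbers for the geometry
above, assuming 4 KiB pages (STRIPE_SIZE = 4096, STRIPE_SECTORS = 8),
a 64 KiB chunk and 4 data disks:

    sectors_per_chunk  = 65536 >> 9   = 128
    stripes_per_chunk  = 65536 / 4096 =  16
    sectors_per_stripe = 8 * 4        =  32
    sectors_per_block  = 16 * 32      = 512   (256 KiB: one chunk row)

So for each block the outer i-loop visits 16 stripe heads, the inner
j-loop offers each head up to 4 pages (r_sector stepping by 128
sectors moves to the next data disk), and handle_stripe() runs once
per stripe head instead of once per page.
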
--- /dev/null
+diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6main.c
+--- linux-2.6.9.orig/drivers/md/raid6main.c 2008-01-10 14:02:08.000000000 +0800
++++ linux-2.6.9/drivers/md/raid6main.c 2008-01-10 14:01:56.000000000 +0800
+@@ -430,6 +430,7 @@ static int raid6_end_read_request (struc
+ clear_buffer_uptodate(bh);
+ }
+ #endif
++ BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ set_bit(STRIPE_HANDLE, &sh->state);
+ release_stripe(sh);
+@@ -468,6 +469,10 @@ static int raid6_end_write_request (stru
+
+ rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
+
++ if (test_bit(R5_Direct, &sh->dev[i].flags)) {
++ BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
++ sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
++ }
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ set_bit(STRIPE_HANDLE, &sh->state);
+ __release_stripe(conf, sh);
+@@ -664,7 +669,27 @@ static sector_t compute_blocknr(struct s
+ return r_sector;
+ }
+
++static struct page *zero_copy_data(struct bio *bio, sector_t sector)
++{
++ sector_t bi_sector = bio->bi_sector;
++ struct page *page = NULL;
++ struct bio_vec *bvl;
++ int i;
+
++ bio_for_each_segment(bvl, bio, i) {
++ if (sector == bi_sector)
++ page = bio_iovec_idx(bio, i)->bv_page;
++ bi_sector += bio_iovec_idx(bio, i)->bv_len >> 9;
++ if (bi_sector >= sector + STRIPE_SECTORS) {
++ /* check if the stripe is covered by one page */
++ if (page == bio_iovec_idx(bio, i)->bv_page &&
++ PageConstant(page))
++ return page;
++ return NULL;
++ }
++ }
++ return NULL;
++}
+
+ /*
+ * Copy data between a page in the stripe cache, and one or more bion
+@@ -731,6 +756,7 @@ static void compute_parity(struct stripe
+ raid6_conf_t *conf = sh->raid_conf;
+ int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = conf->raid_disks, count;
+ struct bio *chosen;
++ struct page *page;
+ /**** FIX THIS: This could be very bad if disks is close to 256 ****/
+ void *ptrs[disks];
+
+@@ -761,18 +787,46 @@ static void compute_parity(struct stripe
+ BUG(); /* Not implemented yet */
+ }
+
+- for (i = disks; i--;)
+- if (sh->dev[i].written) {
+- sector_t sector = sh->dev[i].sector;
+- struct bio *wbi = sh->dev[i].written;
+- while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
+- copy_data(1, wbi, sh->dev[i].page, sector);
+- wbi = r5_next_bio(wbi, sector);
++ for (i = disks; i--;) {
++ struct bio *wbi = sh->dev[i].written;
++ sector_t sector;
++
++ if (!wbi)
++ continue;
++
++ sector = sh->dev[i].sector;
++ set_bit(R5_LOCKED, &sh->dev[i].flags);
++ BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
++
++ /* check whether the write is covered by a single page
++ * and overwrites the whole cache stripe at once;
++ * in that case we can avoid the memcpy() */
++ if (!wbi->bi_next && test_bit(R5_Insync, &sh->dev[i].flags) &&
++ test_bit(R5_OVERWRITE, &sh->dev[i].flags)) {
++ page = zero_copy_data(wbi, sector);
++ /* we don't do zerocopy on a HighMem page: gen_syndrome()
++ * expects to address every source page via page_address()
++ * while it computes the P/Q parity. Supporting HighMem
++ * pages here would mean reworking the gen_syndrome()
++ * algorithm. -jay */
++ if (page && !PageHighMem(page)) {
++ atomic_inc(&conf->writes_zcopy);
++ sh->dev[i].req.bi_io_vec[0].bv_page = page;
++ set_bit(R5_Direct, &sh->dev[i].flags);
++ clear_bit(R5_UPTODATE, &sh->dev[i].flags);
++ clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
++ continue;
+ }
++ }
+
+- set_bit(R5_LOCKED, &sh->dev[i].flags);
+- set_bit(R5_UPTODATE, &sh->dev[i].flags);
++ atomic_inc(&conf->writes_copied);
++ clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
++ set_bit(R5_UPTODATE, &sh->dev[i].flags);
++ while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
++ copy_data(1, wbi, sh->dev[i].page, sector);
++ wbi = r5_next_bio(wbi, sector);
+ }
++ }
+
+ // switch(method) {
+ // case RECONSTRUCT_WRITE:
+@@ -783,7 +837,10 @@ static void compute_parity(struct stripe
+ count = 0;
+ i = d0_idx;
+ do {
+- ptrs[count++] = page_address(sh->dev[i].page);
++ if (test_bit(R5_Direct, &sh->dev[i].flags))
++ ptrs[count++] = page_address(sh->dev[i].req.bi_io_vec[0].bv_page);
++ else
++ ptrs[count++] = page_address(sh->dev[i].page);
+
+ i = raid6_next_disk(i, disks);
+ } while ( i != d0_idx );
+@@ -1185,7 +1242,8 @@ static void handle_stripe(struct stripe_
+ if (sh->dev[i].written) {
+ dev = &sh->dev[i];
+ if (!test_bit(R5_LOCKED, &dev->flags) &&
+- test_bit(R5_UPTODATE, &dev->flags) ) {
++ (test_bit(R5_UPTODATE, &dev->flags) ||
++ test_bit(R5_Direct, &dev->flags)) ) {
+ /* We can return any write requests */
+ struct bio *wbi, *wbi2;
+ PRINTK("Return write for stripe %llu disc %d\n",
+@@ -1193,6 +1251,7 @@ static void handle_stripe(struct stripe_
+ spin_lock_irq(&conf->device_lock);
+ wbi = dev->written;
+ dev->written = NULL;
++ clear_bit(R5_Direct, &dev->flags);
+ while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
+ wbi2 = r5_next_bio(wbi, dev->sector);
+ if (--wbi->bi_phys_segments == 0) {
+@@ -2008,6 +2067,7 @@ static int run (mddev_t *mddev)
+ if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
+ mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
+ }
++ mddev->queue->backing_dev_info.capabilities |= BDI_CAP_PAGE_CONST_WRITE;
+
+ /* Ok, everything is just fine now */
+ mddev->array_size = mddev->size * (mddev->raid_disks - 2);
+@@ -2095,9 +2155,11 @@ static void status (struct seq_file *seq
+ atomic_read(&conf->handled_in_raid5d),
+ atomic_read(&conf->out_of_stripes),
+ atomic_read(&conf->handle_called));
+- seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw",
++ seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw. zcopy writes: %u, copied writes: %u",
+ atomic_read(&conf->reads_for_rmw),
+- atomic_read(&conf->reads_for_rcw));
++ atomic_read(&conf->reads_for_rcw),
++ atomic_read(&conf->writes_zcopy),
++ atomic_read(&conf->writes_copied));
+ seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n",
+ atomic_read(&conf->delayed),
+ atomic_read(&conf->active_stripes),
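
raid6-zerocopy.patch: when a write overwrites a whole cache stripe
(R5_OVERWRITE) from a single bio whose stripe-sized span lies inside
one page, compute_parity() now feeds that page straight to the parity
math and points the outgoing request bvec at it (R5_Direct), skipping
the copy_data() memcpy into the cache page. The page must also be
flagged PageConstant, a promise (advertised to upper layers through
BDI_CAP_PAGE_CONST_WRITE) that its contents stay stable while the
write is in flight; both flags come from a companion patch, not
vanilla 2.6.9. HighMem pages are excluded because gen_syndrome() is
fed plain page_address() pointers. A hypothetical predicate
condensing the eligibility test spread over the hunks above:

    #include <linux/raid/raid5.h>
    #include <linux/highmem.h>

    static int can_zerocopy(struct r5dev *dev, struct page *page)
    {
            return dev->written &&
                   !dev->written->bi_next &&    /* one bio ... */
                   test_bit(R5_OVERWRITE, &dev->flags) && /* whole stripe */
                   test_bit(R5_Insync, &dev->flags) &&
                   page != NULL &&      /* one page, per zero_copy_data() */
                   !PageHighMem(page);  /* parity math needs page_address() */
    }

On completion raid6_end_write_request() swaps the cache page back
into the bvec, and handle_stripe() accepts R5_Direct in place of
R5_UPTODATE when retiring the write.
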
raid5-merge-ios.patch
raid5-serialize-ovelapping-reqs.patch
raid5-zerocopy.patch
+raid6-stats.patch
+raid6-configurable-cachesize.patch
+raid6-large-io.patch
+raid6-stripe-by-stripe-handling.patch
+raid6-merge-ios.patch
+raid6-serialize-ovelapping-reqs.patch
+raid6-zerocopy.patch
jbd-stats-2.6.9.patch
bitops_ext2_find_next_le_bit-2.6.patch
quota-deadlock-on-pagelock-core.patch