-For full-stripe writes, don't copy data into the internal cache.
-This optimization reduces CPU load by roughly 30%.
-
-Index: linux-2.6.9/include/linux/raid/raid5.h
-===================================================================
---- linux-2.6.9.orig/include/linux/raid/raid5.h 2006-05-21 17:57:25.000000000 +0400
-+++ linux-2.6.9/include/linux/raid/raid5.h 2006-05-22 00:10:04.000000000 +0400
-@@ -152,6 +152,7 @@ struct stripe_head {
- #define R5_Wantread 4 /* want to schedule a read */
- #define R5_Wantwrite 5
- #define R5_Syncio 6 /* this io need to be accounted as resync io */
-+#define R5_Direct 7 /* use page from passed bio to avoid memcpy */
-
- /*
- * Write method
-Index: linux-2.6.9/drivers/md/raid5.c
-===================================================================
---- linux-2.6.9.orig/drivers/md/raid5.c 2006-05-22 00:10:01.000000000 +0400
-+++ linux-2.6.9/drivers/md/raid5.c 2006-05-22 00:10:04.000000000 +0400
-@@ -411,6 +411,8 @@ static int raid5_end_read_request (struc
- clear_buffer_uptodate(bh);
- }
- #endif
-+ if (test_bit(R5_Direct, &sh->dev[i].flags))
-+ printk("R5_Direct for READ ?!\n");
- clear_bit(R5_LOCKED, &sh->dev[i].flags);
- set_bit(STRIPE_HANDLE, &sh->state);
- release_stripe(sh);
-@@ -449,6 +451,10 @@ static int raid5_end_write_request (stru
-
- rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
-
-+ if (test_bit(R5_Direct, &sh->dev[i].flags)) {
-+ BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
-+ sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
-+ }
- clear_bit(R5_LOCKED, &sh->dev[i].flags);
- set_bit(STRIPE_HANDLE, &sh->state);
- __release_stripe(conf, sh);
-@@ -673,6 +679,49 @@ static void copy_data(int frombio, struc
- }
- }
-
-+static struct page *zero_copy_data(struct bio *bio, sector_t sector)
-+{
-+ struct bio_vec *bvl;
-+ int i;
-+ /* Return the page of the first bio segment overlapping the stripe page at 'sector'; BUG unless it covers all of it. */
-+ for (;bio && bio->bi_sector < sector+STRIPE_SECTORS;
-+ bio = r5_next_bio(bio, sector) ) {
-+ int page_offset;
-+ if (bio->bi_sector >= sector)
-+ page_offset = (signed)(bio->bi_sector - sector) * 512;
-+ else
-+ page_offset = (signed)(sector - bio->bi_sector) * -512;
-+ bio_for_each_segment(bvl, bio, i) {
-+ int len = bio_iovec_idx(bio,i)->bv_len;
-+ int clen;
-+ int b_offset = 0;
-+ /* page_offset < 0: this segment starts before the stripe page, so skip its leading bytes. */
-+ if (page_offset < 0) {
-+ b_offset = -page_offset;
-+ page_offset += b_offset;
-+ len -= b_offset;
-+ }
-+ /* clen is len clipped so that it does not run past STRIPE_SIZE. */
-+ if (len > 0 && page_offset + len > STRIPE_SIZE)
-+ clen = STRIPE_SIZE - page_offset;
-+ else clen = len;
-+ /* Zero-copy needs one segment spanning the whole stripe page; anything shorter is a BUG. */
-+ if (clen > 0) {
-+ BUG_ON(clen < STRIPE_SIZE);
-+ /*printk(" sector %lu: page %p from index %u\n",
-+ (unsigned long) sector,
-+ bio_iovec_idx(bio, i)->bv_page, i);*/
-+ return bio_iovec_idx(bio, i)->bv_page;
-+ }
-+ if (clen < len) /* hit end of page */
-+ break;
-+ page_offset += len;
-+ }
-+ }
-+ BUG(); /* no segment overlapping the stripe page was found */
-+ return NULL;
-+}
-+
- #define check_xor() do { \
- if (count == MAX_XOR_BLOCKS) { \
- xor_block(count, STRIPE_SIZE, ptr); \
-@@ -717,6 +766,8 @@ static void compute_parity(struct stripe
- int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;
- void *ptr[MAX_XOR_BLOCKS];
- struct bio *chosen;
-+ struct page *page;
-+ int zerocopy = 0;
-
- PRINTK("compute_parity, stripe %llu, method %d\n",
- (unsigned long long)sh->sector, method);
-@@ -743,13 +794,17 @@ static void compute_parity(struct stripe
- break;
- case RECONSTRUCT_WRITE:
- memset(ptr[0], 0, STRIPE_SIZE);
-- for (i= disks; i-- ;)
-+ zerocopy = 1;
-+ for (i= disks; i-- ;) {
-+ if (i != pd_idx && !sh->dev[i].towrite)
-+ zerocopy = 0;
- if (i!=pd_idx && sh->dev[i].towrite) {
- chosen = sh->dev[i].towrite;
- sh->dev[i].towrite = NULL;
- if (sh->dev[i].written) BUG();
- sh->dev[i].written = chosen;
- }
-+ }
- break;
- case CHECK_PARITY:
- break;
-@@ -759,34 +814,62 @@ static void compute_parity(struct stripe
- count = 1;
- }
-
-- for (i = disks; i--;)
-- if (sh->dev[i].written) {
-- sector_t sector = sh->dev[i].sector;
-- struct bio *wbi = sh->dev[i].written;
-- while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
-- copy_data(1, wbi, sh->dev[i].page, sector);
-- wbi = r5_next_bio(wbi, sector);
-- }
-+ for (i = disks; i--;) {
-+ struct bio *wbi = sh->dev[i].written;
-+ sector_t sector;
-+
-+ if (!wbi)
-+ continue;
-+
-+ sector = sh->dev[i].sector;
-+ set_bit(R5_LOCKED, &sh->dev[i].flags);
-+ BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
-+
-+ /* Check whether this block is covered by a single page
-+ * and the whole stripe is written at once;
-+ * in that case we can avoid memcpy(). */
-+ if (zerocopy && wbi && wbi->bi_next == NULL &&
-+ test_bit(R5_OVERWRITE, &sh->dev[i].flags)) {
-+ page = zero_copy_data(wbi, sector);
-+ BUG_ON(PageHighMem(page));
-+ sh->dev[i].req.bi_io_vec[0].bv_page = page;
-+ set_bit(R5_Direct, &sh->dev[i].flags);
-+ clear_bit(R5_UPTODATE, &sh->dev[i].flags);
-+ continue;
-+ }
-
-- set_bit(R5_LOCKED, &sh->dev[i].flags);
-- set_bit(R5_UPTODATE, &sh->dev[i].flags);
-+ set_bit(R5_UPTODATE, &sh->dev[i].flags);
-+ while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
-+ copy_data(1, wbi, sh->dev[i].page, sector);
-+ wbi = r5_next_bio(wbi, sector);
- }
-+ }
-
- switch(method) {
- case RECONSTRUCT_WRITE:
- case CHECK_PARITY:
-- for (i=disks; i--;)
-- if (i != pd_idx) {
-- ptr[count++] = page_address(sh->dev[i].page);
-- check_xor();
-- }
-+ for (i=disks; i--;) {
-+ if (i == pd_idx)
-+ continue;
-+ if (test_bit(R5_Direct, &sh->dev[i].flags))
-+ page = sh->dev[i].req.bi_io_vec[0].bv_page;
-+ else
-+ page = sh->dev[i].page;
-+ ptr[count++] = page_address(page);
-+ check_xor();
-+ }
- break;
- case READ_MODIFY_WRITE:
-- for (i = disks; i--;)
-- if (sh->dev[i].written) {
-- ptr[count++] = page_address(sh->dev[i].page);
-- check_xor();
-- }
-+ for (i = disks; i--;) {
-+ if (!sh->dev[i].written)
-+ continue;
-+ if (test_bit(R5_Direct, &sh->dev[i].flags))
-+ page = sh->dev[i].req.bi_io_vec[0].bv_page;
-+ else
-+ page = sh->dev[i].page;
-+ ptr[count++] = page_address(page);
-+ check_xor();
-+ }
- }
- if (count != 1)
- xor_block(count, STRIPE_SIZE, ptr);
-@@ -1012,7 +1094,7 @@ static void handle_stripe(struct stripe_
- dev = &sh->dev[sh->pd_idx];
- if ( written &&
- ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
-- test_bit(R5_UPTODATE, &dev->flags))
-+ (test_bit(R5_UPTODATE, &dev->flags) || test_bit(R5_Direct, &dev->flags)))
- || (failed == 1 && failed_num == sh->pd_idx))
- ) {
- /* any written block on an uptodate or failed drive can be returned.
-@@ -1023,13 +1105,16 @@ static void handle_stripe(struct stripe_
- if (sh->dev[i].written) {
- dev = &sh->dev[i];
- if (!test_bit(R5_LOCKED, &dev->flags) &&
-- test_bit(R5_UPTODATE, &dev->flags) ) {
-+ (test_bit(R5_UPTODATE, &dev->flags) ||
-+ test_bit(R5_Direct, &dev->flags)) ) {
- /* We can return any write requests */
- struct bio *wbi, *wbi2;
- PRINTK("Return write for disc %d\n", i);
- spin_lock_irq(&conf->device_lock);
- wbi = dev->written;
- dev->written = NULL;
-+ if (test_bit(R5_Direct, &dev->flags))
-+ clear_bit(R5_Direct, &dev->flags);
- while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
- wbi2 = r5_next_bio(wbi, dev->sector);
- if (--wbi->bi_phys_segments == 0) {