diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c --- linux-2.6.18-53.orig/drivers/md/raid5.c 2007-12-28 18:55:24.000000000 +0800 +++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-28 19:08:15.000000000 +0800 @@ -1277,7 +1277,26 @@ static void compute_block_2(struct strip } } +/* + * The whole idea is to collect all bio's and then issue them + * disk by disk to assist merging a bit -bzzz + */ +static void raid5_flush_bios(raid5_conf_t *conf, struct bio *bios[], int raid_disks) +{ + struct bio *bio, *nbio; + int i; + for (i = 0; i < raid_disks; i++) { + bio = bios[i]; + while (bio) { + nbio = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = nbio; + } + bios[i] = NULL; + } +} /* * Each stripe/dev can have one or more bion attached. @@ -1392,7 +1411,7 @@ static int stripe_to_pdidx(sector_t stri * */ -static void handle_stripe5(struct stripe_head *sh) +static void handle_stripe5(struct stripe_head *sh, struct bio *bios[]) { raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks; @@ -1939,7 +1958,11 @@ static void handle_stripe5(struct stripe test_bit(R5_ReWrite, &sh->dev[i].flags)) atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); atomic_inc(&conf->out_reqs_in_queue); - generic_make_request(bi); + if (bios) { + bi->bi_next = bios[i]; + bios[i] = bi; + } else + generic_make_request(bi); } else { if (rw == 1) set_bit(STRIPE_DEGRADED, &sh->state); @@ -1951,7 +1974,7 @@ static void handle_stripe5(struct stripe } } -static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) +static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page, struct bio *bios[]) { raid6_conf_t *conf = sh->raid_conf; int disks = conf->raid_disks; @@ -2499,7 +2522,11 @@ static void handle_stripe6(struct stripe if (rw == WRITE && test_bit(R5_ReWrite, &sh->dev[i].flags)) atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); - generic_make_request(bi); + if (bios) { + bi->bi_next = bios[i]; + bios[i] = bi; + } else + generic_make_request(bi); atomic_inc(&conf->out_reqs_in_queue); } else { if (rw == 1) @@ -2512,12 +2539,12 @@ static void handle_stripe6(struct stripe } } -static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) +static void handle_stripe(struct stripe_head *sh, struct page *tmp_page, struct bio *bios[]) { if (sh->raid_conf->level == 6) - handle_stripe6(sh, tmp_page); + handle_stripe6(sh, tmp_page, bios); else - handle_stripe5(sh); + handle_stripe5(sh, bios); } @@ -2670,6 +2697,7 @@ static int make_request(request_queue_t int stripes_per_chunk, sectors_per_block; int sectors_per_stripe; int i, j; + struct bio *bios[MD_SB_DISKS]; DEFINE_WAIT(w); int disks, data_disks; @@ -2698,6 +2726,7 @@ static int make_request(request_queue_t sectors = bi->bi_size >> 9; stripes_per_chunk = conf->chunk_size / STRIPE_SIZE; + memset(&bios, 0, sizeof(bios)); redo_bio: /* stripe by stripe handle needs a stable raid layout, so if this * reuqest covers the expanding region, wait it over. @@ -2756,8 +2785,10 @@ retry: * the raid layout has been changed, we have to redo the * whole bio because we don't which sectors in it has been * done, and which is not done. -jay */ - if (raid5_redo_bio(conf, bi, disks, logical_sector)) + if (raid5_redo_bio(conf, bi, disks, logical_sector)) { + raid5_flush_bios(conf, bios, disks); goto redo_bio; + } if (test_bit(STRIPE_EXPANDING, &sh->state)) { /* Stripe is busy expanding or @@ -2766,6 +2797,7 @@ retry: */ release_stripe(sh); sh = NULL; + raid5_flush_bios(conf, bios, disks); raid5_unplug_device(mddev->queue); schedule(); goto retry; @@ -2784,17 +2816,19 @@ retry: */ if (r_sector >= mddev->suspend_lo && r_sector < mddev->suspend_hi) { - handle_stripe(sh, NULL); + handle_stripe(sh, NULL, NULL); release_stripe(sh); sh = NULL; + raid5_flush_bios(conf, bios, disks); schedule(); goto retry; } if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { - handle_stripe(sh, NULL); + handle_stripe(sh, NULL, NULL); release_stripe(sh); sh = NULL; + raid5_flush_bios(conf, bios, disks); raid5_unplug_device(mddev->queue); schedule(); goto retry; @@ -2810,7 +2844,7 @@ retry: r_sector += sectors_per_chunk; } if (sh) { - handle_stripe(sh, NULL); + handle_stripe(sh, NULL, NULL); release_stripe(sh); sh = NULL; } @@ -2820,6 +2854,9 @@ retry: if (sectors > 0) goto repeat; + /* flush all of the bios */ + raid5_flush_bios(conf, bios, disks); + spin_lock_irq(&conf->device_lock); remaining = --bi->bi_phys_segments; spin_unlock_irq(&conf->device_lock); @@ -3035,7 +3072,7 @@ static inline sector_t sync_request(mdde clear_bit(STRIPE_INSYNC, &sh->state); spin_unlock(&sh->lock); - handle_stripe(sh, NULL); + handle_stripe(sh, NULL, NULL); release_stripe(sh); return STRIPE_SECTORS; @@ -3091,7 +3128,7 @@ static void raid5d (mddev_t *mddev) handled++; atomic_inc(&conf->handled_in_raid5d); - handle_stripe(sh, conf->spare_page); + handle_stripe(sh, conf->spare_page, NULL); release_stripe(sh); spin_lock_irq(&conf->device_lock);