# Serialize overlapping requests in drivers/md/raid6main.c (linux-2.6.9 tree).
#
# What the hunks below do:
#  - include/linux/raid/raid5.h: add the per-device flag R5_Overlap (bit 8)
#    and a wait_for_overlap wait queue to struct raid5_private_data; run()
#    initializes that queue with init_waitqueue_head().
#  - add_stripe_bio(): now returns int instead of void.  A bio that overlaps
#    a neighbouring bio on the toread/towrite chain no longer trips BUG_ON();
#    instead the function sets R5_Overlap on the device, drops
#    conf->device_lock and sh->lock, and returns 0.  It returns 1 when the
#    bio was added.
#  - make_request(): registers on wait_for_overlap (TASK_UNINTERRUPTIBLE)
#    before trying to add the bio.  On a 0 return it unplugs the device,
#    releases the stripe, calls schedule() and jumps back to the retry label;
#    finish_wait() is called on the success path and on the read-ahead
#    give-up path.
#  - compute_parity() / handle_stripe(): wherever a toread/towrite chain is
#    detached from a device, clear R5_Overlap and, if it was set, wake up
#    wait_for_overlap.
#
# NOTE(review): the retry path calls release_stripe(sh) but leaves sh
# non-NULL, so the `if (sh == NULL)` stripe lookup that follows the retry
# label is skipped and add_stripe_bio() is called again on the stripe that
# was just released.  The hunks elide part of make_request(), so confirm
# against the full function whether the reference is still valid on retry.
diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6main.c --- linux-2.6.9.orig/drivers/md/raid6main.c 2008-01-10 13:55:37.000000000 +0800 +++ linux-2.6.9/drivers/md/raid6main.c 2008-01-10 13:55:56.000000000 +0800 @@ -749,6 +749,10 @@ static void compute_parity(struct stripe if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) { chosen = sh->dev[i].towrite; sh->dev[i].towrite = NULL; + + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) + wake_up(&conf->wait_for_overlap); + if (sh->dev[i].written) BUG(); sh->dev[i].written = chosen; } @@ -907,7 +911,7 @@ static void compute_block_2(struct strip * toread/towrite point to the first in a chain. * The bi_next chain must be in order. */ -static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite) +static int add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite) { struct bio **bip; raid6_conf_t *conf = sh->raid_conf; @@ -924,10 +928,13 @@ static void add_stripe_bio (struct strip else bip = &sh->dev[dd_idx].toread; while (*bip && (*bip)->bi_sector < bi->bi_sector) { - BUG_ON((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector); + if((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector) + goto overlap; bip = & (*bip)->bi_next; } -/* FIXME do I need to worry about overlapping bion */ + if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9)) + goto overlap; + if (*bip && bi->bi_next && (*bip) != bi->bi_next) BUG(); if (*bip) @@ -954,6 +961,14 @@ static void add_stripe_bio (struct strip if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); } + + return 1; + +overlap: + set_bit(R5_Overlap, &sh->dev[dd_idx].flags); + spin_unlock_irq(&conf->device_lock); + spin_unlock(&sh->lock); + return 0; } /* @@ -1038,6 +1053,9 @@ static void handle_stripe(struct stripe_ spin_lock_irq(&conf->device_lock); rbi = dev->toread; dev->toread = NULL; + + if 
(test_and_clear_bit(R5_Overlap, &dev->flags)) + wake_up(&conf->wait_for_overlap); spin_unlock_irq(&conf->device_lock); while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { copy_data(0, rbi, dev->page, dev->sector); @@ -1087,6 +1105,9 @@ static void handle_stripe(struct stripe_ sh->dev[i].towrite = NULL; if (bi) to_write--; + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) + wake_up(&conf->wait_for_overlap); + while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); @@ -1115,6 +1136,8 @@ static void handle_stripe(struct stripe_ if (!test_bit(R5_Insync, &sh->dev[i].flags)) { bi = sh->dev[i].toread; sh->dev[i].toread = NULL; + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) + wake_up(&conf->wait_for_overlap); if (bi) to_read--; while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); @@ -1648,6 +1671,8 @@ static int make_request (request_queue_t sh = NULL; /* iterrate through all pages in the stripe */ for (j = 0; j < data_disks && sectors > 0; j++) { + DEFINE_WAIT(w); + if (r_sector + STRIPE_SECTORS <= bi->bi_sector || r_sector >= last_sector) { r_sector += sectors_per_chunk; @@ -1656,6 +1681,9 @@ static int make_request (request_queue_t new_sector = raid6_compute_sector(r_sector, raid_disks, data_disks, &dd_idx, &pd_idx, conf); + +retry: + prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); if (sh == NULL) { /* first, try to get stripe w/o blocking * if we can't, then it's time to submit @@ -1668,10 +1696,18 @@ static int make_request (request_queue_t } } if (sh) { - add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK)); + if(!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { + /* Failed to be added due to overlapped. 
*/ + raid6_unplug_device(mddev->queue); + release_stripe(sh); + schedule(); + goto retry; + } + finish_wait(&conf->wait_for_overlap, &w); } else { /* cannot get stripe for read-ahead, just give-up */ clear_bit(BIO_UPTODATE, &bi->bi_flags); + finish_wait(&conf->wait_for_overlap, &w); sectors = 0; break; } @@ -1847,6 +1883,7 @@ static int run (mddev_t *mddev) conf->device_lock = SPIN_LOCK_UNLOCKED; init_waitqueue_head(&conf->wait_for_stripe); + init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->delayed_list); INIT_LIST_HEAD(&conf->inactive_list); diff -pur linux-2.6.9.orig/include/linux/raid/raid5.h linux-2.6.9/include/linux/raid/raid5.h --- linux-2.6.9.orig/include/linux/raid/raid5.h 2008-01-10 13:46:05.000000000 +0800 +++ linux-2.6.9/include/linux/raid/raid5.h 2008-01-10 13:55:56.000000000 +0800 @@ -154,6 +154,8 @@ struct stripe_head { #define R5_Wantwrite 5 #define R5_Syncio 6 /* this io need to be accounted as resync io */ #define R5_Direct 7 /* use page from passed bio to avoid memcpy */ +#define R5_Overlap 8 /* There is a pending overlapping request + * on this block */ /* * Write method @@ -221,6 +223,7 @@ struct raid5_private_data { atomic_t active_stripes; struct list_head inactive_list; wait_queue_head_t wait_for_stripe; + wait_queue_head_t wait_for_overlap; int inactive_blocked; /* release of inactive stripes blocked, * waiting for 25% to be free */