1 diff -pur linux-2.6.9-67.orig/drivers/md/raid6main.c linux-2.6.9-67/drivers/md/raid6main.c
2 --- linux-2.6.9-67.orig/drivers/md/raid6main.c 2009-02-15 10:24:30.000000000 +0800
3 +++ linux-2.6.9-67/drivers/md/raid6main.c 2009-02-15 10:26:17.000000000 +0800
4 @@ -430,6 +430,9 @@ static int raid6_end_read_request (struc
5 clear_buffer_uptodate(bh);
8 + /* A read during a Direct (zero-copy) write is allowed */
9 + /* BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)) */
10 + BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page != sh->dev[i].page);
11 clear_bit(R5_LOCKED, &sh->dev[i].flags);
12 set_bit(STRIPE_HANDLE, &sh->state);
14 @@ -468,6 +471,10 @@ static int raid6_end_write_request (stru
16 rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
18 + if (test_bit(R5_Direct, &sh->dev[i].flags)) {
19 + BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
20 + sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
22 clear_bit(R5_LOCKED, &sh->dev[i].flags);
23 set_bit(STRIPE_HANDLE, &sh->state);
24 __release_stripe(conf, sh);
25 @@ -664,7 +671,27 @@ static sector_t compute_blocknr(struct s
29 +static struct page *zero_copy_data(struct bio *bio, sector_t sector)
31 + sector_t bi_sector = bio->bi_sector;
32 + struct page *page = NULL;
33 + struct bio_vec *bvl;
36 + bio_for_each_segment(bvl, bio, i) {
37 + if (sector == bi_sector)
38 + page = bio_iovec_idx(bio, i)->bv_page;
39 + bi_sector += bio_iovec_idx(bio, i)->bv_len >> 9;
40 + if (bi_sector >= sector + STRIPE_SECTORS) {
41 + /* check if the stripe is covered by one page */
42 + if (page == bio_iovec_idx(bio, i)->bv_page &&
52 * Copy data between a page in the stripe cache, and one or more bion
53 @@ -731,6 +758,7 @@ static void compute_parity(struct stripe
54 raid6_conf_t *conf = sh->raid_conf;
55 int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = conf->raid_disks, count;
58 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
61 @@ -761,18 +789,46 @@ static void compute_parity(struct stripe
62 BUG(); /* Not implemented yet */
65 - for (i = disks; i--;)
66 - if (sh->dev[i].written) {
67 - sector_t sector = sh->dev[i].sector;
68 - struct bio *wbi = sh->dev[i].written;
69 - while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
70 - copy_data(1, wbi, sh->dev[i].page, sector);
71 - wbi = r5_next_bio(wbi, sector);
72 + for (i = disks; i--;) {
73 + struct bio *wbi = sh->dev[i].written;
79 + sector = sh->dev[i].sector;
80 + set_bit(R5_LOCKED, &sh->dev[i].flags);
81 + BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
83 + /* Check whether the write is covered by a single
84 + * page and the whole stripe is written at once;
85 + * in that case we can avoid the memcpy(). */
86 + if (!wbi->bi_next && test_bit(R5_Insync, &sh->dev[i].flags) &&
87 + test_bit(R5_OVERWRITE, &sh->dev[i].flags)) {
88 + page = zero_copy_data(wbi, sector);
89 + /* We don't do zero-copy on a HighMem page: RAID6 tends
90 + * to prepare all of the pages' contents for access
91 + * before computing the P/Q parity. If HighMem pages
92 + * ever need to be supported too, the gen_syndrome()
93 + * algorithm must be modified. -jay */
94 + if (page && !PageHighMem(page)) {
95 + atomic_inc(&conf->writes_zcopy);
96 + sh->dev[i].req.bi_io_vec[0].bv_page = page;
97 + set_bit(R5_Direct, &sh->dev[i].flags);
98 + clear_bit(R5_UPTODATE, &sh->dev[i].flags);
99 + clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
104 - set_bit(R5_LOCKED, &sh->dev[i].flags);
105 - set_bit(R5_UPTODATE, &sh->dev[i].flags);
106 + atomic_inc(&conf->writes_copied);
107 + clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
108 + set_bit(R5_UPTODATE, &sh->dev[i].flags);
109 + while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
110 + copy_data(1, wbi, sh->dev[i].page, sector);
111 + wbi = r5_next_bio(wbi, sector);
116 // case RECONSTRUCT_WRITE:
117 @@ -783,7 +839,10 @@ static void compute_parity(struct stripe
121 - ptrs[count++] = page_address(sh->dev[i].page);
122 + if (test_bit(R5_Direct, &sh->dev[i].flags))
123 + ptrs[count++] = page_address(sh->dev[i].req.bi_io_vec[0].bv_page);
125 + ptrs[count++] = page_address(sh->dev[i].page);
127 i = raid6_next_disk(i, disks);
128 } while ( i != d0_idx );
129 @@ -1185,7 +1244,8 @@ static void handle_stripe(struct stripe_
130 if (sh->dev[i].written) {
132 if (!test_bit(R5_LOCKED, &dev->flags) &&
133 - test_bit(R5_UPTODATE, &dev->flags) ) {
134 + (test_bit(R5_UPTODATE, &dev->flags) ||
135 + test_bit(R5_Direct, &dev->flags)) ) {
136 /* We can return any write requests */
137 struct bio *wbi, *wbi2;
138 PRINTK("Return write for stripe %llu disc %d\n",
139 @@ -1193,6 +1253,7 @@ static void handle_stripe(struct stripe_
140 spin_lock_irq(&conf->device_lock);
143 + clear_bit(R5_Direct, &dev->flags);
144 while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
145 wbi2 = r5_next_bio(wbi, dev->sector);
146 if (--wbi->bi_phys_segments == 0) {
147 @@ -1503,6 +1564,15 @@ static void handle_stripe(struct stripe_
149 PRINTK("skip op %ld on disc %d for sector %llu\n",
150 bi->bi_rw, i, (unsigned long long)sh->sector);
152 + if (test_bit(R5_Direct, &sh->dev[i].flags)) {
153 + /* Restore req's page pointer; otherwise no
154 + * further reads would be permitted on this stripe,
155 + * which is not what we want. -jay */
156 + BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
157 + sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
160 clear_bit(R5_LOCKED, &sh->dev[i].flags);
161 set_bit(STRIPE_HANDLE, &sh->state);
162 atomic_dec(&conf->delayed);
163 @@ -2008,6 +2078,7 @@ static int run (mddev_t *mddev)
164 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
165 mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
167 + mddev->queue->backing_dev_info.capabilities |= BDI_CAP_PAGE_CONST_WRITE;
169 /* Ok, everything is just fine now */
170 mddev->array_size = mddev->size * (mddev->raid_disks - 2);
171 @@ -2095,9 +2166,11 @@ static void status (struct seq_file *seq
172 atomic_read(&conf->handled_in_raid5d),
173 atomic_read(&conf->out_of_stripes),
174 atomic_read(&conf->handle_called));
175 - seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw",
176 + seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw. zcopy writes: %u, copied writes: %u",
177 atomic_read(&conf->reads_for_rmw),
178 - atomic_read(&conf->reads_for_rcw));
179 + atomic_read(&conf->reads_for_rcw),
180 + atomic_read(&conf->writes_zcopy),
181 + atomic_read(&conf->writes_copied));
182 seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n",
183 atomic_read(&conf->delayed),
184 atomic_read(&conf->active_stripes),