1 diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c
2 --- linux-2.6.18-53.orig/drivers/md/raid5.c 2007-12-28 14:55:08.000000000 +0800
3 +++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-28 18:52:08.000000000 +0800
4 @@ -2626,6 +2626,35 @@ static int raid5_issue_flush(request_que
8 +static inline int raid5_expanding_overlap(raid5_conf_t *conf, struct bio *bi)
10 + sector_t first_sector, last_sector;
12 + if (likely(conf->expand_progress == MaxSector))
15 + first_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
16 + last_sector = bi->bi_sector + (bi->bi_size>>9);
18 + return (first_sector < conf->expand_progress &&
19 + last_sector >= conf->expand_lo);
22 +static inline int raid5_redo_bio(raid5_conf_t *conf, struct bio *bi, int disks, sector_t sector)
26 + if (likely(conf->expand_progress == MaxSector))
29 + spin_lock_irq(&conf->device_lock);
30 + redo = (raid5_expanding_overlap(conf, bi) ||
31 + (unlikely(sector < conf->expand_progress) &&
32 + disks == conf->previous_raid_disks));
33 + spin_unlock_irq(&conf->device_lock);
37 static int make_request(request_queue_t *q, struct bio * bi)
39 mddev_t *mddev = q->queuedata;
40 @@ -2636,6 +2665,14 @@ static int make_request(request_queue_t
41 struct stripe_head *sh;
42 const int rw = bio_data_dir(bi);
44 + sector_t stripe, sectors, block, r_sector, b_sector;
45 + int sectors_per_chunk = conf->chunk_size >> 9;
46 + int stripes_per_chunk, sectors_per_block;
47 + int sectors_per_stripe;
51 + int disks, data_disks;
53 atomic_inc(&conf->in_reqs_in_queue);
55 @@ -2653,105 +2690,136 @@ static int make_request(request_queue_t
57 atomic_inc(&conf->reads_in);
60 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
61 last_sector = bi->bi_sector + (bi->bi_size>>9);
63 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
65 - for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
67 - int disks, data_disks;
70 - prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
71 - if (likely(conf->expand_progress == MaxSector))
72 - disks = conf->raid_disks;
74 - /* spinlock is needed as expand_progress may be
75 - * 64bit on a 32bit platform, and so it might be
76 - * possible to see a half-updated value
77 - * Ofcourse expand_progress could change after
78 - * the lock is dropped, so once we get a reference
79 - * to the stripe that we think it is, we will have
82 - spin_lock_irq(&conf->device_lock);
83 - disks = conf->raid_disks;
84 - if (logical_sector >= conf->expand_progress)
85 - disks = conf->previous_raid_disks;
87 - if (logical_sector >= conf->expand_lo) {
88 - spin_unlock_irq(&conf->device_lock);
93 - spin_unlock_irq(&conf->device_lock);
95 - data_disks = disks - conf->max_degraded;
96 + sectors = bi->bi_size >> 9;
97 + stripes_per_chunk = conf->chunk_size / STRIPE_SIZE;
99 - new_sector = raid5_compute_sector(logical_sector, disks, data_disks,
100 - &dd_idx, &pd_idx, conf);
101 - PRINTK("raid5: make_request, sector %llu logical %llu\n",
102 - (unsigned long long)new_sector,
103 - (unsigned long long)logical_sector);
105 + /* Stripe-by-stripe handling needs a stable raid layout, so if this
106 + * request covers the expanding region, wait until it no longer does.
107 + * Furthermore, we may get here with a partially handled request, so
108 + * also wait for bi_phys_segments to be 1. -jay */
109 + spin_lock_irq(&conf->device_lock);
110 + wait_event_lock_irq(conf->wait_for_overlap,
111 + (bi->bi_phys_segments == 1) &&
112 + !raid5_expanding_overlap(conf, bi),
114 + (unplug_slaves(conf->mddev), atomic_inc(&conf->expanding_overlap)));
116 + disks = conf->raid_disks;
117 + if (unlikely(logical_sector >= conf->expand_progress))
118 + disks = conf->previous_raid_disks;
119 + data_disks = disks - conf->max_degraded;
120 + spin_unlock_irq(&conf->device_lock);
122 - sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
124 - if (unlikely(conf->expand_progress != MaxSector)) {
125 - /* expansion might have moved on while waiting for a
126 - * stripe, so we must do the range check again.
127 - * Expansion could still move past after this
128 - * test, but as we are holding a reference to
129 - * 'sh', we know that if that happens,
130 - * STRIPE_EXPANDING will get set and the expansion
131 - * won't proceed until we finish with the stripe.
133 - int must_retry = 0;
134 - spin_lock_irq(&conf->device_lock);
135 - if (logical_sector < conf->expand_progress &&
136 - disks == conf->previous_raid_disks)
137 - /* mismatch, need to try again */
139 - spin_unlock_irq(&conf->device_lock);
141 - release_stripe(sh);
143 + /* compute the block # */
144 + sectors_per_stripe = STRIPE_SECTORS * data_disks;
145 + sectors_per_block = stripes_per_chunk * sectors_per_stripe;
147 + block = logical_sector & ~((sector_t)sectors_per_block - 1);
148 + sector_div(block, sectors_per_block);
151 + stripe = block * (sectors_per_block / data_disks);
152 + b_sector = stripe * data_disks;
153 + /* iterate through all stripes in this block,
154 + * where a block is the set of internal stripes
155 + * which cover a chunk */
157 + for (i = 0; i < stripes_per_chunk && sectors > 0; i++) {
158 + r_sector = b_sector + (i * STRIPE_SECTORS);
160 + /* iterate through all pages in the stripe */
161 + for (j = 0; j < data_disks && sectors > 0; j++) {
164 + if (r_sector + STRIPE_SECTORS <= bi->bi_sector ||
165 + r_sector >= last_sector) {
166 + r_sector += sectors_per_chunk;
171 + prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
172 + new_sector = raid5_compute_sector(r_sector, disks,
173 + data_disks, &dd_idx,
176 + sh = get_active_stripe(conf, new_sector, disks, pd_idx,
177 + (bi->bi_rw&RWA_MASK));
179 + /* We're handling the bio stripe by stripe, so when we find
180 + * that the raid layout has changed, we have to redo the
181 + * whole bio because we don't know which sectors in it have
182 + * been done and which have not. -jay */
183 + if (raid5_redo_bio(conf, bi, disks, logical_sector))
186 + if (test_bit(STRIPE_EXPANDING, &sh->state)) {
187 + /* Stripe is busy expanding (a failed add due to
188 + * overlap is handled separately below). Flush everything
191 + release_stripe(sh);
193 + raid5_unplug_device(mddev->queue);
198 + /* cannot get stripe for read-ahead, just give-up */
199 + finish_wait(&conf->wait_for_overlap, &w);
200 + clear_bit(BIO_UPTODATE, &bi->bi_flags);
206 /* FIXME what if we get a false positive because these
209 - if (logical_sector >= mddev->suspend_lo &&
210 - logical_sector < mddev->suspend_hi) {
211 + if (r_sector >= mddev->suspend_lo &&
212 + r_sector < mddev->suspend_hi) {
213 + handle_stripe(sh, NULL);
220 - if (test_bit(STRIPE_EXPANDING, &sh->state) ||
221 - !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
222 - /* Stripe is busy expanding or
223 - * add failed due to overlap. Flush everything
226 - raid5_unplug_device(mddev->queue);
227 + if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
228 + handle_stripe(sh, NULL);
231 + raid5_unplug_device(mddev->queue);
235 finish_wait(&conf->wait_for_overlap, &w);
237 + BUG_ON (new_sector != stripe);
238 + sectors -= STRIPE_SECTORS;
239 + if (bi->bi_sector > r_sector)
240 + sectors += bi->bi_sector - r_sector;
241 + if (r_sector + STRIPE_SECTORS > last_sector)
242 + sectors += r_sector + STRIPE_SECTORS - last_sector;
243 + r_sector += sectors_per_chunk;
246 handle_stripe(sh, NULL);
249 - /* cannot get stripe for read-ahead, just give-up */
250 - clear_bit(BIO_UPTODATE, &bi->bi_flags);
251 - finish_wait(&conf->wait_for_overlap, &w);
256 + stripe += STRIPE_SECTORS;
262 spin_lock_irq(&conf->device_lock);
263 remaining = --bi->bi_phys_segments;
264 spin_unlock_irq(&conf->device_lock);
265 @@ -3439,6 +3507,8 @@ static void status (struct seq_file *seq
266 atomic_read(&conf->active_stripes),
267 atomic_read(&conf->in_reqs_in_queue),
268 atomic_read(&conf->out_reqs_in_queue));
269 + seq_printf (seq, "\t\t%u expanding overlap\n",
270 + atomic_read(&conf->expanding_overlap));
272 seq_printf (seq, "\n");
274 diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/include/linux/raid/raid5.h
275 --- linux-2.6.18-53.orig/include/linux/raid/raid5.h 2007-12-28 14:55:08.000000000 +0800
276 +++ linux-2.6.18-53/include/linux/raid/raid5.h 2007-12-28 18:09:37.000000000 +0800
277 @@ -278,6 +278,7 @@ struct raid5_private_data {
278 atomic_t bit_delayed;
279 atomic_t in_reqs_in_queue;
280 atomic_t out_reqs_in_queue;
281 + atomic_t expanding_overlap;
284 typedef struct raid5_private_data raid5_conf_t;