lustre/kernel_patches/patches/raid5-stripe-by-stripe-handling.patch
Helps to avoid unnecessary reads if a request covers a full stripe.

Note that reads needed to update parity hurt performance badly.

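As a rough illustration of the geometry the new code relies on, here is a
minimal user-space C sketch. It is not part of the patch: STRIPE_SIZE and
STRIPE_SECTORS mirror the 2.6.9 raid5 driver (one page, i.e. 4096 bytes, per
disk per stripe), while chunk_size and data_disks are made-up example values.
It shows how a request maps onto a "block" (one chunk-worth of internal
stripes across all data disks) and when a contiguous write covers full
stripes, so parity can be computed from the new data alone, avoiding the
pre-reads mentioned above.

/*
 * Standalone sketch of the stripe geometry used in the patch below.
 * Not kernel code: chunk_size and data_disks are hypothetical values.
 */
#include <stdio.h>

typedef unsigned long long sector_t;

#define STRIPE_SIZE     4096U                 /* one page per disk */
#define STRIPE_SECTORS  (STRIPE_SIZE >> 9)    /* 8 sectors */

int main(void)
{
        unsigned int chunk_size = 65536;      /* example: 64 KiB chunks */
        int data_disks = 4;                   /* example: 5-disk RAID5  */

        int sectors_per_chunk  = chunk_size >> 9;
        int stripes_per_chunk  = chunk_size / STRIPE_SIZE;
        int sectors_per_stripe = STRIPE_SECTORS * data_disks;
        int sectors_per_block  = stripes_per_chunk * sectors_per_stripe;

        /* an example request: starting sector and length in sectors */
        sector_t bi_sector = 512;
        sector_t nr_sectors = sectors_per_block;

        /* round down to a block boundary, as the patch does; like the
         * patch, the mask assumes sectors_per_block is a power of two
         * (the kernel uses sector_div() where this sketch uses '/') */
        sector_t block = bi_sector & ~((sector_t)sectors_per_block - 1);
        block /= sectors_per_block;

        printf("sectors_per_chunk=%d stripes_per_chunk=%d\n",
               sectors_per_chunk, stripes_per_chunk);
        printf("sectors_per_stripe=%d sectors_per_block=%d\n",
               sectors_per_stripe, sectors_per_block);
        printf("request at sector %llu lands in block %llu\n",
               (unsigned long long)bi_sector, (unsigned long long)block);

        /* a contiguous write aligned to and sized in whole blocks
         * touches every data disk of every stripe it covers, so parity
         * can be computed from the new data with no reads; anything
         * smaller degenerates to read-modify-write */
        if (bi_sector % sectors_per_block == 0 &&
            nr_sectors % sectors_per_block == 0)
                printf("full-stripe write: no parity pre-reads\n");
        else
                printf("partial write: read-modify-write needed\n");

        return 0;
}

With these example values one block is 512 sectors (256 KiB), i.e. chunk_size
times data_disks, which is the unit the rewritten make_request() walks
stripe by stripe.
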
Index: linux-2.6.9/drivers/md/raid5.c
===================================================================
--- linux-2.6.9.orig/drivers/md/raid5.c 2006-05-22 00:09:56.000000000 +0400
+++ linux-2.6.9/drivers/md/raid5.c      2006-05-22 00:10:01.000000000 +0400
@@ -1412,6 +1412,11 @@ static int make_request (request_queue_t
        sector_t new_sector;
        sector_t logical_sector, last_sector;
        struct stripe_head *sh;
+       sector_t stripe, sectors, block, r_sector, b_sector;
+       int sectors_per_chunk = conf->chunk_size >> 9;
+       int stripes_per_chunk, sectors_per_block;
+       int sectors_per_stripe;
+       int i, j;
 
        atomic_inc(&conf->in_reqs_in_queue);
 
@@ -1431,30 +1436,66 @@ static int make_request (request_queue_t
        bi->bi_phys_segments = 1;       /* over-loaded to count active stripes */
        if ( bio_data_dir(bi) == WRITE )
                md_write_start(mddev);
-       for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
-
-               new_sector = raid5_compute_sector(logical_sector,
-                                                 raid_disks, data_disks, &dd_idx, &pd_idx, conf);
-
-               PRINTK("raid5: make_request, sector %Lu logical %Lu\n",
-                       (unsigned long long)new_sector,
-                       (unsigned long long)logical_sector);
 
-               sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK));
+       stripes_per_chunk = conf->chunk_size / STRIPE_SIZE;
+       sectors_per_stripe = STRIPE_SECTORS * data_disks;
+       sectors_per_block = stripes_per_chunk * sectors_per_stripe;
+
+       block = logical_sector & ~((sector_t)sectors_per_block - 1);
+       sector_div(block, sectors_per_block);
+       sectors = bi->bi_size >> 9;
+
+repeat:
+       stripe = block * (sectors_per_block / data_disks);
+       b_sector = stripe * data_disks;
+       /* iterate through all stripes in this block,
+        * where block is a set of internal stripes
+        * which covers chunk */
+       for (i = 0; i < stripes_per_chunk && sectors > 0; i++) {
+               r_sector = b_sector + (i * STRIPE_SECTORS);
+               sh = NULL;
+               /* iterate through all pages in the stripe */
+               for (j = 0; j < data_disks && sectors > 0; j++) {
+                       if (r_sector + STRIPE_SECTORS <= bi->bi_sector ||
+                                       r_sector >= last_sector) {
+                               r_sector += sectors_per_chunk;
+                               continue;
+                       }
+                       new_sector = raid5_compute_sector(r_sector, raid_disks,
+                                                       data_disks, &dd_idx,
+                                                       &pd_idx, conf);
+                       if (sh == NULL)
+                               sh = get_active_stripe(conf, new_sector, pd_idx,
+                                                       (bi->bi_rw&RWA_MASK));
+                       if (sh) {
+                               add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK));
+                       } else {
+                               /* cannot get stripe for read-ahead, just give-up */
+                               clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                               sectors = 0;
+                               break;
+                       }
+
+                       BUG_ON (new_sector != stripe);
+                       sectors -= STRIPE_SECTORS;
+                       if (bi->bi_sector > r_sector)
+                               sectors += bi->bi_sector - r_sector;
+                       if (r_sector + STRIPE_SECTORS > last_sector)
+                               sectors += r_sector + STRIPE_SECTORS - last_sector;
+                       r_sector += sectors_per_chunk;
+               }
                if (sh) {
-
-                       add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK));
-
                        raid5_plug_device(conf);
                        handle_stripe(sh);
                        release_stripe(sh);
-               } else {
-                       /* cannot get stripe for read-ahead, just give-up */
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
-                       break;
+                       sh = NULL;
                }
-
+               stripe += STRIPE_SECTORS;
        }
+       block++;
+       if (sectors > 0)
+               goto repeat;
+
        spin_lock_irq(&conf->device_lock);
        if (--bi->bi_phys_segments == 0) {
                int bytes = bi->bi_size;