Whamcloud - gitweb
b=20533 set the default max_sectors to the raid5/6 stripe size
[fs/lustre-release.git] / lustre / kernel_patches / patches / raid6-serialize-ovelapping-reqs.patch
1 diff -pur linux-2.6.9.orig/drivers/md/raid6main.c linux-2.6.9/drivers/md/raid6main.c
2 --- linux-2.6.9.orig/drivers/md/raid6main.c     2008-01-10 13:55:37.000000000 +0800
3 +++ linux-2.6.9/drivers/md/raid6main.c  2008-01-10 13:55:56.000000000 +0800
4 @@ -749,6 +749,10 @@ static void compute_parity(struct stripe
5                         if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
6                                 chosen = sh->dev[i].towrite;
7                                 sh->dev[i].towrite = NULL;
8 +
9 +                               if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
10 +                                       wake_up(&conf->wait_for_overlap);
11 +
12                                 if (sh->dev[i].written) BUG();
13                                 sh->dev[i].written = chosen;
14                         }
15 @@ -907,7 +911,7 @@ static void compute_block_2(struct strip
16   * toread/towrite point to the first in a chain.
17   * The bi_next chain must be in order.
18   */
19 -static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
20 +static int add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
21  {
22         struct bio **bip;
23         raid6_conf_t *conf = sh->raid_conf;
24 @@ -924,10 +928,13 @@ static void add_stripe_bio (struct strip
25         else
26                 bip = &sh->dev[dd_idx].toread;
27         while (*bip && (*bip)->bi_sector < bi->bi_sector) {
28 -               BUG_ON((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector);
29 +               if((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector)
30 +                       goto overlap;
31                 bip = & (*bip)->bi_next;
32         }
33 -/* FIXME do I need to worry about overlapping bion */
34 +       if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9))
35 +               goto overlap;
36 +
37         if (*bip && bi->bi_next && (*bip) != bi->bi_next)
38                 BUG();
39         if (*bip)
40 @@ -954,6 +961,14 @@ static void add_stripe_bio (struct strip
41                 if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
42                         set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
43         }
44 +
45 +       return 1;
46 +
47 +overlap:
48 +       set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
49 +       spin_unlock_irq(&conf->device_lock);
50 +       spin_unlock(&sh->lock);
51 +       return 0;
52  }
53  
54  /*
55 @@ -1038,6 +1053,9 @@ static void handle_stripe(struct stripe_
56                         spin_lock_irq(&conf->device_lock);
57                         rbi = dev->toread;
58                         dev->toread = NULL;
59 +
60 +                       if (test_and_clear_bit(R5_Overlap, &dev->flags))
61 +                               wake_up(&conf->wait_for_overlap);
62                         spin_unlock_irq(&conf->device_lock);
63                         while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
64                                 copy_data(0, rbi, dev->page, dev->sector);
65 @@ -1087,6 +1105,9 @@ static void handle_stripe(struct stripe_
66                         sh->dev[i].towrite = NULL;
67                         if (bi) to_write--;
68  
69 +                       if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
70 +                               wake_up(&conf->wait_for_overlap);
71 +
72                         while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
73                                 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
74                                 clear_bit(BIO_UPTODATE, &bi->bi_flags);
75 @@ -1115,6 +1136,8 @@ static void handle_stripe(struct stripe_
76                         if (!test_bit(R5_Insync, &sh->dev[i].flags)) {
77                                 bi = sh->dev[i].toread;
78                                 sh->dev[i].toread = NULL;
79 +                               if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
80 +                                       wake_up(&conf->wait_for_overlap);
81                                 if (bi) to_read--;
82                                 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
83                                         struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
84 @@ -1648,6 +1671,8 @@ static int make_request (request_queue_t
85                 sh = NULL;
86                 /* iterrate through all pages in the stripe */
87                 for (j = 0; j < data_disks && sectors > 0; j++) {
88 +                       DEFINE_WAIT(w);
89 +
90                         if (r_sector + STRIPE_SECTORS <= bi->bi_sector ||
91                             r_sector >= last_sector) {
92                                 r_sector += sectors_per_chunk;
93 @@ -1656,6 +1681,9 @@ static int make_request (request_queue_t
94                         new_sector = raid6_compute_sector(r_sector, raid_disks,
95                                                         data_disks, &dd_idx, 
96                                                         &pd_idx, conf);
97 +
98 +retry:
99 +                       prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
100                         if (sh == NULL) {
101                                 /* first, try to get stripe w/o blocking
102                                  * if we can't, then it's time to submit
103 @@ -1668,10 +1696,18 @@ static int make_request (request_queue_t
104                                 }
105                         }
106                         if (sh) {
107 -                               add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK));
108 +                               if(!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
109 +                                       /* Could not add the bio: it overlaps a pending request. */
110 +                                       raid6_unplug_device(mddev->queue);
111 +                                       release_stripe(sh);
112 +                                       schedule();
113 +                                       goto retry;
114 +                               }
115 +                               finish_wait(&conf->wait_for_overlap, &w);
116                         } else {
117                                 /* cannot get stripe for read-ahead, just give-up */
118                                 clear_bit(BIO_UPTODATE, &bi->bi_flags);
119 +                               finish_wait(&conf->wait_for_overlap, &w);
120                                 sectors = 0;
121                                 break;
122                         }
123 @@ -1847,6 +1883,7 @@ static int run (mddev_t *mddev)
124  
125         conf->device_lock = SPIN_LOCK_UNLOCKED;
126         init_waitqueue_head(&conf->wait_for_stripe);
127 +       init_waitqueue_head(&conf->wait_for_overlap);
128         INIT_LIST_HEAD(&conf->handle_list);
129         INIT_LIST_HEAD(&conf->delayed_list);
130         INIT_LIST_HEAD(&conf->inactive_list);
131 diff -pur linux-2.6.9.orig/include/linux/raid/raid5.h linux-2.6.9/include/linux/raid/raid5.h
132 --- linux-2.6.9.orig/include/linux/raid/raid5.h 2008-01-10 13:46:05.000000000 +0800
133 +++ linux-2.6.9/include/linux/raid/raid5.h      2008-01-10 13:55:56.000000000 +0800
134 @@ -154,6 +154,8 @@ struct stripe_head {
135  #define        R5_Wantwrite    5
136  #define        R5_Syncio       6       /* this io need to be accounted as resync io */
137  #define        R5_Direct       7       /* use page from passed bio to avoid memcpy */
138 +#define        R5_Overlap      8       /* There is a pending overlapping request 
139 +                                        * on this block */
140  
141  /*
142   * Write method
143 @@ -221,6 +223,7 @@ struct raid5_private_data {
144         atomic_t                active_stripes;
145         struct list_head        inactive_list;
146         wait_queue_head_t       wait_for_stripe;
147 +       wait_queue_head_t       wait_for_overlap;
148         int                     inactive_blocked;       /* release of inactive stripes blocked,
149                                                          * waiting for 25% to be free
150                                                          */