Whamcloud - gitweb
b=24214 Discard preallocation blocks after a failed allocation.
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext3-mballoc-pa_free-mismatch.patch
1 Index: linux-stage/fs/ext3/mballoc.c
2 ===================================================================
3 --- linux-stage.orig/fs/ext3/mballoc.c  2010-01-26 23:36:25.000000000 +0800
4 +++ linux-stage/fs/ext3/mballoc.c       2010-01-26 23:42:57.000000000 +0800
5 @@ -35,6 +35,7 @@
6  #include <linux/pagemap.h>
7  #include <linux/seq_file.h>
8  #include <linux/version.h>
9 +#include <linux/genhd.h>
10  
11  #include "group.h"
12  
13 @@ -360,6 +361,7 @@
14         unsigned short          pa_free;        /* how many blocks are free */
15         unsigned short          pa_linear;      /* consumed in one direction
16                                                  * strictly, for group prealloc */
17 +       unsigned short          pa_error;
18         spinlock_t              *pa_obj_lock;
19         struct inode            *pa_inode;      /* hack, for history only */
20  };
21 @@ -3558,6 +3560,7 @@
22         spin_lock_init(&pa->pa_lock);
23         pa->pa_deleted = 0;
24         pa->pa_linear = 0;
25 +       pa->pa_error = 0;
26  
27         mb_debug("new inode pa %p: %lu/%lu for %lu\n", pa,
28                         pa->pa_pstart, pa->pa_len, pa->pa_lstart);
29 @@ -3615,6 +3618,7 @@
30         spin_lock_init(&pa->pa_lock);
31         pa->pa_deleted = 0;
32         pa->pa_linear = 1;
33 +       pa->pa_error = 0;
34  
35         mb_debug("new group pa %p: %lu/%lu for %lu\n", pa,
36                         pa->pa_pstart, pa->pa_len, pa->pa_lstart);
37 @@ -3671,7 +3675,10 @@
38         sector_t start;
39         int err = 0, free = 0;
40  
41 +       BUG_ON(!ext3_is_group_locked(sb, e3b->bd_group));
42         BUG_ON(pa->pa_deleted == 0);
43 +       BUG_ON(pa->pa_linear != 0);
44 +       BUG_ON(pa->pa_inode == NULL);
45         ext3_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
46         BUG_ON(group != e3b->bd_group && pa->pa_len != 0);
47         end = bit + pa->pa_len;
48 @@ -3704,14 +3711,19 @@
49                 mb_free_blocks(pa->pa_inode, e3b, bit, next - bit);
50                 bit = next + 1;
51         }
52 -       if (free != pa->pa_free) {
53 -               printk("pa %p: logic %lu, phys. %lu, len %lu\n",
54 -                       pa, (unsigned long) pa->pa_lstart,
55 -                       (unsigned long) pa->pa_pstart,
56 -                       (unsigned long) pa->pa_len);
57 -               printk("free %u, pa_free %u\n", free, pa->pa_free);
58 -       }
59 -       BUG_ON(free != pa->pa_free);
60 +
61 +       /* "free < pa->pa_free": the same blocks may be double-allocated;
62 +        * otherwise some free blocks may stay unavailable. No need to BUG. */
63 +       if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free))
64 +               ext3_error(sb, __FUNCTION__, "pa free mismatch: [pa %p] "
65 +                             "[phy %lu] [logic %lu] [len %u] [free %u] "
66 +                             "[error %u] [inode %lu] [freed %u]", pa,
67 +                             (unsigned long)pa->pa_pstart,
68 +                             (unsigned long)pa->pa_lstart,
69 +                             (unsigned)pa->pa_len, (unsigned)pa->pa_free,
70 +                             (unsigned)pa->pa_error, pa->pa_inode->i_ino,
71 +                             free);
72 +       BUG_ON(pa->pa_free != free);
73         atomic_add(free, &sbi->s_mb_discarded);
74  
75         return err;
76 @@ -4189,6 +4201,19 @@
77                 *errp = -EDQUOT;
78                 return 0;
79         }
80 +
81 +       if (dev_check_rdonly(sb->s_bdev)) {
82 +               struct block_device *bdev = sb->s_bdev;
83 +
84 +               printk(KERN_WARNING "Alloc from readonly device %s (%#x): "
85 +                      "[inode %lu] [logic %lu] [goal %lu] [ll %lu] [pl %lu] "
86 +                      "[lr %lu] [pr %lu] [len %lu] [flags %lu]\n",
87 +                      bdev->bd_disk ? bdev->bd_disk->disk_name : "",
88 +                      bdev->bd_dev, ar->inode->i_ino, ar->logical, ar->goal,
89 +                      ar->lleft, ar->pleft, ar->lright, ar->pright, ar->len,
90 +                      ar->flags);
91 +       }
92 +
93         inquota = ar->len;
94  
95         ext3_mb_poll_new_transaction(sb, handle);
96 @@ -4217,10 +4242,34 @@
97         }
98  
99         if (likely(ac.ac_status == AC_STATUS_FOUND)) {
100 -               ext3_mb_mark_diskspace_used(&ac, handle);
101 -               *errp = 0;
102 -               block = ext3_grp_offs_to_block(sb, &ac.ac_b_ex);
103 -               ar->len = ac.ac_b_ex.fe_len;
104 +               *errp = ext3_mb_mark_diskspace_used(&ac, handle);
105 +               if (!*errp) {
106 +                       block = ext3_grp_offs_to_block(sb, &ac.ac_b_ex);
107 +                       ar->len = ac.ac_b_ex.fe_len;
108 +               } else {
109 +                        ac.ac_b_ex.fe_len = 0;
110 +                        ar->len = 0;
111 +                        ext3_mb_show_ac(&ac);
112 +                        if (ac.ac_pa) {
113 +                                struct ext3_prealloc_space *pa = ac.ac_pa;
114 +
115 +                               /* On failure we cannot tell whether the bitmap
116 +                                * was updated, so pa_free cannot be reverted;
117 +                                * just record the failure in pa_error. */
118 +                               pa->pa_error++;
119 +                                ext3_error(sb, __FUNCTION__,
120 +                                             "Updating bitmap error: [err %d] "
121 +                                             "[pa %p] [phy %lu] [logic %lu] "
122 +                                             "[len %u] [free %u] [error %u] "
123 +                                             "[inode %lu]", *errp, pa,
124 +                                             (unsigned long)pa->pa_pstart,
125 +                                             (unsigned long)pa->pa_lstart,
126 +                                             (unsigned)pa->pa_len,
127 +                                             (unsigned)pa->pa_free,
128 +                                             (unsigned)pa->pa_error,
129 +                                       pa->pa_inode ? pa->pa_inode->i_ino : 0);
130 +                        }
131 +               }
132         } else {
133                 freed  = ext3_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len);
134                 if (freed)
135 @@ -4388,6 +4437,15 @@
136                 goto error_return;
137         }
138  
139 +       if (dev_check_rdonly(sb->s_bdev)) {
140 +               struct block_device *bdev = sb->s_bdev;
141 +
142 +               printk(KERN_WARNING "Release to readonly device %s (%#x): "
143 +                      "[inode %lu] [block %lu] [count %lu] [is_meta %d]\n",
144 +                      bdev->bd_disk ? bdev->bd_disk->disk_name : "",
145 +                      bdev->bd_dev, inode->i_ino, block, count, metadata);
146 +       }
147 +
148         ext3_debug("freeing block %lu\n", block);
149  
150         ac.ac_op = EXT3_MB_HISTORY_FREE;