Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-noread-2.4.21-chaos.patch
1  fs/ext3/ialloc.c        |   47 ++++++++++++++++++++++-
2  fs/ext3/inode.c         |   96 +++++++++++++++++++++++++++++++++++++-----------
3  include/linux/ext3_fs.h |    2 +
4  3 files changed, 121 insertions(+), 24 deletions(-)
5
6 Index: linux-2.4.21-chaos/fs/ext3/ialloc.c
7 ===================================================================
8 --- linux-2.4.21-chaos.orig/fs/ext3/ialloc.c    2003-12-12 12:56:39.000000000 +0300
9 +++ linux-2.4.21-chaos/fs/ext3/ialloc.c 2003-12-12 13:21:50.000000000 +0300
10 @@ -290,6 +290,37 @@
11  }
12  
13  /*
14 + * @block_group: block group of inode
15 + * @offset: relative offset of inode within @block_group
16 + *
17 + * Check whether any other inode in the same inode table block is in use.
18 + * Returns 1 if so or if load_inode_bitmap() returns < 0, and 0 otherwise.
19 + * Caller must be holding superblock lock (group/bitmap read lock in future).
20 + */
21 +int ext3_itable_block_used(struct super_block *sb, unsigned int block_group,
22 +                          int offset)
23 +{
24 +       int bitmap_nr = load_inode_bitmap(sb, block_group);
25 +       int inodes_per_block;
26 +       unsigned long inum, iend;
27 +       struct buffer_head *ibitmap;
28 +
29 +       if (bitmap_nr < 0)
30 +               return 1;
31 +
32 +       inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size;
33 +       inum = offset & ~(inodes_per_block - 1);
34 +       iend = inum + inodes_per_block;
35 +       ibitmap = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr];
36 +       for (; inum < iend; inum++) {
37 +               if (inum != offset && ext3_test_bit(inum, ibitmap->b_data))
38 +                       return 1;
39 +       }
40 +
41 +       return 0;
42 +}
43 +
44 +/*
45   * There are two policies for allocating an inode.  If the new inode is
46   * a directory, then a forward search is made for a block group with both
47   * free space and a low directory-to-inode ratio; if that fails, then of
48 @@ -312,6 +343,7 @@
49         struct ext3_group_desc * gdp;
50         struct ext3_group_desc * tmp;
51         struct ext3_super_block * es;
52 +       struct ext3_iloc iloc;
53         int err = 0;
54  
55         /* Cannot create files in a deleted directory */
56 @@ -513,8 +545,19 @@
57         inode->i_generation = sbi->s_next_generation++;
58  
59         ei->i_state = EXT3_STATE_NEW;
60 -       err = ext3_mark_inode_dirty(handle, inode);
61 -       if (err) goto fail;
62 +       err = ext3_get_inode_loc_new(inode, &iloc, 1);  /* new inode */
63 +       if (err) goto fail;
64 +       BUFFER_TRACE(iloc.bh, "get_write_access");
65 +       err = ext3_journal_get_write_access(handle, iloc.bh);
66 +       if (err) {
67 +               brelse(iloc.bh);
68 +               iloc.bh = NULL;
69 +               goto fail;
70 +       }
71 +       err = ext3_mark_iloc_dirty(handle, inode, &iloc);
72 +       if (err) goto fail;
73
74 +
75  
76  #ifdef CONFIG_EXT3_FS_XATTR
77         init_rwsem(&EXT3_I(inode)->xattr_sem);
78 Index: linux-2.4.21-chaos/fs/ext3/inode.c
79 ===================================================================
80 --- linux-2.4.21-chaos.orig/fs/ext3/inode.c     2003-12-12 13:01:48.000000000 +0300
81 +++ linux-2.4.21-chaos/fs/ext3/inode.c  2003-12-12 13:22:45.000000000 +0300
82 @@ -2291,16 +2291,21 @@
83  }
84  #endif /* EXT3_DELETE_THREAD */
85  
86 -/* 
87 - * ext3_get_inode_loc returns with an extra refcount against the
88 - * inode's underlying buffer_head on success. 
89 - */
90 -
91 -int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
92 +#define NUM_INODE_PREREAD      16
93 +
94 +/*
95 + * ext3_get_inode_loc_new returns with an extra refcount against the inode's
96 + * underlying buffer_head on success.  If this is for a new inode allocation
97 + * (new is non-zero) then we may be able to optimize away the read if there
98 + * are no other in-use inodes in this inode table block.  If we need to do
99 + * a read, then read in a whole chunk of blocks to avoid blocking again soon
100 + * if we are doing lots of creates/updates (up to NUM_INODE_PREREAD blocks).
101 + */
102 +int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new)
103  {
104         struct super_block *sb = inode->i_sb;
105         struct ext3_sb_info *sbi = EXT3_SB(sb);
106 -       struct buffer_head *bh = 0;
107 +       struct buffer_head *bh[NUM_INODE_PREREAD];
108         unsigned long block;
109         unsigned long block_group;
110         unsigned long group_desc;
111 @@ -2322,30 +2327,72 @@
112         }
113         group_desc = block_group >> sbi->s_desc_per_block_bits;
114         desc = block_group & (sbi->s_desc_per_block - 1);
115 -       bh = sbi->s_group_desc[group_desc];
116 -       if (!bh) {
117 +       if (!(sbi->s_group_desc[group_desc])) {
118                 ext3_error(sb, __FUNCTION__, "Descriptor not loaded");
119                 goto bad_inode;
120         }
121  
122 -       gdp = (struct ext3_group_desc *) bh->b_data;
123 +       gdp = (struct ext3_group_desc *)(sbi->s_group_desc[group_desc]->b_data);
124         /*
125          * Figure out the offset within the block group inode table
126          */
127 -       offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) *
128 -               sbi->s_inode_size;
129 +       offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb));
130 +
131         block = le32_to_cpu(gdp[desc].bg_inode_table) +
132 -               (offset >> EXT3_BLOCK_SIZE_BITS(sb));
133 -       if (!(bh = sb_bread(sb, block))) {
134 -               ext3_error (sb, __FUNCTION__,
135 -                           "unable to read inode block - "
136 -                           "inode=%lu, block=%lu", inode->i_ino, block);
137 -               goto bad_inode;
138 -       }
139 -       offset &= (EXT3_BLOCK_SIZE(sb) - 1);
140 +               (offset * EXT3_INODE_SIZE(sb) >> EXT3_BLOCK_SIZE_BITS(sb));
141  
142 -       iloc->bh = bh;
143 -       iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset);
144 +       bh[0] = sb_getblk(inode->i_sb, block);
145 +       if (buffer_uptodate(bh[0]))
146 +               goto done;
147
148 +       /* If we don't really need to read this block, and it isn't already
149 +        * in memory, then we just zero it out.  Otherwise, we keep the
150 +        * current block contents (deleted inode data) for posterity.
151 +        */
152 +       if (new && !ext3_itable_block_used(inode->i_sb, block_group, offset)) {
153 +               lock_buffer(bh[0]);
154 +               memset(bh[0]->b_data, 0, bh[0]->b_size);
155 +               mark_buffer_uptodate(bh[0], 1);
156 +               unlock_buffer(bh[0]);
157 +       } else {
158 +               unsigned long block_end, itable_end;
159 +               int count = 1;
160
161 +               itable_end = le32_to_cpu(gdp[desc].bg_inode_table) +
162 +                               inode->i_sb->u.ext3_sb.s_itb_per_group;
163 +               block_end = block + NUM_INODE_PREREAD;
164 +               if (block_end > itable_end)
165 +                       block_end = itable_end;
166 +
167 +               for (++block; block < block_end; block++) {
168 +                       bh[count] = sb_getblk(inode->i_sb, block);
169 +                       if (count && (buffer_uptodate(bh[count]) ||
170 +                                     buffer_locked(bh[count]))) {
171 +                               __brelse(bh[count]);
172 +                       } else
173 +                               count++;
174 +               }
175
176 +               ll_rw_block(READ, count, bh);
177
178 +               /* Release all but the block we actually need (bh[0]) */
179 +               while (--count > 0)
180 +                       __brelse(bh[count]);
181
182 +               wait_on_buffer(bh[0]);
183 +               if (!buffer_uptodate(bh[0])) {
184 +                       ext3_error(inode->i_sb, __FUNCTION__,
185 +                                  "unable to read inode block - inode=%lu, "
186 +                                  "block=%lu", inode->i_ino, bh[0]->b_blocknr);
187 +                       brelse(bh[0]);  /* drop the sb_getblk() reference */
188 +                       goto bad_inode;
189 +               }
190 +       }
191 + done:
192 +       offset = (offset * EXT3_INODE_SIZE(inode->i_sb)) & (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
193 +
194 +       iloc->bh = bh[0];
195 +       iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset);
196         iloc->block_group = block_group;
197         
198         return 0;
199 @@ -2370,6 +2417,11 @@
200  }
201  
202  
203 +int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
204 +{
205 +       return ext3_get_inode_loc_new(inode, iloc, 0);  /* not a new inode */
206 +}
207
208  void ext3_read_inode(struct inode * inode)
209  {
210         struct ext3_iloc iloc;
211 Index: linux-2.4.21-chaos/include/linux/ext3_fs.h
212 ===================================================================
213 --- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h     2003-12-12 13:01:48.000000000 +0300
214 +++ linux-2.4.21-chaos/include/linux/ext3_fs.h  2003-12-12 13:21:50.000000000 +0300
215 @@ -683,6 +683,8 @@
216  extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
217  extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
218  
219 +extern int ext3_itable_block_used(struct super_block *sb, unsigned int, int);
220 +extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int);
221  extern int  ext3_get_inode_loc (struct inode *, struct ext3_iloc *);
222  extern void ext3_read_inode (struct inode *);
223  extern void ext3_write_inode (struct inode *, int);