Whamcloud - gitweb
b=3119
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-o_direct-1.2.4.20-rh.patch
1
2 Index: linux-2.4.20-rh/fs/ext3/inode.c
3 ===================================================================
4 --- linux-2.4.20-rh.orig/fs/ext3/inode.c        2003-09-04 18:01:41.000000000 +0800
5 +++ linux-2.4.20-rh/fs/ext3/inode.c     2003-09-04 18:18:54.000000000 +0800
6 @@ -27,6 +27,7 @@
7  #include <linux/ext3_jbd.h>
8  #include <linux/jbd.h>
9  #include <linux/locks.h>
10 +#include <linux/iobuf.h>
11  #include <linux/smp_lock.h>
12  #include <linux/highuid.h>
13  #include <linux/quotaops.h>
14 @@ -743,9 +744,9 @@
15   * The BKL may not be held on entry here.  Be sure to take it early.
16   */
17  
18 -static int ext3_get_block_handle(handle_t *handle, struct inode *inode, 
19 -                                long iblock,
20 -                                struct buffer_head *bh_result, int create)
21 +static int
22 +ext3_get_block_handle(handle_t *handle, struct inode *inode, long iblock,
23 +               struct buffer_head *bh_result, int create, int extend_disksize)
24  {
25         int err = -EIO;
26         int offsets[4];
27 @@ -825,15 +826,18 @@
28         if (err)
29                 goto cleanup;
30  
31 -       new_size = inode->i_size;
32 -       /*
33 -        * This is not racy against ext3_truncate's modification of i_disksize
34 -        * because VM/VFS ensures that the file cannot be extended while
35 -        * truncate is in progress.  It is racy between multiple parallel
36 -        * instances of get_block, but we have the BKL.
37 -        */
38 -       if (new_size > inode->u.ext3_i.i_disksize)
39 -               inode->u.ext3_i.i_disksize = new_size;
40 +       if (extend_disksize) {
41 +               /*
42 +                * This is not racy against ext3_truncate's modification of
43 +                * i_disksize because VM/VFS ensures that the file cannot be
44 +                * extended while truncate is in progress.  It is racy between
45 +                * multiple parallel instances of get_block, but we have BKL.
46 +                */
47 +               struct ext3_inode_info *ei = EXT3_I(inode);
48 +               new_size = inode->i_size;
49 +               if (new_size > ei->i_disksize)
50 +                       ei->i_disksize = new_size;
51 +       }
52  
53         bh_result->b_state |= (1UL << BH_New);
54         goto got_it;
55 @@ -861,7 +865,38 @@
56                 handle = ext3_journal_current_handle();
57                 J_ASSERT(handle != 0);
58         }
59 -       ret = ext3_get_block_handle(handle, inode, iblock, bh_result, create);
60 +       ret = ext3_get_block_handle(handle, inode, iblock,
61 +                               bh_result, create, 1);
62 +       return ret;
63 +}
64 +
65 +#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) /* O_DIRECT handle size */
66 +
67 +/*
68 + * get_block callback handed to generic_direct_IO().  Tops up the running
69 + * handle's buffer credits when they run low, then maps the block with
70 + * extend_disksize == 0.  Takes the BKL itself.
71 + */
72 +static int
73 +ext3_direct_io_get_block(struct inode *inode, long iblock,
74 +               struct buffer_head *bh_result, int create)
75 +{
76 +       handle_t *handle = journal_current_handle();
77 +       int ret = 0;
78 +
79 +       lock_kernel();
80 +       if (handle && handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) {
81 +               /* Low on credits.  journal_extend(): 0 ok, > 0 full, < 0 error */
82 +               ret = ext3_journal_extend(handle, DIO_CREDITS);
83 +               if (ret > 0)    /* transaction full: start a new one */
84 +                       ret = ext3_journal_restart(handle, DIO_CREDITS);
85 +       }
86 +       if (ret == 0)
87 +               ret = ext3_get_block_handle(handle, inode, iblock,
88 +                                       bh_result, create, 0);
89 +       if (ret == 0)
90 +               bh_result->b_size = (1 << inode->i_blkbits);
91 +       unlock_kernel();
92         return ret;
93  }
94  
95 @@ -879,7 +914,7 @@
96         dummy.b_state = 0;
97         dummy.b_blocknr = -1000;
98         buffer_trace_init(&dummy.b_history);
99 -       *errp = ext3_get_block_handle(handle, inode, block, &dummy, create);
100 +       *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
101         if (!*errp && buffer_mapped(&dummy)) {
102                 struct buffer_head *bh;
103                 bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
104 @@ -1387,6 +1422,67 @@
105         return journal_try_to_free_buffers(journal, page, wait);
106  }
107  
108 +/* direct_IO method.  A WRITE runs under one journal handle; if it extends
109 + * the file the inode sits on the orphan list for the duration of the I/O. */
110 +static int
111 +ext3_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
112 +               unsigned long blocknr, int blocksize)
113 +{
114 +       struct ext3_inode_info *ei = EXT3_I(inode);
115 +       handle_t *handle = NULL;
116 +       int ret, orphan = 0;
117 +       /* Widen first: shifting a 32-bit unsigned long would wrap the offset. */
118 +       loff_t offset = (loff_t)blocknr << inode->i_blkbits;    /* ugh */
119 +       ssize_t count = iobuf->length;                  /* ditto */
120 +
121 +       if (rw == WRITE) {
122 +               loff_t final_size = offset + count;
123 +
124 +               lock_kernel();
125 +               handle = ext3_journal_start(inode, DIO_CREDITS);
126 +               unlock_kernel();
127 +               if (IS_ERR(handle)) {
128 +                       ret = PTR_ERR(handle);
129 +                       goto out;
130 +               }
131 +               if (final_size > inode->i_size) {
132 +                       lock_kernel();
133 +                       ret = ext3_orphan_add(handle, inode);
134 +                       unlock_kernel();
135 +                       if (ret)
136 +                               goto out_stop;
137 +                       orphan = 1;
138 +                       ei->i_disksize = inode->i_size;
139 +               }
140 +       }
141 +
142 +       ret = generic_direct_IO(rw, inode, iobuf, blocknr,
143 +                               blocksize, ext3_direct_io_get_block);
144 +
145 +out_stop:
146 +       if (handle) {
147 +               int err;
148 +
149 +               lock_kernel();
150 +               if (orphan)
151 +                       ext3_orphan_del(handle, inode);
152 +               if (orphan && ret > 0) {
153 +                       loff_t end = offset + ret;
154 +                       if (end > inode->i_size) {
155 +                               ei->i_disksize = end;
156 +                               inode->i_size = end;
157 +                               /* ret > 0: failure can't be reported */
158 +                               ext3_mark_inode_dirty(handle, inode);
159 +                       }
160 +               }
161 +               err = ext3_journal_stop(handle, inode);
162 +               if (ret == 0)
163 +                       ret = err;
164 +               unlock_kernel();
165 +       }
166 +out:
167 +       return ret;
168 +}
169  
170  struct address_space_operations ext3_aops = {
171         readpage:       ext3_readpage,          /* BKL not held.  Don't need */
172 @@ -1397,6 +1493,7 @@
173         bmap:           ext3_bmap,              /* BKL held */
174         flushpage:      ext3_flushpage,         /* BKL not held.  Don't need */
175         releasepage:    ext3_releasepage,       /* BKL not held.  Don't need */
176 +       direct_IO:      ext3_direct_IO,         /* BKL not held.  Don't need */
177  };
178  
179  /*
180 @@ -2970,7 +3067,7 @@
181         /* alloc blocks one by one */
182         for (i = 0; i < nblocks; i++) {
183                 ret = ext3_get_block_handle(handle, inode, blocks[i],
184 -                                               &bh_tmp, 1);
185 +                                               &bh_tmp, 1, 1);
186                 if (ret)
187                         break;
188  
189 @@ -3030,7 +3127,7 @@
190                  if (blocks[i] != 0)
191                          continue;
192  
193 -                rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1);
194 +                rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1, 1);
195                  if (rc) {
196                          printk(KERN_INFO "ext3_map_inode_page: error %d "
197                                 "allocating block %ld\n", rc, iblock);