Whamcloud - gitweb
32adb9f651ef7397f7d1c8c9f63ebadaf73c7414
[fs/lustre-release.git] / lustre / obdfilter / filter_io_24.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  linux/fs/obdfilter/filter_io.c
5  *
6  *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
7  *   Author: Peter Braam <braam@clusterfs.com>
8  *   Author: Andreas Dilger <adilger@clusterfs.com>
9  *   Author: Phil Schwan <phil@clusterfs.com>
10  *
11  *   This file is part of Lustre, http://www.lustre.org.
12  *
13  *   Lustre is free software; you can redistribute it and/or
14  *   modify it under the terms of version 2 of the GNU General Public
15  *   License as published by the Free Software Foundation.
16  *
17  *   Lustre is distributed in the hope that it will be useful,
18  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
19  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  *   GNU General Public License for more details.
21  *
22  *   You should have received a copy of the GNU General Public License
23  *   along with Lustre; if not, write to the Free Software
24  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26
27 #include <linux/config.h>
28 #include <linux/module.h>
29 #include <linux/pagemap.h> // XXX kill me soon
30 #include <linux/version.h>
31
32 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
33
34 #define DEBUG_SUBSYSTEM S_FILTER
35
36 #include <linux/iobuf.h>
37 #include <linux/locks.h>
38
39 #include <linux/obd_class.h>
40 #include <linux/lustre_fsfilt.h>
41 #include "filter_internal.h"
42
43
44 /* We should only change the file mtime (and not the ctime, like
45  * update_inode_times() in generic_file_write()) when we only change data. */
46 void inode_update_time(struct inode *inode, int ctime_too)
47 {
48         time_t now = CURRENT_TIME;
49         if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now))
50                 return;
51         inode->i_mtime = now;
52         if (ctime_too)
53                 inode->i_ctime = now;
54         mark_inode_dirty_sync(inode);
55 }
56
57 /* Bug 2254 -- this is better done in ext3_map_inode_page, but this
58  * workaround will suffice until everyone has upgraded their kernels */
59 static void check_pending_bhs(unsigned long *blocks, int nr_pages, dev_t dev,
60                               int size)
61 {
62 #if (LUSTRE_KERNEL_VERSION < 32)
63         struct buffer_head *bh;
64         int i;
65
66         for (i = 0; i < nr_pages; i++) {
67                 bh = get_hash_table(dev, blocks[i], size);
68                 if (bh == NULL)
69                         continue;
70                 if (!buffer_dirty(bh)) {
71                         put_bh(bh);
72                         continue;
73                 }
74                 mark_buffer_clean(bh);
75                 wait_on_buffer(bh);
76                 clear_bit(BH_Req, &bh->b_state);
77                 __brelse(bh);
78         }
79 #endif
80 }
81
82 /* Must be called with i_sem taken; this will drop it */
83 static int filter_direct_io(int rw, struct dentry *dchild, struct kiobuf *iobuf,
84                             struct obd_export *exp, struct iattr *attr,
85                             struct obd_trans_info *oti, void **wait_handle)
86 {
87         struct obd_device *obd = exp->exp_obd;
88         struct inode *inode = dchild->d_inode;
89         struct page *page;
90         unsigned long *b = iobuf->blocks;
91         int rc, i, create = (rw == OBD_BRW_WRITE), blocks_per_page;
92         int *cr, cleanup_phase = 0, *created = NULL;
93         int committed = 0;
94         ENTRY;
95
96         blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
97         if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS)
98                 GOTO(cleanup, rc = -EINVAL);
99
100         OBD_ALLOC(created, sizeof(*created) * iobuf->nr_pages*blocks_per_page);
101         if (created == NULL)
102                 GOTO(cleanup, rc = -ENOMEM);
103         cleanup_phase = 1;
104
105         rc = lock_kiovec(1, &iobuf, 1);
106         if (rc < 0)
107                 GOTO(cleanup, rc);
108         cleanup_phase = 2;
109
110         down(&exp->exp_obd->u.filter.fo_alloc_lock);
111         for (i = 0, cr = created, b = iobuf->blocks; i < iobuf->nr_pages; i++){
112                 page = iobuf->maplist[i];
113
114                 rc = fsfilt_map_inode_page(obd, inode, page, b, cr, create);
115                 if (rc) {
116                         CERROR("ino %lu, blk %lu cr %u create %d: rc %d\n",
117                                inode->i_ino, *b, *cr, create, rc);
118                         up(&exp->exp_obd->u.filter.fo_alloc_lock);
119                         GOTO(cleanup, rc);
120                 }
121
122                 b += blocks_per_page;
123                 cr += blocks_per_page;
124         }
125         up(&exp->exp_obd->u.filter.fo_alloc_lock);
126
127         filter_tally_write(&obd->u.filter, iobuf->maplist, iobuf->nr_pages,
128                            iobuf->blocks, blocks_per_page);
129
130         if (attr->ia_size > inode->i_size)
131                 attr->ia_valid |= ATTR_SIZE;
132         rc = fsfilt_setattr(obd, dchild, oti->oti_handle, attr, 0);
133         if (rc)
134                 GOTO(cleanup, rc);
135
136         up(&inode->i_sem);
137         cleanup_phase = 3;
138
139         rc = filter_finish_transno(exp, oti, 0);
140         if (rc)
141                 GOTO(cleanup, rc);
142
143         rc = fsfilt_commit_async(obd, inode, oti->oti_handle, wait_handle);
144         oti->oti_handle = NULL;
145         committed = 1;
146         if (rc)
147                 GOTO(cleanup, rc);
148
149         check_pending_bhs(iobuf->blocks, iobuf->nr_pages, inode->i_dev,
150                           1 << inode->i_blkbits);
151
152         rc = filemap_fdatasync(inode->i_mapping);
153         if (rc == 0)
154                 rc = fsync_inode_data_buffers(inode);
155         if (rc == 0)
156                 rc = filemap_fdatawait(inode->i_mapping);
157         if (rc < 0)
158                 GOTO(cleanup, rc);
159
160         rc = brw_kiovec(WRITE, 1, &iobuf, inode->i_dev, iobuf->blocks,
161                         1 << inode->i_blkbits);
162         CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n",
163                iobuf->nr_pages, rc);
164         if (rc != (1 << inode->i_blkbits) * iobuf->nr_pages * blocks_per_page)
165                 CERROR("short write?  expected %d, wrote %d\n",
166                        (1 << inode->i_blkbits) * iobuf->nr_pages *
167                        blocks_per_page, rc);
168         if (rc > 0)
169                 rc = 0;
170
171         EXIT;
172 cleanup:
173         if (!committed) {
174                 int err = fsfilt_commit_async(obd, inode,
175                                               oti->oti_handle, wait_handle);
176                 oti->oti_handle = NULL;
177                 if (err)
178                         CERROR("can't close transaction: %d\n", err);
179                 /*
180                  * this is error path, so we prefer to return
181                  * original error, not this one
182                  */
183         }
184
185         switch(cleanup_phase) {
186         case 3:
187         case 2:
188                 unlock_kiovec(1, &iobuf);
189         case 1:
190                 OBD_FREE(created, sizeof(*created) *
191                          iobuf->nr_pages*blocks_per_page);
192         case 0:
193                 if (cleanup_phase == 3)
194                         break;
195                 up(&inode->i_sem);
196                 break;
197         default:
198                 CERROR("corrupt cleanup_phase (%d)?\n", cleanup_phase);
199                 LBUG();
200                 break;
201         }
202         return rc;
203 }
204
205 int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
206                           struct obd_ioobj *obj, int niocount,
207                           struct niobuf_local *res, struct obd_trans_info *oti)
208 {
209         struct obd_device *obd = exp->exp_obd;
210         struct obd_run_ctxt saved;
211         struct niobuf_local *lnb;
212         struct fsfilt_objinfo fso;
213         struct iattr iattr = { 0 };
214         struct kiobuf *iobuf;
215         struct inode *inode = NULL;
216         int rc = 0, i, cleanup_phase = 0, err;
217         unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */
218         void *wait_handle;
219         ENTRY;
220         LASSERT(oti != NULL);
221         LASSERT(objcount == 1);
222         LASSERT(current->journal_info == NULL);
223
224         rc = alloc_kiovec(1, &iobuf);
225         if (rc)
226                 GOTO(cleanup, rc);
227         cleanup_phase = 1;
228
229 #if (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,18))
230         iobuf->dovary = 0; /* this prevents corruption, not present in 2.4.20 */
231 #endif
232         rc = expand_kiobuf(iobuf, obj->ioo_bufcnt);
233         if (rc)
234                 GOTO(cleanup, rc);
235
236         iobuf->offset = 0;
237         iobuf->length = PAGE_SIZE * obj->ioo_bufcnt;
238         iobuf->nr_pages = obj->ioo_bufcnt;
239
240         cleanup_phase = 1;
241         fso.fso_dentry = res->dentry;
242         fso.fso_bufcnt = obj->ioo_bufcnt;
243         inode = res->dentry->d_inode;
244
245         iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME);
246         for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
247                 loff_t this_size;
248                 iobuf->maplist[i] = lnb->page;
249                 /* We expect these pages to be in offset order, but we'll
250                  * be forgiving */
251                 this_size = lnb->offset + lnb->len;
252                 if (this_size > iattr.ia_size)
253                         iattr.ia_size = this_size;
254         }
255
256         push_ctxt(&saved, &obd->obd_ctxt, NULL);
257         cleanup_phase = 2;
258
259         down(&inode->i_sem);
260         oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, res,
261                                            oti);
262         if (IS_ERR(oti->oti_handle)) {
263                 rc = PTR_ERR(oti->oti_handle);
264                 CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
265                        "error starting transaction: rc = %d\n", rc);
266                 oti->oti_handle = NULL;
267                 GOTO(cleanup, rc);
268         }
269
270         if (time_after(jiffies, now + 15 * HZ))
271                 CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
272
273         rc = filter_direct_io(OBD_BRW_WRITE, res->dentry, iobuf, exp, &iattr,
274                               oti, &wait_handle);
275         if (rc == 0)
276                 obdo_from_inode(oa, inode, FILTER_VALID_FLAGS);
277
278         if (time_after(jiffies, now + 15 * HZ))
279                 CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
280
281         err = fsfilt_commit_wait(obd, inode, wait_handle);
282         if (err)
283                 rc = err;
284         if (obd_sync_filter)
285                 LASSERT(oti->oti_transno <= obd->obd_last_committed);
286         if (time_after(jiffies, now + 15 * HZ))
287                 CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
288
289 cleanup:
290         switch (cleanup_phase) {
291         case 2:
292                 pop_ctxt(&saved, &obd->obd_ctxt, NULL);
293                 LASSERT(current->journal_info == NULL);
294         case 1:
295                 free_kiovec(1, &iobuf);
296         case 0:
297                 for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
298                         /* flip_.. gets a ref, while free_page only frees
299                          * when it decrefs to 0 */
300                         if (rc == 0)
301                                 flip_into_page_cache(inode, lnb->page);
302                         __free_page(lnb->page);
303                 }
304                 f_dput(res->dentry);
305         }
306
307         RETURN(rc);
308 }
309
310 #endif
311