1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * linux/fs/obdfilter/filter_io.c
6 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
7 * Author: Peter Braam <braam@clusterfs.com>
8 * Author: Andreas Dilger <adilger@clusterfs.com>
9 * Author: Phil Schwan <phil@clusterfs.com>
11 * This file is part of Lustre, http://www.lustre.org.
13 * Lustre is free software; you can redistribute it and/or
14 * modify it under the terms of version 2 of the GNU General Public
15 * License as published by the Free Software Foundation.
17 * Lustre is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with Lustre; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #include <linux/config.h>
28 #include <linux/module.h>
29 #include <linux/pagemap.h> // XXX kill me soon
30 #include <linux/version.h>
32 #define DEBUG_SUBSYSTEM S_FILTER
34 #include <linux/obd_class.h>
35 #include <linux/lustre_fsfilt.h>
36 #include "filter_internal.h"
38 #warning "implement writeback mode -bzzz"
40 /* 512byte block min, so one page holds at most PAGE_SIZE / 512 blocks.
 * This bound sizes the per-page created[] and blocks[] arrays, and
 * filter_commitrw_write() asserts that blocks_per_page never exceeds it. */
41 #define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512)
/* NOTE(review): these fields are accessed as dreq->... by the functions in
 * this file, so they are presumably the members of struct dio_request; the
 * struct's opening and closing lines are not visible in this view. */
43 atomic_t numreqs; /* number of reqs being processed */
/* completed bios are chained through their bi_private pointers */
44 struct bio *bio_list; /* list of completed bios */
/* filter_commitrw_write() sleeps on this queue until numreqs reads zero */
45 wait_queue_head_t wait;
/* passed to fsfilt_map_inode_pages() together with blocks[] for one page */
46 int created[MAX_BLOCKS_PER_PAGE];
/* block numbers for the page being mapped; the writer multiplies each entry
 * by (s_blocksize >> 9) to obtain the sector handed to the bio */
47 unsigned long blocks[MAX_BLOCKS_PER_PAGE];
/*
 * Completion callback: filter_commitrw_write() installs it as bi_end_io on
 * each bio it allocates, with bi_private pointing at the owning dio_request.
 *
 * The 'done' and 'error' arguments are not used by any of the visible lines.
 *
 * NOTE(review): several lines of this function are not visible in this view
 * (braces, the declaration of 'flags', and the statements that follow the
 * visible ones); remarks marked "presumably" cover those gaps and should be
 * confirmed against the full file.
 */
51 static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
53 struct dio_request *dreq = bio->bi_private;
/* dreq->lock is held, with the interrupt state saved in 'flags', while the
 * completed-bio list is touched */
56 spin_lock_irqsave(&dreq->lock, flags);
/* bi_private is reused as the "next" link of the completed-bio list:
 * filter_commitrw_write() later walks dreq->bio_list through bi_private.
 * Presumably the following (not visible) statement makes this bio the new
 * list head. */
57 bio->bi_private = dreq->bio_list;
59 spin_unlock_irqrestore(&dreq->lock, flags);
/* Drop this bio's count on the request.  filter_commitrw_write() waits on
 * dreq->wait for numreqs == 0, so the (not visible) body of this branch
 * presumably wakes that waiter -- TODO confirm. */
60 if (atomic_dec_and_test(&dreq->numreqs))
/*
 * Tell whether a block starting at 'sector' begins exactly where the data
 * already described by 'bio' ends.
 *
 * The visible return statement yields 1 when bio->bi_sector plus the bio's
 * current size in 512-byte units equals 'sector', and 0 otherwise.
 *
 * NOTE(review): the lines between the signature and the first visible
 * statement (including the declaration of 'size') are not visible in this
 * view, so any earlier exit they may contain is not described here.
 */
66 static int can_be_merged(struct bio *bio, sector_t sector)
/* bi_size >> 9 divides the bio's size by 512 */
73 size = bio->bi_size >> 9;
74 return bio->bi_sector + size == sector ? 1 : 0;
77 /* See if there are unallocated parts in given file region.
 *
 * The region is probed one filesystem block at a time through the bmap
 * operation of the inode's address space; a bmap result of 0 triggers the
 * branch at the end of the loop.
 *
 * filter_commitrw_write() calls this for buffers that lack
 * OBD_BRW_GRANTED and carry rc == -ENOSPC, to detect overwrites of existing
 * blocks (which, per the caller's comment, do not need a grant).
 *
 * NOTE(review): the declaration of 'j', the statements guarded by the two
 * checks, and the final return are not visible in this view; presumably a
 * zero bmap result returns 0 and a fully mapped range returns 1 -- confirm.
 */
78 static int filter_range_is_mapped(struct inode *inode, obd_size offset, int len)
80 sector_t (*fs_bmap)(struct address_space *, sector_t) =
81 inode->i_mapping->a_ops->bmap;
84 /* We can't know if we are overwriting or not */
/* convert 'offset' and 'len' to units of filesystem blocks */
88 offset >>= inode->i_blkbits;
89 len >>= inode->i_blkbits;
/* NOTE(review): the bound is inclusive, so (len + 1) blocks are probed
 * starting at block 'offset'.  That covers a range that straddles a block
 * boundary, but for a block-aligned range it also probes the block just past
 * the end -- confirm this conservative behaviour is intended. */
91 for (j = 0; j <= len; j++)
92 if (fs_bmap(inode->i_mapping, offset + j) == 0)
98 int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
99 int objcount, struct obd_ioobj *obj, int niocount,
100 struct niobuf_local *res, struct obd_trans_info *oti,
103 struct bio *bio = NULL;
104 int blocks_per_page, err;
105 struct niobuf_local *lnb;
106 struct lvfs_run_ctxt saved;
107 struct fsfilt_objinfo fso;
108 struct iattr iattr = { 0 };
109 struct inode *inode = NULL;
110 unsigned long now = jiffies;
111 int i, k, cleanup_phase = 0;
113 struct dio_request *dreq = NULL;
114 struct obd_device *obd = exp->exp_obd;
118 LASSERT(oti != NULL);
119 LASSERT(objcount == 1);
120 LASSERT(current->journal_info == NULL);
125 inode = res->dentry->d_inode;
126 blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
127 LASSERT(blocks_per_page <= MAX_BLOCKS_PER_PAGE);
129 OBD_ALLOC(dreq, sizeof(*dreq));
134 dreq->bio_list = NULL;
135 init_waitqueue_head(&dreq->wait);
136 atomic_set(&dreq->numreqs, 0);
137 spin_lock_init(&dreq->lock);
140 fso.fso_dentry = res->dentry;
141 fso.fso_bufcnt = obj->ioo_bufcnt;
143 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
146 oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso,
149 if (IS_ERR(oti->oti_handle)) {
150 rc = PTR_ERR(oti->oti_handle);
151 CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
152 "error starting transaction: rc = %d\n", rc);
153 oti->oti_handle = NULL;
157 if (time_after(jiffies, now + 15 * HZ))
158 CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
160 iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME);
161 for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
164 struct page *pages[1];
167 /* If overwriting an existing block, we don't need a grant */
168 if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC &&
169 filter_range_is_mapped(inode, lnb->offset, lnb->len))
172 if (lnb->rc) /* ENOSPC, network RPC error, etc. */
175 /* get block number for next page */
176 pages[0] = lnb->page;
177 rc = fsfilt_map_inode_pages(obd, inode, pages, 1,
178 dreq->blocks, dreq->created, 1,
183 for (k = 0; k < blocks_per_page; k++) {
184 sector = dreq->blocks[k] *(inode->i_sb->s_blocksize>>9);
185 offs = k * inode->i_sb->s_blocksize;
187 if (!bio || !can_be_merged(bio, sector) ||
188 !bio_add_page(bio, lnb->page, lnb->len, offs)) {
190 atomic_inc(&dreq->numreqs);
191 submit_bio(WRITE, bio);
194 /* allocate new bio */
195 bio = bio_alloc(GFP_NOIO, obj->ioo_bufcnt);
196 bio->bi_bdev = inode->i_sb->s_bdev;
197 bio->bi_sector = sector;
198 bio->bi_end_io = dio_complete_routine;
199 bio->bi_private = dreq;
201 if (!bio_add_page(bio, lnb->page, lnb->len, 0))
206 /* we expect these pages to be in offset order, but we'll
208 this_size = lnb->offset + lnb->len;
209 if (this_size > iattr.ia_size)
210 iattr.ia_size = this_size;
213 #warning This probably needs filemap_fdatasync() like filter_io_24 (bug 2366)
215 atomic_inc(&dreq->numreqs);
216 fsfilt_send_bio(obd, inode, bio);
219 /* time to wait for I/O completion */
220 wait_event(dreq->wait, atomic_read(&dreq->numreqs) == 0);
223 while (dreq->bio_list) {
224 bio = dreq->bio_list;
225 dreq->bio_list = bio->bi_private;
230 if (iattr.ia_size > inode->i_size) {
231 CDEBUG(D_INFO, "setting i_size to "LPU64"\n",
234 iattr.ia_valid |= ATTR_SIZE;
236 fsfilt_setattr(obd, res->dentry, oti->oti_handle,
241 if (time_after(jiffies, now + 15 * HZ))
242 CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
244 rc = filter_finish_transno(exp, oti, rc);
246 err = fsfilt_commit(obd, inode, oti->oti_handle, obd_sync_filter);
251 LASSERT(oti->oti_transno <= obd->obd_last_committed);
253 if (time_after(jiffies, now + 15 * HZ))
254 CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
257 filter_grant_commit(exp, niocount, res);
259 switch (cleanup_phase) {
261 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
262 LASSERT(current->journal_info == NULL);
264 OBD_FREE(dreq, sizeof(*dreq));
266 for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
267 filter_release_write_page(&obd->u.filter,
268 res->dentry->d_inode, lnb,