From 9377e08f7ebbdb6ec2a927c20d4f71fc771c3390 Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 4 Sep 2003 17:01:15 +0000 Subject: [PATCH] - those files contain 2.4- and 2.6-specific routines of filter I/O part --- lustre/obdfilter/filter_io_24.c | 224 ++++++++++++++++++++++++++++++++++++++++ lustre/obdfilter/filter_io_26.c | 192 ++++++++++++++++++++++++++++++++++ 2 files changed, 416 insertions(+) create mode 100644 lustre/obdfilter/filter_io_24.c create mode 100644 lustre/obdfilter/filter_io_26.c diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c new file mode 100644 index 0000000..dd1a185 --- /dev/null +++ b/lustre/obdfilter/filter_io_24.c @@ -0,0 +1,224 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * linux/fs/obdfilter/filter_io.c + * + * Copyright (c) 2001-2003 Cluster File Systems, Inc. + * Author: Peter Braam + * Author: Andreas Dilger + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */
+
+/* NOTE(review): the header names on the <...> include lines of this file
+ * were missing from the reviewed copy of the patch; the names below are a
+ * best guess based on what the code uses -- confirm against the tree. */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/pagemap.h> // XXX kill me soon
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/iobuf.h>
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include "filter_internal.h"
+
+
+int ext3_map_inode_page(struct inode *inode, struct page *page,
+                        unsigned long *blocks, int *created, int create);
+
+/*
+ * Map every page of @iobuf to blocks of @inode and hand the kiobuf to
+ * brw_kiovec().
+ *
+ * @rw:    OBD_BRW_WRITE makes ext3_map_inode_page() be called with
+ *         create == 1; any other value maps with create == 0.
+ * @inode: inode the pages belong to; i_sem is held while mapping.
+ * @iobuf: kiobuf whose maplist/nr_pages/length are already filled in;
+ *         iobuf->blocks receives the block numbers.
+ *
+ * Returns 0 when brw_kiovec() transferred the full length, -EINVAL when
+ * the request needs more than KIO_MAX_SECTORS blocks, -ENOMEM when the
+ * "created" scratch array cannot be allocated, -EIO when fewer bytes than
+ * expected were transferred, or the error returned by lock_kiovec(),
+ * ext3_map_inode_page() or brw_kiovec().
+ */
+int filter_direct_io(int rw, struct inode *inode, struct kiobuf *iobuf)
+{
+        struct page *page;
+        unsigned long *b;
+        int rc, i, create = (rw == OBD_BRW_WRITE), blocks_per_page, *created;
+        int *cr, cleanup_phase, expected;
+        ENTRY;
+
+        blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+        if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS)
+                RETURN(-EINVAL);
+
+        /* one "created" flag per block, filled in by ext3_map_inode_page() */
+        OBD_ALLOC(created, sizeof(*created) * iobuf->nr_pages*blocks_per_page);
+        if (created == NULL)
+                RETURN(-ENOMEM);
+        cleanup_phase = 1;
+
+        rc = lock_kiovec(1, &iobuf, 1);
+        if (rc < 0)
+                GOTO(cleanup, rc);
+        cleanup_phase = 2;
+
+        down(&inode->i_sem);
+        cleanup_phase = 3;
+        for (i = 0, cr = created, b = iobuf->blocks; i < iobuf->nr_pages; i++){
+                page = iobuf->maplist[i];
+
+                rc = ext3_map_inode_page(inode, page, b, cr, create);
+                if (rc)
+                        GOTO(cleanup, rc);
+
+                b += blocks_per_page;
+                cr += blocks_per_page;
+        }
+        up(&inode->i_sem);
+        cleanup_phase = 2;
+
+        /* NOTE(review): the direction is always WRITE, whatever @rw says;
+         * the only caller in this file passes OBD_BRW_WRITE -- confirm
+         * before using this for reads. */
+        rc = brw_kiovec(WRITE, 1, &iobuf, inode->i_dev, iobuf->blocks,
+                        1 << inode->i_blkbits);
+        CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n",
+               iobuf->nr_pages, rc);
+        expected = (1 << inode->i_blkbits) * iobuf->nr_pages * blocks_per_page;
+        if (rc != expected) {
+                CERROR("short write? expected %d, wrote %d\n", expected, rc);
+                /* a partial transfer must not be reported as success */
+                if (rc >= 0)
+                        rc = -EIO;
+        } else {
+                rc = 0;
+        }
+
+        EXIT;
+cleanup:
+        /* undo, in reverse order, whatever had been set up */
+        switch(cleanup_phase) {
+        case 3:
+                up(&inode->i_sem);
+                /* fallthrough */
+        case 2:
+                unlock_kiovec(1, &iobuf);
+                /* fallthrough */
+        case 1:
+                OBD_FREE(created, sizeof(*created) *
+                         iobuf->nr_pages*blocks_per_page);
+                break;
+        default:
+                CERROR("corrupt cleanup_phase (%d)?\n", cleanup_phase);
+                LBUG();
+                break;
+        }
+        return rc;
+}
+
+/*
+ * Commit a bulk write on 2.4 kernels.
+ *
+ * The pages of the @obj->ioo_bufcnt entries of @res are attached to a
+ * freshly allocated kiobuf, a transaction is started with
+ * fsfilt_brw_start(), the data is written with filter_direct_io(), and on
+ * success the inode times and (if the file grew) i_size are updated under
+ * i_sem.  The transaction is then closed with filter_finish_transno() and
+ * fsfilt_commit().
+ *
+ * On every path the pages of @res are released with __free_page() (after
+ * flip_into_page_cache() when rc == 0) and @res->dentry is released with
+ * f_dput().
+ *
+ * Only objcount == 1 is supported (asserted).  Returns 0 on success or a
+ * negative errno.
+ */
+int filter_commitrw_write(struct obd_export *exp, int objcount,
+                          struct obd_ioobj *obj, int niocount,
+                          struct niobuf_local *res,
+                          struct obd_trans_info *oti)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct obd_run_ctxt saved;
+        struct niobuf_local *lnb;
+        struct fsfilt_objinfo fso;
+        struct iattr iattr = { .ia_valid = ATTR_SIZE, .ia_size = 0, };
+        struct kiobuf *iobuf;
+        struct inode *inode = NULL;
+        int rc = 0, i, cleanup_phase = 0, err;
+        unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */
+        ENTRY;
+        LASSERT(oti != NULL);
+        LASSERT(objcount == 1);
+        LASSERT(current->journal_info == NULL);
+
+        rc = alloc_kiovec(1, &iobuf);
+        if (rc)
+                GOTO(cleanup, rc);
+        cleanup_phase = 1;
+
+#if (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,18))
+        iobuf->dovary = 0; /* this prevents corruption, not present in 2.4.20 */
+#endif
+        rc = expand_kiobuf(iobuf, obj->ioo_bufcnt);
+        if (rc)
+                GOTO(cleanup, rc);
+
+        iobuf->offset = 0;
+        iobuf->length = PAGE_SIZE * obj->ioo_bufcnt;
+        iobuf->nr_pages = obj->ioo_bufcnt;
+
+        fso.fso_dentry = res->dentry;
+        fso.fso_bufcnt = obj->ioo_bufcnt;
+        inode = res->dentry->d_inode;
+
+        /* attach the pages and find the highest byte offset being written */
+        for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                loff_t this_size;
+                iobuf->maplist[i] = lnb->page;
+                /* We expect these pages to be in offset order, but we'll
+                 * be forgiving */
+                this_size = lnb->offset + lnb->len;
+                if (this_size > iattr.ia_size)
+                        iattr.ia_size = this_size;
+        }
+
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        cleanup_phase = 2;
+
+        oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, oti);
+        if (IS_ERR(oti->oti_handle)) {
+                rc = PTR_ERR(oti->oti_handle);
+                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+                       "error starting transaction: rc = %d\n", rc);
+                oti->oti_handle = NULL;
+                GOTO(cleanup, rc);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+
+        rc = filter_direct_io(OBD_BRW_WRITE, inode, iobuf);
+        if (rc == 0) {
+                down(&inode->i_sem);
+                inode_update_time(inode, 1);
+                if (iattr.ia_size > inode->i_size) {
+                        CDEBUG(D_INFO, "setting i_size to "LPU64"\n",
+                               iattr.ia_size);
+                        /* NOTE(review): the result of fsfilt_setattr() is
+                         * ignored here -- confirm that is intended. */
+                        fsfilt_setattr(obd, res->dentry, oti->oti_handle,
+                                       &iattr, 0);
+                }
+                up(&inode->i_sem);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+
+        /* the transaction is closed even when the write failed; a commit
+         * error overrides rc */
+        rc = filter_finish_transno(exp, oti, rc);
+        err = fsfilt_commit(obd, inode, oti->oti_handle, obd_sync_filter);
+        if (err)
+                rc = err;
+        if (obd_sync_filter)
+                LASSERT(oti->oti_transno <= obd->obd_last_committed);
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+
+cleanup:
+        switch (cleanup_phase) {
+        case 2:
+                pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+                LASSERT(current->journal_info == NULL);
+                /* fallthrough */
+        case 1:
+                free_kiovec(1, &iobuf);
+                /* fallthrough */
+        case 0:
+                for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                        /* flip_.. gets a ref, while free_page only frees
+                         * when it decrefs to 0 */
+                        if (rc == 0)
+                                flip_into_page_cache(inode, lnb->page);
+                        __free_page(lnb->page);
+                }
+                f_dput(res->dentry);
+        }
+
+        RETURN(rc);
+}
+
+#endif
+
diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c
new file mode 100644
index 0000000..479eab1
--- /dev/null
+++ b/lustre/obdfilter/filter_io_26.c
@@ -0,0 +1,192 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * linux/fs/obdfilter/filter_io.c
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Author: Peter Braam
+ * Author: Andreas Dilger
+ * Author: Phil Schwan
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* NOTE(review): the header names on the <...> include lines of this file
+ * were missing from the reviewed copy of the patch; the names below are a
+ * best guess based on what the code uses -- confirm against the tree. */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/pagemap.h> // XXX kill me soon
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include "filter_internal.h"
+
+int ext3_map_inode_page(struct inode *inode, struct page *page,
+                        unsigned long *blocks, int *created, int create);
+
+/*
+ * Return 1 when @sector is the 512-byte sector immediately following the
+ * data already described by @bio (bi_sector plus bi_size in sectors), so
+ * that the next block can be appended to it; 0 otherwise or when @bio is
+ * NULL.
+ */
+static int can_be_merged(struct bio *bio, sector_t sector)
+{
+        if (!bio)
+                return 0;
+
+        return bio->bi_sector + (bio->bi_size >> 9) == sector;
+}
+
+/*
+ * Commit a bulk write on 2.5/2.6 kernels.
+ *
+ * A transaction is started with fsfilt_brw_start(); each page of the
+ * @obj->ioo_bufcnt entries of @res is mapped to blocks with
+ * ext3_map_inode_page() and queued into bios that are submitted with
+ * submit_bio(WRITE, ...).  On success the inode times and (if the file
+ * grew) i_size are updated under i_sem.  Once the transaction has been
+ * started it is always closed with filter_finish_transno() and
+ * fsfilt_commit(), including when mapping or bio allocation fails.
+ *
+ * On every path the pages of @res are released with __free_page() (after
+ * flip_into_page_cache() when rc == 0) and @res->dentry is released with
+ * f_dput().
+ *
+ * Only objcount == 1 is supported (asserted).  Returns 0 on success or a
+ * negative errno.
+ *
+ * FIXME: bios are submitted with bi_end_io == NULL and nothing waits for
+ * them to complete yet.
+ */
+int filter_commitrw_write(struct obd_export *exp, int objcount,
+                          struct obd_ioobj *obj, int niocount,
+                          struct niobuf_local *res,
+                          struct obd_trans_info *oti)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct obd_run_ctxt saved;
+        struct niobuf_local *lnb;
+        struct fsfilt_objinfo fso;
+        struct iattr iattr = { .ia_valid = ATTR_SIZE, .ia_size = 0, };
+        struct inode *inode = NULL;
+        int rc = 0, i, k, cleanup_phase = 0, err;
+        unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */
+        struct bio *bio = NULL, *bio_list = NULL;
+        int created[16]; /* 8KB pages max, 512-byte blocks min */
+        unsigned long blocks[16];
+        int blocks_per_page;
+        ENTRY;
+        LASSERT(oti != NULL);
+        LASSERT(objcount == 1);
+        LASSERT(current->journal_info == NULL);
+
+        /* the inode has to be looked up before i_blkbits can be read */
+        inode = res->dentry->d_inode;
+        blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+        LASSERT(blocks_per_page <= 16);
+
+        cleanup_phase = 1;
+        fso.fso_dentry = res->dentry;
+        fso.fso_bufcnt = obj->ioo_bufcnt;
+
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        cleanup_phase = 2;
+
+        oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, oti);
+        if (IS_ERR(oti->oti_handle)) {
+                rc = PTR_ERR(oti->oti_handle);
+                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+                       "error starting transaction: rc = %d\n", rc);
+                oti->oti_handle = NULL;
+                GOTO(cleanup, rc);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+
+        /* From here on the transaction is open: errors leave the loop with
+         * rc set and fall through to the commit below instead of jumping
+         * straight to cleanup. */
+        for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                loff_t this_size;
+                sector_t sector;
+                int offs;
+
+                /* get block number for next page */
+                rc = ext3_map_inode_page(inode, lnb->page, blocks, created, 1);
+                if (rc)
+                        break;
+
+                for (k = 0; k < blocks_per_page; k++) {
+                        sector = blocks[k] * (inode->i_sb->s_blocksize >> 9);
+                        offs = k * inode->i_sb->s_blocksize;
+
+                        /* NOTE(review): every block is added with length
+                         * lnb->len rather than the block size; that looks
+                         * wrong when blocks_per_page > 1 -- confirm. */
+                        if (can_be_merged(bio, sector) &&
+                            bio_add_page(bio, lnb->page, lnb->len, offs))
+                                continue;
+
+                        /* not contiguous, or the current bio is full */
+                        if (bio) {
+                                submit_bio(WRITE, bio);
+                                bio = NULL;
+                        }
+                        /* allocate new bio */
+                        bio = bio_alloc(GFP_NOIO, obj->ioo_bufcnt);
+                        if (bio == NULL) {
+                                rc = -ENOMEM;
+                                break;
+                        }
+                        bio->bi_bdev = inode->i_sb->s_bdev;
+                        bio->bi_sector = sector;
+                        bio->bi_end_io = NULL; /* FIXME */
+
+                        /* put on the list */
+                        bio->bi_private = bio_list;
+                        bio_list = bio;
+
+                        /* same in-page offset as the merge path above */
+                        if (!bio_add_page(bio, lnb->page, lnb->len, offs))
+                                LBUG();
+                }
+                if (rc)
+                        break;
+
+                /* We expect these pages to be in offset order, but we'll
+                 * be forgiving */
+                this_size = lnb->offset + lnb->len;
+                if (this_size > iattr.ia_size)
+                        iattr.ia_size = this_size;
+        }
+        if (bio)
+                submit_bio(WRITE, bio);
+
+        /* time to wait for I/O completion */
+
+        if (rc == 0) {
+                down(&inode->i_sem);
+                inode_update_time(inode, 1);
+                if (iattr.ia_size > inode->i_size) {
+                        CDEBUG(D_INFO, "setting i_size to "LPU64"\n",
+                               iattr.ia_size);
+                        fsfilt_setattr(obd, res->dentry, oti->oti_handle,
+                                       &iattr, 0);
+                }
+                up(&inode->i_sem);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+
+        /* close the transaction; a commit error overrides rc */
+        rc = filter_finish_transno(exp, oti, rc);
+        err = fsfilt_commit(obd, inode, oti->oti_handle, obd_sync_filter);
+        if (err)
+                rc = err;
+        if (obd_sync_filter)
+                LASSERT(oti->oti_transno <= obd->obd_last_committed);
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+
+cleanup:
+        switch (cleanup_phase) {
+        case 2:
+                pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+                LASSERT(current->journal_info == NULL);
+                /* fallthrough */
+        case 1:
+        case 0:
+                for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                        /* flip_.. gets a ref, while free_page only frees
+                         * when it decrefs to 0 */
+                        if (rc == 0)
+                                flip_into_page_cache(inode, lnb->page);
+                        __free_page(lnb->page);
+                }
+                f_dput(res->dentry);
+        }
+
+        RETURN(rc);
+}
+
+
+#endif
+
-- 
1.8.3.1