From 6add041c291a2d238a94f617e8caeba19dd7c5ca Mon Sep 17 00:00:00 2001 From: wangdi Date: Sun, 15 May 2005 16:03:42 +0000 Subject: [PATCH] Branch HEAD merge b4892 from b1_4 --- lustre/include/linux/lustre_compat25.h | 5 ++- lustre/obdfilter/filter.c | 15 ++----- lustre/obdfilter/filter_io_24.c | 68 ++++++++++++++++++++------------ lustre/obdfilter/filter_io_26.c | 72 +++++++++++++++++++++------------- 4 files changed, 94 insertions(+), 66 deletions(-) diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 2adb2bd..c8b047c 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -120,7 +120,6 @@ static inline int cleanup_group_info(void) /* XXX our code should be using the 2.6 calls, not the other way around */ #define TryLockPage(page) TestSetPageLocked(page) -#define filemap_fdatasync(mapping) filemap_fdatawrite(mapping) #define Page_Uptodate(page) PageUptodate(page) #define KDEVT_INIT(val) (val) @@ -232,6 +231,7 @@ typedef long sector_t; #define ll_pgcache_unlock(mapping) spin_unlock(&pagecache_lock) #define ll_call_writepage(inode, page) \ (inode)->i_mapping->a_ops->writepage(page) +#define filemap_fdatawrite(mapping) filemap_fdatasync(mapping) #define ll_invalidate_inode_pages(inode) invalidate_inode_pages(inode) #define ll_truncate_complete_page(page) truncate_complete_page(page) @@ -275,6 +275,9 @@ static inline int mapping_mapped(struct address_space *mapping) #define __set_page_ll_data(page, llap) page->private = (unsigned long)llap #define __clear_page_ll_data(page) page->private = 0 #define PageWriteback(page) 0 +#define set_page_writeback(page) do {} while (0) +#define end_page_writeback(page) do {} while (0) + #define end_page_writeback(page) #ifdef ZAP_PAGE_RANGE_VMA diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 43a778d..f59981e 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2680,16 +2680,9 @@ cleanup: switch(cleanup_phase) { case 3: if (fcc != NULL) { - if (oti != NULL) - fsfilt_add_journal_cb(obd, filter->fo_sb, 0, - oti->oti_handle, - filter_cancel_cookies_cb, - fcc); - else - fsfilt_add_journal_cb(obd, filter->fo_sb, 0, - handle, - filter_cancel_cookies_cb, - fcc); + fsfilt_add_journal_cb(obd, filter->fo_sb, 0, + oti ? oti->oti_handle : handle, + filter_cancel_cookies_cb, fcc); } rc = filter_finish_transno(exp, oti, rc); rc2 = fsfilt_commit(obd, filter->fo_sb, dparent->d_inode, @@ -2761,7 +2754,7 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); down(&dentry->d_inode->i_sem); - rc = filemap_fdatasync(dentry->d_inode->i_mapping); + rc = filemap_fdatawrite(dentry->d_inode->i_mapping); if (rc == 0) { /* just any file to grab fsync method - "file" arg unused */ struct file *file = filter->fo_rcvd_filp; diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index 43bf78d..4b2d012 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -108,23 +108,55 @@ static void dump_page(int rw, unsigned long block, struct page *page) } #endif -static void filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf) +/* These are our hacks to keep our directio/bh IO coherent with ext3's + * page cache use. Most notably ext3 reads file data into the page + * cache when it is zeroing the tail of partial-block truncates and + * leaves it there, sometimes generating io from it at later truncates. + * This removes the partial page and its buffers from the page cache, + * so it should only ever cause a wait in rare cases, as otherwise we + * always do full-page IO to the OST. + * + * The call to truncate_complete_page() will call journal_flushpage() to + * free the buffers and drop the page from cache. The buffers should not + * be dirty, because we already called fdatasync/fdatawait on them. + */ +static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf) { struct page *page; - int i; + int i, rc, rc2; + + check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages, + inode->i_dev, 1 << inode->i_blkbits); + + /* This is nearly generic_osync_inode, without the waiting on the inode + rc = generic_osync_inode(inode, inode->i_mapping, + OSYNC_DATA|OSYNC_METADATA); + */ + rc = filemap_fdatasync(inode->i_mapping); + rc2 = fsync_inode_data_buffers(inode); + if (rc == 0) + rc = rc2; + rc2 = filemap_fdatawait(inode->i_mapping); + if (rc == 0) + rc = rc2; + if (rc != 0) + RETURN(rc); + /* be careful to call this after fsync_inode_data_buffers has waited + * for IO to complete before we evict it from the cache */ for (i = 0; i < iobuf->nr_pages ; i++) { page = find_lock_page(inode->i_mapping, iobuf->maplist[i]->index); if (page == NULL) continue; - if (page->mapping != NULL) { - block_flushpage(page, 0); - truncate_complete_page(page); - } + if (page->mapping != NULL) + ll_truncate_complete_page(page); + unlock_page(page); page_cache_release(page); } + + return 0; } /* Must be called with i_sem taken for writes; this will drop it */ @@ -196,28 +228,10 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf, GOTO(cleanup, rc); } - /* these are our hacks to keep our directio/bh IO coherent with ext3's - * page cache use. Most notably ext3 reads file data into the page - * cache when it is zeroing the tail of partial-block truncates and - * leaves it there, sometimes generating io from it at later truncates. - * Someday very soon we'll be performing our brw_kiovec() IO to and - * from the page cache. */ - - check_pending_bhs(iobuf->blocks, iobuf->nr_pages, inode->i_dev, - 1 << inode->i_blkbits); - - rc = filemap_fdatasync(inode->i_mapping); - if (rc == 0) - rc = fsync_inode_data_buffers(inode); - if (rc == 0) - rc = filemap_fdatawait(inode->i_mapping); + rc = filter_clear_page_cache(inode, iobuf); if (rc < 0) GOTO(cleanup, rc); - /* be careful to call this after fsync_inode_data_buffers has waited - * for IO to complete before we evict it from the cache */ - filter_clear_page_cache(inode, iobuf); - rc = fsfilt_send_bio(rw, obd, inode, iobuf); CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n", @@ -420,7 +434,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (err) rc = err; if (obd_sync_filter && !err) - LASSERT(oti->oti_transno <= obd->obd_last_committed); + LASSERTF(oti->oti_transno <= obd->obd_last_committed, + "oti_transno "LPU64" last_committed "LPU64"\n", + oti->oti_transno, obd->obd_last_committed); fsfilt_check_slow(now, obd_timeout, "commitrw commit"); cleanup: filter_grant_commit(exp, niocount, res); diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index 5a5f234..5fc41e3 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -297,28 +297,56 @@ int filter_do_bio(struct obd_device *obd, struct inode *inode, rc = dreq->dr_error; RETURN(rc); } - -static void filter_clear_page_cache(struct inode *inode, - struct dio_request *iobuf) + +/* These are our hacks to keep our directio/bh IO coherent with ext3's + * page cache use. Most notably ext3 reads file data into the page + * cache when it is zeroing the tail of partial-block truncates and + * leaves it there, sometimes generating io from it at later truncates. + * This removes the partial page and its buffers from the page cache, + * so it should only ever cause a wait in rare cases, as otherwise we + * always do full-page IO to the OST. + * + * The call to truncate_complete_page() will call journal_invalidatepage() + * to free the buffers and drop the page from cache. The buffers should + * not be dirty, because we already called fdatasync/fdatawait on them. + */ +static int filter_clear_page_cache(struct inode *inode, + struct dio_request *iobuf) { struct page *page; - int i; - - for (i = 0; i < iobuf->dr_npages ; i++) { + int i, rc, rc2; + + /* This is nearly generic_osync_inode, without the waiting on the inode + rc = generic_osync_inode(inode, inode->i_mapping, + OSYNC_DATA|OSYNC_METADATA); + */ + rc = filemap_fdatawrite(inode->i_mapping); + rc2 = sync_mapping_buffers(inode->i_mapping); + if (rc == 0) + rc = rc2; + rc2 = filemap_fdatawait(inode->i_mapping); + if (rc == 0) + rc = rc2; + if (rc != 0) + RETURN(rc); + + /* be careful to call this after fsync_inode_data_buffers has waited + * for IO to complete before we evict it from the cache */ + for (i = 0; i < iobuf->dr_npages; i++) { page = find_lock_page(inode->i_mapping, - iobuf->dr_pages[i]->index); + iobuf->dr_pages[i]->index); if (page == NULL) - continue; + continue; if (page->mapping != NULL) { - wait_on_page_writeback(page); - block_invalidatepage(page, 0); - ll_truncate_complete_page(page); + wait_on_page_writeback(page); + ll_truncate_complete_page(page); } + unlock_page(page); page_cache_release(page); } + return 0; } - /* Must be called with i_sem taken for writes; this will drop it */ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, struct obd_export *exp, struct iattr *attr, @@ -373,24 +401,10 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, RETURN(rc); } - /* This is nearly osync_inode, without the waiting - rc = generic_osync_inode(inode, inode->i_mapping, - OSYNC_DATA|OSYNC_METADATA); */ - rc = filemap_fdatawrite(inode->i_mapping); - rc2 = sync_mapping_buffers(inode->i_mapping); - if (rc == 0) - rc = rc2; - rc2 = filemap_fdatawait(inode->i_mapping); - if (rc == 0) - rc = rc2; - + rc = filter_clear_page_cache(inode, dreq); if (rc != 0) RETURN(rc); - /* be careful to call this after fsync_inode_data_buffers has waited - * for IO to complete before we evict it from the cache */ - filter_clear_page_cache(inode, dreq); - RETURN(filter_do_bio(obd, inode, dreq, rw)); } @@ -511,7 +525,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, rc = err; if (obd_sync_filter && !err) - LASSERT(oti->oti_transno <= obd->obd_last_committed); + LASSERTF(oti->oti_transno <= obd->obd_last_committed, + "oti_transno "LPU64" last_committed "LPU64"\n", + oti->oti_transno, obd->obd_last_committed); fsfilt_check_slow(now, obd_timeout, "commitrw commit"); -- 1.8.3.1