From 658c1ba287f424b750a23941a580e21b1e2316be Mon Sep 17 00:00:00 2001 From: adilger Date: Thu, 12 May 2005 23:13:24 +0000 Subject: [PATCH] Branch b1_4 Move all pagecache vs. directio handling into filter_clear_page_cache() and add comments about what is going on here and why. Use the fs-specific ->invalidatepage called from truncate_complete_page() instead of calling block_invalidatepage() directly. Wait on page for the 2.6 code, and use 2.6 functions and have compat macros for 2.4 instead. b=4892 r=green --- lustre/include/linux/lustre_compat25.h | 10 ++--- lustre/obdfilter/filter.c | 13 ++----- lustre/obdfilter/filter_io_24.c | 67 +++++++++++++++++++++------------- lustre/obdfilter/filter_io_26.c | 59 ++++++++++++++++++++---------- 4 files changed, 90 insertions(+), 59 deletions(-) diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 0760eaa..488a1c1 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -44,9 +44,8 @@ #endif /* XXX our code should be using the 2.6 calls, not the other way around */ -#define TryLockPage(page) TestSetPageLocked(page) -#define filemap_fdatasync(mapping) filemap_fdatawrite(mapping) -#define Page_Uptodate(page) PageUptodate(page) +#define TryLockPage(page) TestSetPageLocked(page) +#define Page_Uptodate(page) PageUptodate(page) #define ll_redirty_page(page) set_page_dirty(page) #define KDEVT_INIT(val) (val) @@ -167,6 +166,7 @@ typedef long sector_t; #define ll_pgcache_unlock(mapping) spin_unlock(&pagecache_lock) #define ll_call_writepage(inode, page) \ (inode)->i_mapping->a_ops->writepage(page) +#define filemap_fdatawrite(mapping) filemap_fdatasync(mapping) #define ll_invalidate_inode_pages(inode) invalidate_inode_pages(inode) #define ll_truncate_complete_page(page) truncate_complete_page(page) @@ -236,8 +236,8 @@ static inline void cond_resched(void) #define __set_page_ll_data(page, llap) page->private = (unsigned long)llap #define __clear_page_ll_data(page) page->private = 0 #define PageWriteback(page) 0 -#define set_page_writeback(page) -#define end_page_writeback(page) +#define set_page_writeback(page) do {} while (0) +#define end_page_writeback(page) do {} while (0) static inline int mapping_mapped(struct address_space *mapping) { diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index ae39b15..207aa33 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2316,14 +2316,9 @@ cleanup: switch(cleanup_phase) { case 3: if (fcc != NULL) { - if (oti != NULL) - fsfilt_add_journal_cb(obd, 0, oti->oti_handle, - filter_cancel_cookies_cb, - fcc); - else - fsfilt_add_journal_cb(obd, 0, handle, - filter_cancel_cookies_cb, - fcc); + fsfilt_add_journal_cb(obd, 0, + oti ? oti->oti_handle : handle, + filter_cancel_cookies_cb, fcc); } rc = filter_finish_transno(exp, oti, rc); rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); @@ -2403,7 +2398,7 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); down(&dentry->d_inode->i_sem); - rc = filemap_fdatasync(dentry->d_inode->i_mapping); + rc = filemap_fdatawrite(dentry->d_inode->i_mapping); if (rc == 0) { /* just any file to grab fsync method - "file" arg unused */ struct file *file = filter->fo_rcvd_filp; diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index d4327ca..1fca494 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -107,23 +107,55 @@ static void dump_page(int rw, unsigned long block, struct page *page) } #endif -static void filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf) +/* These are our hacks to keep our directio/bh IO coherent with ext3's + * page cache use. Most notably ext3 reads file data into the page + * cache when it is zeroing the tail of partial-block truncates and + * leaves it there, sometimes generating io from it at later truncates. + * This removes the partial page and its buffers from the page cache, + * so it should only ever cause a wait in rare cases, as otherwise we + * always do full-page IO to the OST. + * + * The call to truncate_complete_page() will call journal_flushpage() to + * free the buffers and drop the page from cache. The buffers should not + * be dirty, because we already called fdatasync/fdatawait on them. + */ +static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf) { struct page *page; - int i; + int i, rc, rc2; + + check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages, + inode->i_dev, 1 << inode->i_blkbits); + + /* This is nearly generic_osync_inode, without the waiting on the inode + rc = generic_osync_inode(inode, inode->i_mapping, + OSYNC_DATA|OSYNC_METADATA); + */ + rc = filemap_fdatasync(inode->i_mapping); + rc2 = fsync_inode_data_buffers(inode); + if (rc == 0) + rc = rc2; + rc2 = filemap_fdatawait(inode->i_mapping); + if (rc == 0) + rc = rc2; + if (rc != 0) + RETURN(rc); + /* be careful to call this after fsync_inode_data_buffers has waited + * for IO to complete before we evict it from the cache */ for (i = 0; i < iobuf->nr_pages ; i++) { page = find_lock_page(inode->i_mapping, iobuf->maplist[i]->index); if (page == NULL) continue; - if (page->mapping != NULL) { - block_flushpage(page, 0); - truncate_complete_page(page); - } + if (page->mapping != NULL) + ll_truncate_complete_page(page); + unlock_page(page); page_cache_release(page); } + + return 0; } /* Must be called with i_sem taken for writes; this will drop it */ @@ -196,27 +228,10 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf, GOTO(cleanup, rc); } - /* these are our hacks to keep our directio/bh IO coherent with ext3's - * page cache use. Most notably ext3 reads file data into the page - * cache when it is zeroing the tail of partial-block truncates and - * leaves it there, sometimes generating io from it at later truncates. - * Someday very soon we'll be performing our brw_kiovec() IO to and - * from the page cache. */ - check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages, - inode->i_dev, 1 << inode->i_blkbits); - - rc = filemap_fdatasync(inode->i_mapping); - if (rc == 0) - rc = fsync_inode_data_buffers(inode); - if (rc == 0) - rc = filemap_fdatawait(inode->i_mapping); + rc = filter_clear_page_cache(inode, iobuf); if (rc < 0) GOTO(cleanup, rc); - /* be careful to call this after fsync_inode_data_buffers has waited - * for IO to complete before we evict it from the cache */ - filter_clear_page_cache(inode, iobuf); - rc = fsfilt_send_bio(rw, obd, inode, iobuf); CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n", @@ -420,7 +435,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (err) rc = err; if (obd_sync_filter && !err) - LASSERT(oti->oti_transno <= obd->obd_last_committed); + LASSERTF(oti->oti_transno <= obd->obd_last_committed, + "oti_transno "LPU64" last_committed "LPU64"\n", + oti->oti_transno, obd->obd_last_committed); fsfilt_check_slow(now, obd_timeout, "commitrw commit"); cleanup: diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index a248361..f844b67 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -352,24 +352,55 @@ int filter_do_bio(struct obd_device *obd, struct inode *inode, RETURN(rc); } -static void filter_clear_page_cache(struct inode *inode, struct dio_request *iobuf) +/* These are our hacks to keep our directio/bh IO coherent with ext3's + * page cache use. Most notably ext3 reads file data into the page + * cache when it is zeroing the tail of partial-block truncates and + * leaves it there, sometimes generating io from it at later truncates. + * This removes the partial page and its buffers from the page cache, + * so it should only ever cause a wait in rare cases, as otherwise we + * always do full-page IO to the OST. + * + * The call to truncate_complete_page() will call journal_invalidatepage() + * to free the buffers and drop the page from cache. The buffers should + * not be dirty, because we already called fdatasync/fdatawait on them. + */ +static int filter_clear_page_cache(struct inode *inode, + struct dio_request *iobuf) { struct page *page; - int i; + int i, rc, rc2; - for (i = 0; i < iobuf->dr_npages ; i++) { + /* This is nearly generic_osync_inode, without the waiting on the inode + rc = generic_osync_inode(inode, inode->i_mapping, + OSYNC_DATA|OSYNC_METADATA); + */ + rc = filemap_fdatawrite(inode->i_mapping); + rc2 = sync_mapping_buffers(inode->i_mapping); + if (rc == 0) + rc = rc2; + rc2 = filemap_fdatawait(inode->i_mapping); + if (rc == 0) + rc = rc2; + if (rc != 0) + RETURN(rc); + + /* be careful to call this after fsync_inode_data_buffers has waited + * for IO to complete before we evict it from the cache */ + for (i = 0; i < iobuf->dr_npages; i++) { page = find_lock_page(inode->i_mapping, iobuf->dr_pages[i]->index); if (page == NULL) continue; if (page->mapping != NULL) { - block_invalidatepage(page, 0); + wait_on_page_writeback(page); ll_truncate_complete_page(page); } unlock_page(page); page_cache_release(page); } + + return 0; } static int filter_quota_enforcement(struct obd_device *obd, @@ -511,24 +542,10 @@ remap: RETURN(rc); } - /* This is nearly osync_inode, without the waiting - rc = generic_osync_inode(inode, inode->i_mapping, - OSYNC_DATA|OSYNC_METADATA); */ - rc = filemap_fdatawrite(inode->i_mapping); - rc2 = sync_mapping_buffers(inode->i_mapping); - if (rc == 0) - rc = rc2; - rc2 = filemap_fdatawait(inode->i_mapping); - if (rc == 0) - rc = rc2; - + rc = filter_clear_page_cache(inode, dreq); if (rc != 0) RETURN(rc); - /* be careful to call this after fsync_inode_data_buffers has waited - * for IO to complete before we evict it from the cache */ - filter_clear_page_cache(inode, dreq); - RETURN(filter_do_bio(obd, inode, dreq, rw)); } @@ -662,7 +679,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, rc = err; if (obd_sync_filter && !err) - LASSERT(oti->oti_transno <= obd->obd_last_committed); + LASSERTF(oti->oti_transno <= obd->obd_last_committed, + "oti_transno "LPU64" last_committed "LPU64"\n", + oti->oti_transno, obd->obd_last_committed); fsfilt_check_slow(now, obd_timeout, "commitrw commit"); -- 1.8.3.1