Whamcloud - gitweb
Branch b1_4
author adilger <adilger>
Thu, 12 May 2005 23:13:24 +0000 (23:13 +0000)
committer adilger <adilger>
Thu, 12 May 2005 23:13:24 +0000 (23:13 +0000)
Move all pagecache vs. directio handling into filter_clear_page_cache()
and add comments about what is going on here and why.

Use the fs-specific ->invalidatepage called from truncate_complete_page()
instead of calling block_invalidatepage() directly.

Wait on page writeback in the 2.6 code, and call the 2.6 functions
everywhere, providing compat macros for 2.4 instead of the reverse.
b=4892
r=green

lustre/include/linux/lustre_compat25.h
lustre/obdfilter/filter.c
lustre/obdfilter/filter_io_24.c
lustre/obdfilter/filter_io_26.c

index 0760eaa..488a1c1 100644 (file)
@@ -44,9 +44,8 @@
 #endif
 
 /* XXX our code should be using the 2.6 calls, not the other way around */
-#define TryLockPage(page)                TestSetPageLocked(page)
-#define filemap_fdatasync(mapping)       filemap_fdatawrite(mapping)
-#define Page_Uptodate(page)              PageUptodate(page)
+#define TryLockPage(page)               TestSetPageLocked(page)
+#define Page_Uptodate(page)             PageUptodate(page)
 #define ll_redirty_page(page)           set_page_dirty(page)
 
 #define KDEVT_INIT(val)                 (val)
@@ -167,6 +166,7 @@ typedef long sector_t;
 #define ll_pgcache_unlock(mapping)      spin_unlock(&pagecache_lock)
 #define ll_call_writepage(inode, page)  \
                                (inode)->i_mapping->a_ops->writepage(page)
+#define filemap_fdatawrite(mapping)      filemap_fdatasync(mapping)
 #define ll_invalidate_inode_pages(inode) invalidate_inode_pages(inode)
 #define ll_truncate_complete_page(page) truncate_complete_page(page)
 
@@ -236,8 +236,8 @@ static inline void cond_resched(void)
 #define __set_page_ll_data(page, llap) page->private = (unsigned long)llap
 #define __clear_page_ll_data(page) page->private = 0
 #define PageWriteback(page) 0
-#define set_page_writeback(page)
-#define end_page_writeback(page)
+#define set_page_writeback(page) do {} while (0)
+#define end_page_writeback(page) do {} while (0)
 
 static inline int mapping_mapped(struct address_space *mapping)
 {
index ae39b15..207aa33 100644 (file)
@@ -2316,14 +2316,9 @@ cleanup:
         switch(cleanup_phase) {
         case 3:
                 if (fcc != NULL) {
-                        if (oti != NULL)
-                                fsfilt_add_journal_cb(obd, 0, oti->oti_handle,
-                                                      filter_cancel_cookies_cb,
-                                                      fcc);
-                        else
-                                fsfilt_add_journal_cb(obd, 0, handle,
-                                                      filter_cancel_cookies_cb,
-                                                      fcc);
+                        fsfilt_add_journal_cb(obd, 0,
+                                              oti ? oti->oti_handle : handle,
+                                              filter_cancel_cookies_cb, fcc);
                 }
                 rc = filter_finish_transno(exp, oti, rc);
                 rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
@@ -2403,7 +2398,7 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
         push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
 
         down(&dentry->d_inode->i_sem);
-        rc = filemap_fdatasync(dentry->d_inode->i_mapping);
+        rc = filemap_fdatawrite(dentry->d_inode->i_mapping);
         if (rc == 0) {
                 /* just any file to grab fsync method - "file" arg unused */
                 struct file *file = filter->fo_rcvd_filp;
index d4327ca..1fca494 100644 (file)
@@ -107,23 +107,55 @@ static void dump_page(int rw, unsigned long block, struct page *page)
 }
 #endif
 
-static void filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
+/* These are our hacks to keep our directio/bh IO coherent with ext3's
+ * page cache use.  Most notably ext3 reads file data into the page
+ * cache when it is zeroing the tail of partial-block truncates and
+ * leaves it there, sometimes generating io from it at later truncates.
+ * This removes the partial page and its buffers from the page cache,
+ * so it should only ever cause a wait in rare cases, as otherwise we
+ * always do full-page IO to the OST.
+ *
+ * The call to truncate_complete_page() will call journal_flushpage() to
+ * free the buffers and drop the page from cache.  The buffers should not
+ * be dirty, because we already called fdatasync/fdatawait on them.
+ */
+static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
 {
         struct page *page;
-        int i;
+        int i, rc, rc2;
+
+        check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages,
+                          inode->i_dev, 1 << inode->i_blkbits);
+
+        /* This is nearly generic_osync_inode, without the waiting on the inode
+        rc = generic_osync_inode(inode, inode->i_mapping,
+                                 OSYNC_DATA|OSYNC_METADATA);
+         */
+        rc = filemap_fdatasync(inode->i_mapping);
+        rc2 = fsync_inode_data_buffers(inode);
+        if (rc == 0)
+                rc = rc2;
+        rc2 = filemap_fdatawait(inode->i_mapping);
+        if (rc == 0)
+                rc = rc2;
+        if (rc != 0)
+                RETURN(rc);
 
+        /* be careful to call this after fsync_inode_data_buffers has waited
+         * for IO to complete before we evict it from the cache */
         for (i = 0; i < iobuf->nr_pages ; i++) {
                 page = find_lock_page(inode->i_mapping,
                                       iobuf->maplist[i]->index);
                 if (page == NULL)
                         continue;
-                if (page->mapping != NULL) {
-                        block_flushpage(page, 0);
-                        truncate_complete_page(page);
-                }
+                if (page->mapping != NULL)
+                        ll_truncate_complete_page(page);
+
                 unlock_page(page);
                 page_cache_release(page);
         }
+
+        return 0;
 }
 
 /* Must be called with i_sem taken for writes; this will drop it */
@@ -196,27 +228,10 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf,
                         GOTO(cleanup, rc);
         }
 
-        /* these are our hacks to keep our directio/bh IO coherent with ext3's
-         * page cache use.  Most notably ext3 reads file data into the page
-         * cache when it is zeroing the tail of partial-block truncates and
-         * leaves it there, sometimes generating io from it at later truncates.
-         * Someday very soon we'll be performing our brw_kiovec() IO to and
-         * from the page cache. */
-        check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages,
-                          inode->i_dev, 1 << inode->i_blkbits);
-
-        rc = filemap_fdatasync(inode->i_mapping);
-        if (rc == 0)
-                rc = fsync_inode_data_buffers(inode);
-        if (rc == 0)
-                rc = filemap_fdatawait(inode->i_mapping);
+        rc = filter_clear_page_cache(inode, iobuf);
         if (rc < 0)
                 GOTO(cleanup, rc);
 
-        /* be careful to call this after fsync_inode_data_buffers has waited
-         * for IO to complete before we evict it from the cache */
-        filter_clear_page_cache(inode, iobuf);
-
         rc = fsfilt_send_bio(rw, obd, inode, iobuf);
 
         CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n",
@@ -420,7 +435,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
         if (err)
                 rc = err;
         if (obd_sync_filter && !err)
-                LASSERT(oti->oti_transno <= obd->obd_last_committed);
+                LASSERTF(oti->oti_transno <= obd->obd_last_committed,
+                         "oti_transno "LPU64" last_committed "LPU64"\n",
+                         oti->oti_transno, obd->obd_last_committed);
         fsfilt_check_slow(now, obd_timeout, "commitrw commit");
 
 cleanup:
index a248361..f844b67 100644 (file)
@@ -352,24 +352,55 @@ int filter_do_bio(struct obd_device *obd, struct inode *inode,
         RETURN(rc);
 }
 
-static void filter_clear_page_cache(struct inode *inode, struct dio_request *iobuf)
+/* These are our hacks to keep our directio/bh IO coherent with ext3's
+ * page cache use.  Most notably ext3 reads file data into the page
+ * cache when it is zeroing the tail of partial-block truncates and
+ * leaves it there, sometimes generating io from it at later truncates.
+ * This removes the partial page and its buffers from the page cache,
+ * so it should only ever cause a wait in rare cases, as otherwise we
+ * always do full-page IO to the OST.
+ *
+ * The call to truncate_complete_page() will call journal_invalidatepage()
+ * to free the buffers and drop the page from cache.  The buffers should
+ * not be dirty, because we already called fdatasync/fdatawait on them.
+ */
+static int filter_clear_page_cache(struct inode *inode,
+                                    struct dio_request *iobuf)
 {
         struct page *page;
-        int i;
+        int i, rc, rc2;
 
-        for (i = 0; i < iobuf->dr_npages ; i++) {
+        /* This is nearly generic_osync_inode, without the waiting on the inode
+        rc = generic_osync_inode(inode, inode->i_mapping,
+                                 OSYNC_DATA|OSYNC_METADATA);
+         */
+        rc = filemap_fdatawrite(inode->i_mapping);
+        rc2 = sync_mapping_buffers(inode->i_mapping);
+        if (rc == 0)
+                rc = rc2;
+        rc2 = filemap_fdatawait(inode->i_mapping);
+        if (rc == 0)
+                rc = rc2;
+        if (rc != 0)
+                RETURN(rc);
+
+        /* be careful to call this after fsync_inode_data_buffers has waited
+         * for IO to complete before we evict it from the cache */
+        for (i = 0; i < iobuf->dr_npages; i++) {
                 page = find_lock_page(inode->i_mapping,
                                       iobuf->dr_pages[i]->index);
                 if (page == NULL)
                         continue;
                 if (page->mapping != NULL) {
-                        block_invalidatepage(page, 0);
+                        wait_on_page_writeback(page);
                         ll_truncate_complete_page(page);
                 }
 
                 unlock_page(page);
                 page_cache_release(page);
         }
+
+        return 0;
 }
 
 static int filter_quota_enforcement(struct obd_device *obd,
@@ -511,24 +542,10 @@ remap:
                         RETURN(rc);
         }
 
-        /* This is nearly osync_inode, without the waiting
-        rc = generic_osync_inode(inode, inode->i_mapping,
-                                 OSYNC_DATA|OSYNC_METADATA); */
-        rc = filemap_fdatawrite(inode->i_mapping);
-        rc2 = sync_mapping_buffers(inode->i_mapping);
-        if (rc == 0)
-                rc = rc2;
-        rc2 = filemap_fdatawait(inode->i_mapping);
-        if (rc == 0)
-                rc = rc2;
-
+        rc = filter_clear_page_cache(inode, dreq);
         if (rc != 0)
                 RETURN(rc);
 
-        /* be careful to call this after fsync_inode_data_buffers has waited
-         * for IO to complete before we evict it from the cache */
-        filter_clear_page_cache(inode, dreq);
-
         RETURN(filter_do_bio(obd, inode, dreq, rw));
 }
 
@@ -662,7 +679,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 rc = err;
 
         if (obd_sync_filter && !err)
-                LASSERT(oti->oti_transno <= obd->obd_last_committed);
+                LASSERTF(oti->oti_transno <= obd->obd_last_committed,
+                         "oti_transno "LPU64" last_committed "LPU64"\n",
+                         oti->oti_transno, obd->obd_last_committed);
 
         fsfilt_check_slow(now, obd_timeout, "commitrw commit");