Whamcloud - gitweb
b=17397
authoralex <alex>
Sat, 1 Nov 2008 17:43:50 +0000 (17:43 +0000)
committeralex <alex>
Sat, 1 Nov 2008 17:43:50 +0000 (17:43 +0000)
i=adilger

 - always flush page left by partial truncate

lustre/autoconf/lustre-core.m4
lustre/include/linux/lustre_compat25.h
lustre/llite/rw26.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_io.c
lustre/obdfilter/filter_io_26.c
lustre/tests/acceptance-small.sh

index 6c963ad..4899310 100644 (file)
@@ -1269,7 +1269,12 @@ AC_DEFINE(HAVE___D_MOVE, 1,
 ])
 ])
 
-
+#
+# LC_EXPORT_INVALIDATE_MAPPING_PAGES
+#
+# SLES9, RHEL4, RHEL5, vanilla 2.6.24 export invalidate_mapping_pages() but
+# SLES10 2.6.16 does not, for some reason.  For filter cache invalidation.
+#
 AC_DEFUN([LC_EXPORT_INVALIDATE_MAPPING_PAGES],
     [LB_CHECK_SYMBOL_EXPORT([invalidate_mapping_pages], [mm/truncate.c], [
          AC_DEFINE(HAVE_INVALIDATE_MAPPING_PAGES, 1,
@@ -1282,6 +1287,20 @@ AC_DEFUN([LC_EXPORT_INVALIDATE_MAPPING_PAGES],
     ],[])
 ])
 
+#
+# LC_EXPORT_FILEMAP_FDATAWRITE_RANGE
+#
+# No standard kernels export this
+#
+AC_DEFUN([LC_EXPORT_FILEMAP_FDATAWRITE_RANGE],
+[LB_CHECK_SYMBOL_EXPORT([filemap_fdatawrite_range],
+[mm/filemap.c],[
+AC_DEFINE(HAVE_FILEMAP_FDATAWRITE_RANGE, 1,
+            [filemap_fdatawrite_range is exported by the kernel])
+],[
+])
+])
+
 # The actual symbol exported varies among architectures, so we need
 # to check many symbols (but only in the current architecture.)  No
 # matter what symbol is exported, the kernel #defines node_to_cpumask
@@ -1608,6 +1627,7 @@ AC_DEFUN([LC_PROG_LINUX],
          LC_UMOUNTBEGIN_HAS_VFSMOUNT
          if test x$enable_server = xyes ; then
                 LC_EXPORT_INVALIDATE_MAPPING_PAGES
+                LC_EXPORT_FILEMAP_FDATAWRITE_RANGE
          fi
 
          #2.6.18 + RHEL5 (fc6)
index 1e59fe7..4bd7b0c 100644 (file)
@@ -299,6 +299,45 @@ static inline int mapping_has_pages(struct address_space *mapping)
 #define filemap_fdatawrite(mapping)      filemap_fdatasync(mapping)
 #endif
 
+#include <linux/mpage.h>        /* for generic_writepages */
+#ifndef HAVE_FILEMAP_FDATAWRITE_RANGE
+#include <linux/backing-dev.h>  /* for mapping->backing_dev_info */
+static inline int filemap_fdatawrite_range(struct address_space *mapping,
+                                           loff_t start, loff_t end)
+{       /* fallback built when HAVE_FILEMAP_FDATAWRITE_RANGE is not defined */
+        int rc;
+        struct writeback_control wbc = { /* nr_to_write: byte span in pages, rounded up */
+                .sync_mode = WB_SYNC_ALL,
+                .nr_to_write = (end - start + PAGE_SIZE - 1) >> PAGE_SHIFT,
+        };
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) /* range fields renamed */
+        wbc.range_start = start;
+        wbc.range_end = end;
+#else
+        wbc.start = start;
+        wbc.end = end;
+#endif
+
+#ifdef mapping_cap_writeback_dirty /* else test the memory_backed flag */
+        if (!mapping_cap_writeback_dirty(mapping))
+               rc = 0;
+#else
+        if (mapping->backing_dev_info->memory_backed)
+                rc = 0;
+#endif /* the else-if below pairs with whichever "if" was compiled in */
+        /* do_writepages() equivalent: prefer a_ops->writepages if set */
+        else if (mapping->a_ops->writepages)
+                rc = mapping->a_ops->writepages(mapping, &wbc);
+        else
+                rc = generic_writepages(mapping, &wbc);
+        return rc; /* 0 when writeback was skipped, else the writepages result */
+}
+#else
+int filemap_fdatawrite_range(struct address_space *mapping,
+                             loff_t start, loff_t end);
+#endif
+
 #ifdef HAVE_VFS_KERN_MOUNT
 static inline 
 struct vfsmount *
index f82c41b..1b0d1bc 100644 (file)
@@ -54,7 +54,6 @@
 
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
-#include <linux/mpage.h>
 #include <linux/writeback.h>
 #include <linux/stat.h>
 #include <asm/uaccess.h>
index 0e10ec8..2809116 100644 (file)
@@ -2031,7 +2031,7 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
         sema_init(&filter->fo_alloc_lock, 1);
         init_brw_stats(&filter->fo_filter_stats);
         filter->fo_read_cache = 1; /* enable read-only cache by default */
-        filter->fo_writethrough_cache = 0; /* disable writethrough cache */
+        filter->fo_writethrough_cache = 1; /* enable writethrough cache */
         filter->fo_readcache_max_filesize = FILTER_MAX_CACHE_SIZE;
         filter->fo_fmd_max_num = FILTER_FMD_MAX_NUM_DEFAULT;
         filter->fo_fmd_max_age = FILTER_FMD_MAX_AGE_DEFAULT;
@@ -3142,6 +3142,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
         struct llog_cookie *fcc = NULL;
         struct filter_obd *filter;
         int rc, err, locked = 0, sync = 0;
+        loff_t old_size = 0;
         unsigned int ia_valid;
         struct inode *inode;
         struct iattr iattr;
@@ -3165,6 +3166,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
         }
 
         if (ia_valid & ATTR_SIZE || ia_valid & (ATTR_UID | ATTR_GID)) {
+                old_size = i_size_read(inode);
                 DQUOT_INIT(inode);
                 LOCK_INODE_MUTEX(inode);
                 locked = 1;
@@ -3247,9 +3249,17 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                 fcc = NULL;
         }
 
+        /* For a partial-page truncate, flush the page to disk immediately
+         * to avoid data corruption during direct disk write. b=17397 */
+        if (!sync && (iattr.ia_valid & ATTR_SIZE) &&
+            old_size != iattr.ia_size && (iattr.ia_size & ~CFS_PAGE_MASK)) {
+                err = filemap_fdatawrite_range(inode->i_mapping, iattr.ia_size,
+                                               iattr.ia_size + 1);
+                if (!rc)
+                        rc = err;
+        }
+
         if (locked) {
-                /* truncate can leave dirty pages in the cache.
-                 * we'll take care of them in write path -bzzz */
                 UNLOCK_INODE_MUTEX(inode);
                 locked = 0;
         }
index c75135a..74d5aa8 100644 (file)
@@ -719,7 +719,9 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
                  * be able to proceed in filter_commitrw_write(). thus let's
                  * just wait for writeout completion, should be rare enough.
                  * -bzzz */
-                wait_on_page_writeback(lnb->page);
+                if (obd->u.filter.fo_writethrough_cache)
+                        wait_on_page_writeback(lnb->page);
+                BUG_ON(PageWriteback(lnb->page));
 
                 /* If the filter writes a partial page, then has the file
                  * extended, the client will read in the whole page.  the
index c7adae7..8940804 100644 (file)
@@ -609,8 +609,10 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 LASSERT(PageLocked(lnb->page));
                 LASSERT(!PageWriteback(lnb->page));
 
-                /* truncate might leave tail dirty */
-                clear_page_dirty_for_io(lnb->page);
+                /* earlier filemap_write_and_wait() should leave pages clean */
+                if (fo->fo_writethrough_cache)
+                        clear_page_dirty_for_io(lnb->page);
+                LASSERT(!PageDirty(lnb->page));
 
                 SetPageUptodate(lnb->page);
 
index 357bce4..ca92df4 100755 (executable)
@@ -262,7 +262,10 @@ for NAME in $CONFIGS; do
                SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
                [ $SPACE -lt $SIZE ] && SIZE=$((SPACE * 3 / 4))
                $DEBUG_OFF
-               ./fsx -c 50 -p 1000 -P $TMP -l $SIZE \
+               FSX_SEED=${FSX_SEED:-$RANDOM}
+               rm -f $MOUNT/fsxfile
+               $LFS setstripe -c -1 $MOUNT/fsxfile
+               ./fsx -c 50 -p 1000 -S $FSX_SEED -P $TMP -l $SIZE \
                        -N $(($COUNT * 100)) $MOUNT/fsxfile
                $DEBUG_ON
                $CLEANUP