--- /dev/null
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
+.new.........fs/nfs/dir.c
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
+.new.........fs/nfs/dir.c
+Index: linux-2.6.4-51.0/fs/nfs/dir.c
+===================================================================
+--- linux-2.6.4-51.0.orig/fs/nfs/dir.c 2004-04-05 17:09:16.000000000 -0400
++++ linux-2.6.4-51.0/fs/nfs/dir.c 2004-04-05 17:09:23.000000000 -0400
+@@ -782,7 +782,7 @@
+ if (nd->flags & LOOKUP_DIRECTORY)
+ return 0;
+ /* Are we trying to write to a read only partition? */
+- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
+ return 0;
+ return 1;
+ }
+@@ -803,7 +803,7 @@
+ dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+
+ /* Let vfs_create() deal with O_EXCL */
+- if (nd->intent.open.flags & O_EXCL)
++ if (nd->intent.it_flags & O_EXCL)
+ goto no_entry;
+
+ /* Open the file on the server */
+@@ -811,7 +811,7 @@
+ /* Revalidate parent directory attribute cache */
+ nfs_revalidate_inode(NFS_SERVER(dir), dir);
+
+- if (nd->intent.open.flags & O_CREAT) {
++ if (nd->intent.it_flags & O_CREAT) {
+ nfs_begin_data_update(dir);
+ inode = nfs4_atomic_open(dir, dentry, nd);
+ nfs_end_data_update(dir);
+@@ -827,7 +827,7 @@
+ break;
+ /* This turned out not to be a regular file */
+ case -ELOOP:
+- if (!(nd->intent.open.flags & O_NOFOLLOW))
++ if (!(nd->intent.it_flags & O_NOFOLLOW))
+ goto no_open;
+ /* case -EISDIR: */
+ /* case -EINVAL: */
+@@ -861,7 +861,7 @@
+ dir = parent->d_inode;
+ if (!is_atomic_open(dir, nd))
+ goto no_open;
+- openflags = nd->intent.open.flags;
++ openflags = nd->intent.it_flags;
+ if (openflags & O_CREAT) {
+ /* If this is a negative dentry, just drop it */
+ if (!inode)
+Index: linux-2.6.4-51.0/fs/nfs/nfs4proc.c
+===================================================================
+--- linux-2.6.4-51.0.orig/fs/nfs/nfs4proc.c 2004-04-05 12:41:59.000000000 -0400
++++ linux-2.6.4-51.0/fs/nfs/nfs4proc.c 2004-04-05 17:09:23.000000000 -0400
+@@ -792,17 +792,17 @@
+ struct nfs4_state *state;
+
+ if (nd->flags & LOOKUP_CREATE) {
+- attr.ia_mode = nd->intent.open.create_mode;
++ attr.ia_mode = nd->intent.it_create_mode;
+ attr.ia_valid = ATTR_MODE;
+ if (!IS_POSIXACL(dir))
+ attr.ia_mode &= ~current->fs->umask;
+ } else {
+ attr.ia_valid = 0;
+- BUG_ON(nd->intent.open.flags & O_CREAT);
++ BUG_ON(nd->intent.it_flags & O_CREAT);
+ }
+
+ cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+- state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state))
+ return (struct inode *)state;
+Index: linux-2.6.4-51.0/fs/cifs/dir.c
+===================================================================
+--- linux-2.6.4-51.0.orig/fs/cifs/dir.c 2004-04-05 12:41:59.000000000 -0400
++++ linux-2.6.4-51.0/fs/cifs/dir.c 2004-04-05 17:13:47.000000000 -0400
+@@ -146,18 +146,18 @@
+ if(nd) {
+ cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path));
+
+- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY)
++ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY)
+ desiredAccess = GENERIC_READ;
+- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY)
++ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY)
+ desiredAccess = GENERIC_WRITE;
+- else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR)
++ else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR)
+ desiredAccess = GENERIC_ALL;
+
+- if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
++ if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+ disposition = FILE_CREATE;
+- else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
++ else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+ disposition = FILE_OVERWRITE_IF;
+- else if((nd->intent.open.flags & O_CREAT) == O_CREAT)
++ else if((nd->intent.it_flags & O_CREAT) == O_CREAT)
+ disposition = FILE_OPEN_IF;
+ else {
+ cFYI(1,("Create flag not set in create function"));
+@@ -319,7 +319,7 @@
+ parent_dir_inode, direntry->d_name.name, direntry));
+
+ if(nd) { /* BB removeme */
+- cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags));
++ cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.it_flags));
+ } /* BB removeme BB */
+ /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */
+
--- /dev/null
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+# 2004/04/12 13:09:10-07:00 akpm@osdl.org
+# [PATCH] O_DIRECT data exposure fixes
+#
+# From: Badari Pulavarty, Suparna Bhattacharya, Andrew Morton
+#
+# Forward port of Stephen Tweedie's DIO fixes from 2.4, to fix various DIO vs
+# buffered IO exposures involving races causing:
+#
+# (a) stale data from uninstantiated blocks to be read, e.g.
+#
+# - O_DIRECT reads against buffered writes to a sparse region
+#
+# - O_DIRECT writes to a sparse region against buffered reads
+#
+# (b) potential data corruption with
+#
+# - O_DIRECT IOs against truncate
+#
+# due to writes to truncated blocks (which may have been reallocated to
+# another file).
+#
+# Summary of fixes:
+#
+# 1) All the changes affect only regular files. RAW/O_DIRECT on block are
+# unaffected.
+#
+# 2) The DIO code will not fill in sparse regions on a write. Instead
+# -ENOTBLK is returned and the generic file write code would fallthrough to
+# buffered IO in this case followed by writing through the pages to disk
+# using filemap_fdatawrite/wait.
+#
+# 3) i_sem is held during both DIO reads and writes. For reads, and writes
+# to already allocated blocks, it is released right after IO is issued,
+# while for writes to newly allocated blocks (e.g file extending writes and
+# hole overwrites) it is held all the way through until IO completes (and
+# data is committed to disk).
+#
+# 4) filemap_fdatawrite/wait are called under i_sem to synchronize buffered
+# pages to disk blocks before issuing DIO.
+#
+# 5) A new rwsem (i_alloc_sem) is held in shared mode all the while a DIO
+# (read or write) is in progress, and in exclusive mode by truncate to guard
+# against deallocation of data blocks during DIO.
+#
+# 6) All this new locking has been pushed down into blockdev_direct_IO to
+# avoid interfering with NFS direct IO. The locks are taken in the order
+# i_sem followed by i_alloc_sem. While i_sem may be released after IO
+# submission in some cases, i_alloc_sem is held through until dio_complete
+# (in the case of AIO-DIO this happens through the IO completion callback).
+#
+# 7) i_sem and i_alloc_sem are not held for the _nolock versions of write
+# routines, as used by blockdev and XFS. Filesystems can specify the
+# needs_special_locking parameter to __blockdev_direct_IO from their direct
+# IO address space op accordingly.
+#
+# Note from Badari:
+# Here is the locking (when needs_special_locking is true):
+#
+# (1) generic_file_*_write() holds i_sem (as before) and calls
+# ->direct_IO(). blockdev_direct_IO gets i_alloc_sem and call
+# direct_io_worker().
+#
+# (2) generic_file_*_read() does not hold any locks. blockdev_direct_IO()
+# gets i_sem and then i_alloc_sem and calls direct_io_worker() to do the
+# work
+#
+# (3) direct_io_worker() does the work and drops i_sem after submitting IOs
+# if appropriate and drops i_alloc_sem after completing IOs.
+#
+# fs/direct-io.c
+# 2004/04/12 10:54:33-07:00 akpm@osdl.org +80 -13
+# O_DIRECT data exposure fixes
+#
+# fs/inode.c
+# 2004/04/12 10:54:33-07:00 akpm@osdl.org +1 -0
+# O_DIRECT data exposure fixes
+#
+# fs/open.c
+# 2004/04/12 10:54:33-07:00 akpm@osdl.org +2 -0
+# O_DIRECT data exposure fixes
+#
+# fs/xfs/linux/xfs_aops.c
+# 2004/04/12 10:54:33-07:00 akpm@osdl.org +2 -1
+# O_DIRECT data exposure fixes
+#
+# include/linux/fs.h
+# 2004/04/12 10:54:33-07:00 akpm@osdl.org +28 -3
+# O_DIRECT data exposure fixes
+#
+# mm/filemap.c
+# 2004/04/12 10:54:33-07:00 akpm@osdl.org +41 -12
+# O_DIRECT data exposure fixes
+#
+diff -Nru a/fs/direct-io.c b/fs/direct-io.c
+--- a/fs/direct-io.c Mon May 3 16:20:32 2004
++++ b/fs/direct-io.c Mon May 3 16:20:32 2004
+@@ -52,6 +52,10 @@
+ *
+ * If blkfactor is zero then the user's request was aligned to the filesystem's
+ * blocksize.
++ *
++ * needs_locking is set for regular files on direct-IO-naive filesystems. It
++ * determines whether we need to do the fancy locking which prevents direct-IO
++ * from being able to read uninitialised disk blocks.
+ */
+
+ struct dio {
+@@ -59,6 +63,7 @@
+ struct bio *bio; /* bio under assembly */
+ struct inode *inode;
+ int rw;
++ int needs_locking; /* doesn't change */
+ unsigned blkbits; /* doesn't change */
+ unsigned blkfactor; /* When we're using an alignment which
+ is finer than the filesystem's soft
+@@ -206,6 +211,8 @@
+ {
+ if (dio->end_io)
+ dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
++ if (dio->needs_locking)
++ up_read(&dio->inode->i_alloc_sem);
+ }
+
+ /*
+@@ -449,6 +456,7 @@
+ unsigned long fs_count; /* Number of filesystem-sized blocks */
+ unsigned long dio_count;/* Number of dio_block-sized blocks */
+ unsigned long blkmask;
++ int beyond_eof = 0;
+
+ /*
+ * If there was a memory error and we've overwritten all the
+@@ -466,8 +474,19 @@
+ if (dio_count & blkmask)
+ fs_count++;
+
++ if (dio->needs_locking) {
++ if (dio->block_in_file >= (i_size_read(dio->inode) >>
++ dio->blkbits))
++ beyond_eof = 1;
++ }
++ /*
++ * For writes inside i_size we forbid block creations: only
++ * overwrites are permitted. We fall back to buffered writes
++ * at a higher level for inside-i_size block-instantiating
++ * writes.
++ */
+ ret = (*dio->get_blocks)(dio->inode, fs_startblk, fs_count,
+- map_bh, dio->rw == WRITE);
++ map_bh, (dio->rw == WRITE) && beyond_eof);
+ }
+ return ret;
+ }
+@@ -774,6 +793,10 @@
+ if (!buffer_mapped(map_bh)) {
+ char *kaddr;
+
++ /* AKPM: eargh, -ENOTBLK is a hack */
++ if (dio->rw == WRITE)
++ return -ENOTBLK;
++
+ if (dio->block_in_file >=
+ i_size_read(dio->inode)>>blkbits) {
+ /* We hit eof */
+@@ -839,21 +862,21 @@
+ return ret;
+ }
+
++/*
++ * Releases both i_sem and i_alloc_sem
++ */
+ static int
+ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
+ const struct iovec *iov, loff_t offset, unsigned long nr_segs,
+- unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io)
++ unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io,
++ struct dio *dio)
+ {
+ unsigned long user_addr;
+ int seg;
+ int ret = 0;
+ int ret2;
+- struct dio *dio;
+ size_t bytes;
+
+- dio = kmalloc(sizeof(*dio), GFP_KERNEL);
+- if (!dio)
+- return -ENOMEM;
+ dio->is_async = !is_sync_kiocb(iocb);
+
+ dio->bio = NULL;
+@@ -864,7 +887,6 @@
+ dio->start_zero_done = 0;
+ dio->block_in_file = offset >> blkbits;
+ dio->blocks_available = 0;
+-
+ dio->cur_page = NULL;
+
+ dio->boundary = 0;
+@@ -953,6 +975,13 @@
+ dio_cleanup(dio);
+
+ /*
++ * All new block allocations have been performed. We can let i_sem
++ * go now.
++ */
++ if (dio->needs_locking)
++ up(&dio->inode->i_sem);
++
++ /*
+ * OK, all BIOs are submitted, so we can decrement bio_count to truly
+ * reflect the number of to-be-processed BIOs.
+ */
+@@ -987,11 +1016,17 @@
+
+ /*
+ * This is a library function for use by filesystem drivers.
++ *
++ * For writes to S_ISREG files, we are called under i_sem and return with i_sem
++ * held, even though it is internally dropped.
++ *
++ * For writes to S_ISBLK files, i_sem is not held on entry; it is never taken.
+ */
+ int
+-blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
++__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+ struct block_device *bdev, const struct iovec *iov, loff_t offset,
+- unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io)
++ unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io,
++ int needs_special_locking)
+ {
+ int seg;
+ size_t size;
+@@ -1000,6 +1035,8 @@
+ unsigned bdev_blkbits = 0;
+ unsigned blocksize_mask = (1 << blkbits) - 1;
+ ssize_t retval = -EINVAL;
++ struct dio *dio;
++ int needs_locking;
+
+ if (bdev)
+ bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
+@@ -1025,10 +1062,40 @@
+ }
+ }
+
+- retval = direct_io_worker(rw, iocb, inode, iov, offset,
+- nr_segs, blkbits, get_blocks, end_io);
++ dio = kmalloc(sizeof(*dio), GFP_KERNEL);
++ retval = -ENOMEM;
++ if (!dio)
++ goto out;
++
++ /*
++ * For regular files,
++ * readers need to grab i_sem and i_alloc_sem
++ * writers need to grab i_alloc_sem only (i_sem is already held)
++ */
++ needs_locking = 0;
++ if (S_ISREG(inode->i_mode) && needs_special_locking) {
++ needs_locking = 1;
++ if (rw == READ) {
++ struct address_space *mapping;
++
++ mapping = iocb->ki_filp->f_mapping;
++ down(&inode->i_sem);
++ retval = filemap_write_and_wait(mapping);
++ if (retval) {
++ up(&inode->i_sem);
++ kfree(dio);
++ goto out;
++ }
++ }
++ down_read(&inode->i_alloc_sem);
++ }
++ dio->needs_locking = needs_locking;
++
++ retval = direct_io_worker(rw, iocb, inode, iov, offset,
++ nr_segs, blkbits, get_blocks, end_io, dio);
++ if (needs_locking && rw == WRITE)
++ down(&inode->i_sem);
+ out:
+ return retval;
+ }
+-
+-EXPORT_SYMBOL(blockdev_direct_IO);
++EXPORT_SYMBOL(__blockdev_direct_IO);
+diff -Nru a/fs/inode.c b/fs/inode.c
+--- a/fs/inode.c Mon May 3 16:20:32 2004
++++ b/fs/inode.c Mon May 3 16:20:32 2004
+@@ -185,6 +185,7 @@
+ INIT_LIST_HEAD(&inode->i_dentry);
+ INIT_LIST_HEAD(&inode->i_devices);
+ sema_init(&inode->i_sem, 1);
++ init_rwsem(&inode->i_alloc_sem);
+ INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
+ spin_lock_init(&inode->i_data.page_lock);
+ init_MUTEX(&inode->i_data.i_shared_sem);
+diff -Nru a/fs/open.c b/fs/open.c
+--- a/fs/open.c Mon May 3 16:20:32 2004
++++ b/fs/open.c Mon May 3 16:20:32 2004
+@@ -192,7 +192,9 @@
+ newattrs.ia_size = length;
+ newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+ down(&dentry->d_inode->i_sem);
++ down_write(&dentry->d_inode->i_alloc_sem);
+ err = notify_change(dentry, &newattrs);
++ up_write(&dentry->d_inode->i_alloc_sem);
+ up(&dentry->d_inode->i_sem);
+ return err;
+ }
+diff -Nru a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c
+--- a/fs/xfs/linux/xfs_aops.c Mon May 3 16:20:32 2004
++++ b/fs/xfs/linux/xfs_aops.c Mon May 3 16:20:32 2004
+@@ -1032,7 +1032,8 @@
+ if (error)
+ return -error;
+
+- return blockdev_direct_IO(rw, iocb, inode, iomap.iomap_target->pbr_bdev,
++ return blockdev_direct_IO_no_locking(rw, iocb, inode,
++ iomap.iomap_target->pbr_bdev,
+ iov, offset, nr_segs,
+ linvfs_get_blocks_direct,
+ linvfs_unwritten_convert_direct);
+diff -Nru a/include/linux/fs.h b/include/linux/fs.h
+--- a/include/linux/fs.h Mon May 3 16:20:32 2004
++++ b/include/linux/fs.h Mon May 3 16:20:32 2004
+@@ -397,6 +397,7 @@
+ unsigned short i_bytes;
+ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
+ struct semaphore i_sem;
++ struct rw_semaphore i_alloc_sem;
+ struct inode_operations *i_op;
+ struct file_operations *i_fop; /* former ->i_op->default_file_ops */
+ struct super_block *i_sb;
+@@ -1235,6 +1236,7 @@
+ extern int filemap_fdatawrite(struct address_space *);
+ extern int filemap_flush(struct address_space *);
+ extern int filemap_fdatawait(struct address_space *);
++extern int filemap_write_and_wait(struct address_space *mapping);
+ extern void sync_supers(void);
+ extern void sync_filesystems(int wait);
+ extern void emergency_sync(void);
+@@ -1347,9 +1349,6 @@
+ file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+ extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
+ const struct iovec *iov, loff_t offset, unsigned long nr_segs);
+-extern int blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+- struct block_device *bdev, const struct iovec *iov, loff_t offset,
+- unsigned long nr_segs, get_blocks_t *get_blocks, dio_iodone_t *end_io);
+ extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos);
+ ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
+@@ -1369,6 +1368,32 @@
+ ppos,
+ desc,
+ actor);
++}
++
++int __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
++ struct block_device *bdev, const struct iovec *iov, loff_t offset,
++ unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io,
++ int needs_special_locking);
++
++/*
++ * For filesystems which need locking between buffered and direct access
++ */
++static inline int blockdev_direct_IO(int rw, struct kiocb *iocb,
++ struct inode *inode, struct block_device *bdev, const struct iovec *iov,
++ loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks,
++ dio_iodone_t end_io)
++{
++ return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
++ nr_segs, get_blocks, end_io, 1);
++}
++
++static inline int blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
++ struct inode *inode, struct block_device *bdev, const struct iovec *iov,
++ loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks,
++ dio_iodone_t end_io)
++{
++ return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
++ nr_segs, get_blocks, end_io, 0);
+ }
+
+ extern struct file_operations generic_ro_fops;
+diff -Nru a/mm/filemap.c b/mm/filemap.c
+--- a/mm/filemap.c Mon May 3 16:20:32 2004
++++ b/mm/filemap.c Mon May 3 16:20:32 2004
+@@ -73,6 +73,9 @@
+ * ->mmap_sem
+ * ->i_sem (msync)
+ *
++ * ->i_sem
++ * ->i_alloc_sem (various)
++ *
+ * ->inode_lock
+ * ->sb_lock (fs/fs-writeback.c)
+ * ->mapping->page_lock (__sync_single_inode)
+@@ -228,6 +231,18 @@
+
+ EXPORT_SYMBOL(filemap_fdatawait);
+
++int filemap_write_and_wait(struct address_space *mapping)
++{
++ int retval = 0;
++
++ if (mapping->nrpages) {
++ retval = filemap_fdatawrite(mapping);
++ if (retval == 0)
++ retval = filemap_fdatawait(mapping);
++ }
++ return retval;
++}
++
+ /*
+ * This adds a page to the page cache, starting out as locked, unreferenced,
+ * not uptodate and with no errors.
+@@ -1716,6 +1731,7 @@
+
+ /*
+ * Write to a file through the page cache.
++ * Called under i_sem for S_ISREG files.
+ *
+ * We put everything into the page cache prior to writing it. This is not a
+ * problem when writing full pages. With partial pages, however, we first have
+@@ -1806,12 +1822,19 @@
+ /*
+ * Sync the fs metadata but not the minor inode changes and
+ * of course not the data as we did direct DMA for the IO.
++ * i_sem is held, which protects generic_osync_inode() from
++ * livelocking.
+ */
+ if (written >= 0 && file->f_flags & O_SYNC)
+ status = generic_osync_inode(inode, mapping, OSYNC_METADATA);
+ if (written >= 0 && !is_sync_kiocb(iocb))
+ written = -EIOCBQUEUED;
+- goto out_status;
++ if (written != -ENOTBLK)
++ goto out_status;
++ /*
++ * direct-io write to a hole: fall through to buffered I/O
++ */
++ written = 0;
+ }
+
+ buf = iov->iov_base;
+@@ -1900,6 +1923,14 @@
+ OSYNC_METADATA|OSYNC_DATA);
+ }
+
++ /*
++ * If we get here for O_DIRECT writes then we must have fallen through
++ * to buffered writes (block instantiation inside i_size). So we sync
++ * the file data here, to try to honour O_DIRECT expectations.
++ */
++ if (unlikely(file->f_flags & O_DIRECT) && written)
++ status = filemap_write_and_wait(mapping);
++
+ out_status:
+ err = written ? written : status;
+ out:
+@@ -1991,6 +2022,9 @@
+
+ EXPORT_SYMBOL(generic_file_writev);
+
++/*
++ * Called under i_sem for writes to S_ISREG files
++ */
+ ssize_t
+ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs)
+@@ -1999,18 +2033,13 @@
+ struct address_space *mapping = file->f_mapping;
+ ssize_t retval;
+
+- if (mapping->nrpages) {
+- retval = filemap_fdatawrite(mapping);
+- if (retval == 0)
+- retval = filemap_fdatawait(mapping);
+- if (retval)
+- goto out;
++ retval = filemap_write_and_wait(mapping);
++ if (retval == 0) {
++ retval = mapping->a_ops->direct_IO(rw, iocb, iov,
++ offset, nr_segs);
++ if (rw == WRITE && mapping->nrpages)
++ invalidate_inode_pages2(mapping);
+ }
+-
+- retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs);
+- if (rw == WRITE && mapping->nrpages)
+- invalidate_inode_pages2(mapping);
+-out:
+ return retval;
+ }
+