Whamcloud - gitweb
Adding patch set for vanilla 2.6.5, had to port over one of the suse patched from...
authorefelix <efelix>
Wed, 5 May 2004 15:37:42 +0000 (15:37 +0000)
committerefelix <efelix>
Wed, 5 May 2004 15:37:42 +0000 (15:37 +0000)
lustre/kernel_patches/patches/cifs_2.6.5.patch [new file with mode: 0644]
lustre/kernel_patches/patches/nfs-cifs-intent-vanilla-2.6.patch [new file with mode: 0644]
lustre/kernel_patches/patches/o_direct-2.6.5.patch [new file with mode: 0644]
lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch [new file with mode: 0644]
lustre/kernel_patches/series/vanilla-2.6.5 [new file with mode: 0644]

diff --git a/lustre/kernel_patches/patches/cifs_2.6.5.patch b/lustre/kernel_patches/patches/cifs_2.6.5.patch
new file mode 100644 (file)
index 0000000..35db137
--- /dev/null
@@ -0,0 +1,65 @@
+#EJF i took out part of this change set
+
+
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+#   2004/01/30 17:20:19-06:00 stevef@stevef95.austin.ibm.com 
+#   Relax requested CIFS permissions on open to simply request GENERIC_READ and GENERIC_WRITE (instead of GENERIC_ALL which
+#   can unnecessarily conflict with share permissions by asking implicitly for take ownership and other unneeded flags)
+# 
+# fs/cifs/dir.c
+#   2004/01/30 17:20:13-06:00 stevef@stevef95.austin.ibm.com +7 -3
+#   Relax requested CIFS permissions on open to simply request GENERIC_READ and GENERIC_WRITE (instead of GENERIC_ALL which
+#   can unnecessarily conflict with share permissions by asking implicitly for take ownership and other unneeded flags)
+# 
+# fs/cifs/file.c
+#   2004/01/30 17:20:13-06:00 stevef@stevef95.austin.ibm.com +12 -4
+#   Relax requested CIFS permissions on open to simply request GENERIC_READ and GENERIC_WRITE (instead of GENERIC_ALL which
+#   can unnecessarily conflict with share permissions by asking implicitly for take ownership and other unneeded flags)
+# 
+diff -Nru a/fs/cifs/dir.c b/fs/cifs/dir.c
+--- a/fs/cifs/dir.c    Mon May  3 16:32:08 2004
++++ b/fs/cifs/dir.c    Mon May  3 16:32:08 2004
+@@ -125,7 +125,7 @@
+       int rc = -ENOENT;
+       int xid;
+       int oplock = 0;
+-      int desiredAccess = GENERIC_ALL;
++      int desiredAccess = GENERIC_READ | GENERIC_WRITE;
+       __u16 fileHandle;
+       struct cifs_sb_info *cifs_sb;
+       struct cifsTconInfo *pTcon;
+@@ -150,8 +150,12 @@
+                       desiredAccess = GENERIC_READ;
+               else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY)
+                       desiredAccess = GENERIC_WRITE;
+-              else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR)
+-                      desiredAccess = GENERIC_ALL;
++              else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) {
++                      /* GENERIC_ALL is too much permission to request */
++                      /* can cause unnecessary access denied on create */
++                      /* desiredAccess = GENERIC_ALL; */
++                      desiredAccess = GENERIC_READ | GENERIC_WRITE;
++              }
+               if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+                       disposition = FILE_CREATE;
+diff -Nru a/fs/cifs/file.c b/fs/cifs/file.c
+--- a/fs/cifs/file.c   Mon May  3 16:32:08 2004
++++ b/fs/cifs/file.c   Mon May  3 16:32:08 2004
+@@ -87,8 +87,12 @@
+               desiredAccess = GENERIC_READ;
+       else if ((file->f_flags & O_ACCMODE) == O_WRONLY)
+               desiredAccess = GENERIC_WRITE;
+-      else if ((file->f_flags & O_ACCMODE) == O_RDWR)
+-              desiredAccess = GENERIC_ALL;
++      else if ((file->f_flags & O_ACCMODE) == O_RDWR) {
++              /* GENERIC_ALL is too much permission to request */
++              /* can cause unnecessary access denied on create */
++              /* desiredAccess = GENERIC_ALL; */
++              desiredAccess = GENERIC_READ | GENERIC_WRITE;
++      }
+ /*********************************************************************
+  *  open flag mapping table:
diff --git a/lustre/kernel_patches/patches/nfs-cifs-intent-vanilla-2.6.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-vanilla-2.6.patch
new file mode 100644 (file)
index 0000000..5bcd4f0
--- /dev/null
@@ -0,0 +1,116 @@
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
+.new.........fs/nfs/dir.c
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
+.new.........fs/nfs/dir.c
+Index: linux-2.6.4-51.0/fs/nfs/dir.c
+===================================================================
+--- linux-2.6.4-51.0.orig/fs/nfs/dir.c 2004-04-05 17:09:16.000000000 -0400
++++ linux-2.6.4-51.0/fs/nfs/dir.c      2004-04-05 17:09:23.000000000 -0400
+@@ -782,7 +782,7 @@
+       if (nd->flags & LOOKUP_DIRECTORY)
+               return 0;
+       /* Are we trying to write to a read only partition? */
+-      if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
++      if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
+               return 0;
+       return 1;
+ }
+@@ -803,7 +803,7 @@
+       dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+       /* Let vfs_create() deal with O_EXCL */
+-      if (nd->intent.open.flags & O_EXCL)
++      if (nd->intent.it_flags & O_EXCL)
+               goto no_entry;
+       /* Open the file on the server */
+@@ -811,7 +811,7 @@
+       /* Revalidate parent directory attribute cache */
+       nfs_revalidate_inode(NFS_SERVER(dir), dir);
+-      if (nd->intent.open.flags & O_CREAT) {
++      if (nd->intent.it_flags & O_CREAT) {
+               nfs_begin_data_update(dir);
+               inode = nfs4_atomic_open(dir, dentry, nd);
+               nfs_end_data_update(dir);
+@@ -827,7 +827,7 @@
+                               break;
+                       /* This turned out not to be a regular file */
+                       case -ELOOP:
+-                              if (!(nd->intent.open.flags & O_NOFOLLOW))
++                              if (!(nd->intent.it_flags & O_NOFOLLOW))
+                                       goto no_open;
+                       /* case -EISDIR: */
+                       /* case -EINVAL: */
+@@ -861,7 +861,7 @@
+       dir = parent->d_inode;
+       if (!is_atomic_open(dir, nd))
+               goto no_open;
+-      openflags = nd->intent.open.flags;
++      openflags = nd->intent.it_flags;
+       if (openflags & O_CREAT) {
+               /* If this is a negative dentry, just drop it */
+               if (!inode)
+Index: linux-2.6.4-51.0/fs/nfs/nfs4proc.c
+===================================================================
+--- linux-2.6.4-51.0.orig/fs/nfs/nfs4proc.c    2004-04-05 12:41:59.000000000 -0400
++++ linux-2.6.4-51.0/fs/nfs/nfs4proc.c 2004-04-05 17:09:23.000000000 -0400
+@@ -792,17 +792,17 @@
+       struct nfs4_state *state;
+       if (nd->flags & LOOKUP_CREATE) {
+-              attr.ia_mode = nd->intent.open.create_mode;
++              attr.ia_mode = nd->intent.it_create_mode;
+               attr.ia_valid = ATTR_MODE;
+               if (!IS_POSIXACL(dir))
+                       attr.ia_mode &= ~current->fs->umask;
+       } else {
+               attr.ia_valid = 0;
+-              BUG_ON(nd->intent.open.flags & O_CREAT);
++              BUG_ON(nd->intent.it_flags & O_CREAT);
+       }
+       cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+-      state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
++      state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred);
+       put_rpccred(cred);
+       if (IS_ERR(state))
+               return (struct inode *)state;
+Index: linux-2.6.4-51.0/fs/cifs/dir.c
+===================================================================
+--- linux-2.6.4-51.0.orig/fs/cifs/dir.c        2004-04-05 12:41:59.000000000 -0400
++++ linux-2.6.4-51.0/fs/cifs/dir.c     2004-04-05 17:13:47.000000000 -0400
+@@ -146,18 +146,18 @@
+       if(nd) { 
+               cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path));
+-              if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY)
++              if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY)
+                       desiredAccess = GENERIC_READ;
+-              else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY)
++              else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY)
+                       desiredAccess = GENERIC_WRITE;
+-              else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR)
++              else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR)
+                       desiredAccess = GENERIC_ALL;
+-              if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
++              if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+                       disposition = FILE_CREATE;
+-              else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
++              else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+                       disposition = FILE_OVERWRITE_IF;
+-              else if((nd->intent.open.flags & O_CREAT) == O_CREAT)
++              else if((nd->intent.it_flags & O_CREAT) == O_CREAT)
+                       disposition = FILE_OPEN_IF;
+               else {
+                       cFYI(1,("Create flag not set in create function"));
+@@ -319,7 +319,7 @@
+             parent_dir_inode, direntry->d_name.name, direntry));
+       if(nd) {  /* BB removeme */
+-              cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags));
++              cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.it_flags));
+       } /* BB removeme BB */
+       /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */
diff --git a/lustre/kernel_patches/patches/o_direct-2.6.5.patch b/lustre/kernel_patches/patches/o_direct-2.6.5.patch
new file mode 100644 (file)
index 0000000..76a9399
--- /dev/null
@@ -0,0 +1,500 @@
+# This is a BitKeeper generated diff -Nru style patch.
+#
+# ChangeSet
+#   2004/04/12 13:09:10-07:00 akpm@osdl.org 
+#   [PATCH] O_DIRECT data exposure fixes
+#   
+#   From: Badari Pulavarty, Suparna Bhattacharya, Andrew Morton
+#   
+#   Forward port of Stephen Tweedie's DIO fixes from 2.4, to fix various DIO vs
+#   buffered IO exposures involving races causing:
+#   
+#   (a) stale data from uninstantiated blocks to be read, e.g.
+#   
+#       - O_DIRECT reads against buffered writes to a sparse region
+#   
+#       - O_DIRECT writes to a sparse region against buffered reads
+#   
+#   (b) potential data corruption with
+#   
+#       - O_DIRECT IOs against truncate
+#   
+#       due to writes to truncated blocks (which may have been reallocated to
+#       another file).
+#   
+#   Summary of fixes:
+#   
+#   1) All the changes affect only regular files.  RAW/O_DIRECT on block are
+#      unaffected. 
+#   
+#   2) The DIO code will not fill in sparse regions on a write.  Instead
+#      -ENOTBLK is returned and the generic file write code would fallthrough to
+#      buffered IO in this case followed by writing through the pages to disk
+#      using filemap_fdatawrite/wait.
+#   
+#   3) i_sem is held during both DIO reads and writes.  For reads, and writes
+#      to already allocated blocks, it is released right after IO is issued,
+#      while for writes to newly allocated blocks (e.g file extending writes and
+#      hole overwrites) it is held all the way through until IO completes (and
+#      data is committed to disk).
+#   
+#   4) filemap_fdatawrite/wait are called under i_sem to synchronize buffered
+#      pages to disk blocks before issuing DIO.
+#   
+#   5) A new rwsem (i_alloc_sem) is held in shared mode all the while a DIO
+#      (read or write) is in progress, and in exclusive mode by truncate to guard
+#      against deallocation of data blocks during DIO. 
+#   
+#   6) All this new locking has been pushed down into blockdev_direct_IO to
+#      avoid interfering with NFS direct IO.  The locks are taken in the order
+#      i_sem followed by i_alloc_sem.  While i_sem may be released after IO
+#      submission in some cases, i_alloc_sem is held through until dio_complete
+#      (in the case of AIO-DIO this happens through the IO completion callback).
+#   
+#   7) i_sem and i_alloc_sem are not held for the _nolock versions of write
+#      routines, as used by blockdev and XFS.  Filesystems can specify the
+#      needs_special_locking parameter to __blockdev_direct_IO from their direct
+#      IO address space op accordingly.
+#   
+#   Note from Badari:
+#   Here is the locking (when needs_special_locking is true):
+#   
+#   (1) generic_file_*_write() holds i_sem (as before) and calls
+#       ->direct_IO().  blockdev_direct_IO gets i_alloc_sem and call
+#       direct_io_worker().
+#   
+#   (2) generic_file_*_read() does not hold any locks.  blockdev_direct_IO()
+#       gets i_sem and then i_alloc_sem and calls direct_io_worker() to do the
+#       work
+#   
+#   (3) direct_io_worker() does the work and drops i_sem after submitting IOs
+#       if appropriate and drops i_alloc_sem after completing IOs.
+# 
+# fs/direct-io.c
+#   2004/04/12 10:54:33-07:00 akpm@osdl.org +80 -13
+#   O_DIRECT data exposure fixes
+# 
+# fs/inode.c
+#   2004/04/12 10:54:33-07:00 akpm@osdl.org +1 -0
+#   O_DIRECT data exposure fixes
+# 
+# fs/open.c
+#   2004/04/12 10:54:33-07:00 akpm@osdl.org +2 -0
+#   O_DIRECT data exposure fixes
+# 
+# fs/xfs/linux/xfs_aops.c
+#   2004/04/12 10:54:33-07:00 akpm@osdl.org +2 -1
+#   O_DIRECT data exposure fixes
+# 
+# include/linux/fs.h
+#   2004/04/12 10:54:33-07:00 akpm@osdl.org +28 -3
+#   O_DIRECT data exposure fixes
+# 
+# mm/filemap.c
+#   2004/04/12 10:54:33-07:00 akpm@osdl.org +41 -12
+#   O_DIRECT data exposure fixes
+# 
+diff -Nru a/fs/direct-io.c b/fs/direct-io.c
+--- a/fs/direct-io.c   Mon May  3 16:20:32 2004
++++ b/fs/direct-io.c   Mon May  3 16:20:32 2004
+@@ -52,6 +52,10 @@
+  *
+  * If blkfactor is zero then the user's request was aligned to the filesystem's
+  * blocksize.
++ *
++ * needs_locking is set for regular files on direct-IO-naive filesystems.  It
++ * determines whether we need to do the fancy locking which prevents direct-IO
++ * from being able to read uninitialised disk blocks.
+  */
+ struct dio {
+@@ -59,6 +63,7 @@
+       struct bio *bio;                /* bio under assembly */
+       struct inode *inode;
+       int rw;
++      int needs_locking;              /* doesn't change */
+       unsigned blkbits;               /* doesn't change */
+       unsigned blkfactor;             /* When we're using an alignment which
+                                          is finer than the filesystem's soft
+@@ -206,6 +211,8 @@
+ {
+       if (dio->end_io)
+               dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
++      if (dio->needs_locking)
++              up_read(&dio->inode->i_alloc_sem);
+ }
+ /*
+@@ -449,6 +456,7 @@
+       unsigned long fs_count; /* Number of filesystem-sized blocks */
+       unsigned long dio_count;/* Number of dio_block-sized blocks */
+       unsigned long blkmask;
++      int beyond_eof = 0;
+       /*
+        * If there was a memory error and we've overwritten all the
+@@ -466,8 +474,19 @@
+               if (dio_count & blkmask)        
+                       fs_count++;
++              if (dio->needs_locking) {
++                      if (dio->block_in_file >= (i_size_read(dio->inode) >>
++                                                      dio->blkbits))
++                              beyond_eof = 1;
++              }
++              /*
++               * For writes inside i_size we forbid block creations: only
++               * overwrites are permitted.  We fall back to buffered writes
++               * at a higher level for inside-i_size block-instantiating
++               * writes.
++               */
+               ret = (*dio->get_blocks)(dio->inode, fs_startblk, fs_count,
+-                              map_bh, dio->rw == WRITE);
++                              map_bh, (dio->rw == WRITE) && beyond_eof);
+       }
+       return ret;
+ }
+@@ -774,6 +793,10 @@
+                       if (!buffer_mapped(map_bh)) {
+                               char *kaddr;
++                              /* AKPM: eargh, -ENOTBLK is a hack */
++                              if (dio->rw == WRITE)
++                                      return -ENOTBLK;
++
+                               if (dio->block_in_file >=
+                                       i_size_read(dio->inode)>>blkbits) {
+                                       /* We hit eof */
+@@ -839,21 +862,21 @@
+       return ret;
+ }
++/*
++ * Releases both i_sem and i_alloc_sem
++ */
+ static int
+ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 
+       const struct iovec *iov, loff_t offset, unsigned long nr_segs, 
+-      unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io)
++      unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io,
++      struct dio *dio)
+ {
+       unsigned long user_addr; 
+       int seg;
+       int ret = 0;
+       int ret2;
+-      struct dio *dio;
+       size_t bytes;
+-      dio = kmalloc(sizeof(*dio), GFP_KERNEL);
+-      if (!dio)
+-              return -ENOMEM;
+       dio->is_async = !is_sync_kiocb(iocb);
+       dio->bio = NULL;
+@@ -864,7 +887,6 @@
+       dio->start_zero_done = 0;
+       dio->block_in_file = offset >> blkbits;
+       dio->blocks_available = 0;
+-
+       dio->cur_page = NULL;
+       dio->boundary = 0;
+@@ -953,6 +975,13 @@
+       dio_cleanup(dio);
+       /*
++       * All new block allocations have been performed.  We can let i_sem
++       * go now.
++       */
++      if (dio->needs_locking)
++              up(&dio->inode->i_sem);
++
++      /*
+        * OK, all BIOs are submitted, so we can decrement bio_count to truly
+        * reflect the number of to-be-processed BIOs.
+        */
+@@ -987,11 +1016,17 @@
+ /*
+  * This is a library function for use by filesystem drivers.
++ *
++ * For writes to S_ISREG files, we are called under i_sem and return with i_sem
++ * held, even though it is internally dropped.
++ *
++ * For writes to S_ISBLK files, i_sem is not held on entry; it is never taken.
+  */
+ int
+-blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 
++__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+       struct block_device *bdev, const struct iovec *iov, loff_t offset, 
+-      unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io)
++      unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io,
++      int needs_special_locking)
+ {
+       int seg;
+       size_t size;
+@@ -1000,6 +1035,8 @@
+       unsigned bdev_blkbits = 0;
+       unsigned blocksize_mask = (1 << blkbits) - 1;
+       ssize_t retval = -EINVAL;
++      struct dio *dio;
++      int needs_locking;
+       if (bdev)
+               bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
+@@ -1025,10 +1062,40 @@
+               }
+       }
+-      retval = direct_io_worker(rw, iocb, inode, iov, offset, 
+-                              nr_segs, blkbits, get_blocks, end_io);
++      dio = kmalloc(sizeof(*dio), GFP_KERNEL);
++      retval = -ENOMEM;
++      if (!dio)
++              goto out;
++
++      /*
++       * For regular files,
++       *      readers need to grab i_sem and i_alloc_sem
++       *      writers need to grab i_alloc_sem only (i_sem is already held)
++       */
++      needs_locking = 0;
++      if (S_ISREG(inode->i_mode) && needs_special_locking) {
++              needs_locking = 1;
++              if (rw == READ) {
++                      struct address_space *mapping;
++
++                      mapping = iocb->ki_filp->f_mapping;
++                      down(&inode->i_sem);
++                      retval = filemap_write_and_wait(mapping);
++                      if (retval) {
++                              up(&inode->i_sem);
++                              kfree(dio);
++                              goto out;
++                      }
++              }
++              down_read(&inode->i_alloc_sem);
++      }
++      dio->needs_locking = needs_locking;
++
++      retval = direct_io_worker(rw, iocb, inode, iov, offset,
++                              nr_segs, blkbits, get_blocks, end_io, dio);
++      if (needs_locking && rw == WRITE)
++              down(&inode->i_sem);
+ out:
+       return retval;
+ }
+-
+-EXPORT_SYMBOL(blockdev_direct_IO);
++EXPORT_SYMBOL(__blockdev_direct_IO);
+diff -Nru a/fs/inode.c b/fs/inode.c
+--- a/fs/inode.c       Mon May  3 16:20:32 2004
++++ b/fs/inode.c       Mon May  3 16:20:32 2004
+@@ -185,6 +185,7 @@
+       INIT_LIST_HEAD(&inode->i_dentry);
+       INIT_LIST_HEAD(&inode->i_devices);
+       sema_init(&inode->i_sem, 1);
++      init_rwsem(&inode->i_alloc_sem);
+       INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
+       spin_lock_init(&inode->i_data.page_lock);
+       init_MUTEX(&inode->i_data.i_shared_sem);
+diff -Nru a/fs/open.c b/fs/open.c
+--- a/fs/open.c        Mon May  3 16:20:32 2004
++++ b/fs/open.c        Mon May  3 16:20:32 2004
+@@ -192,7 +192,9 @@
+       newattrs.ia_size = length;
+       newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+       down(&dentry->d_inode->i_sem);
++      down_write(&dentry->d_inode->i_alloc_sem);
+       err = notify_change(dentry, &newattrs);
++      up_write(&dentry->d_inode->i_alloc_sem);
+       up(&dentry->d_inode->i_sem);
+       return err;
+ }
+diff -Nru a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c
+--- a/fs/xfs/linux/xfs_aops.c  Mon May  3 16:20:32 2004
++++ b/fs/xfs/linux/xfs_aops.c  Mon May  3 16:20:32 2004
+@@ -1032,7 +1032,8 @@
+       if (error)
+               return -error;
+-      return blockdev_direct_IO(rw, iocb, inode, iomap.iomap_target->pbr_bdev,
++      return blockdev_direct_IO_no_locking(rw, iocb, inode,
++              iomap.iomap_target->pbr_bdev,
+               iov, offset, nr_segs,
+               linvfs_get_blocks_direct,
+               linvfs_unwritten_convert_direct);
+diff -Nru a/include/linux/fs.h b/include/linux/fs.h
+--- a/include/linux/fs.h       Mon May  3 16:20:32 2004
++++ b/include/linux/fs.h       Mon May  3 16:20:32 2004
+@@ -397,6 +397,7 @@
+       unsigned short          i_bytes;
+       spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
+       struct semaphore        i_sem;
++      struct rw_semaphore     i_alloc_sem;
+       struct inode_operations *i_op;
+       struct file_operations  *i_fop; /* former ->i_op->default_file_ops */
+       struct super_block      *i_sb;
+@@ -1235,6 +1236,7 @@
+ extern int filemap_fdatawrite(struct address_space *);
+ extern int filemap_flush(struct address_space *);
+ extern int filemap_fdatawait(struct address_space *);
++extern int filemap_write_and_wait(struct address_space *mapping);
+ extern void sync_supers(void);
+ extern void sync_filesystems(int wait);
+ extern void emergency_sync(void);
+@@ -1347,9 +1349,6 @@
+ file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+ extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
+       const struct iovec *iov, loff_t offset, unsigned long nr_segs);
+-extern int blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 
+-      struct block_device *bdev, const struct iovec *iov, loff_t offset, 
+-      unsigned long nr_segs, get_blocks_t *get_blocks, dio_iodone_t *end_io);
+ extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, 
+       unsigned long nr_segs, loff_t *ppos);
+ ssize_t generic_file_writev(struct file *filp, const struct iovec *iov, 
+@@ -1369,6 +1368,32 @@
+                               ppos,
+                               desc,
+                               actor);
++}
++
++int __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
++      struct block_device *bdev, const struct iovec *iov, loff_t offset,
++      unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io,
++      int needs_special_locking);
++
++/*
++ * For filesystems which need locking between buffered and direct access
++ */
++static inline int blockdev_direct_IO(int rw, struct kiocb *iocb,
++      struct inode *inode, struct block_device *bdev, const struct iovec *iov,
++      loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks,
++      dio_iodone_t end_io)
++{
++      return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
++                              nr_segs, get_blocks, end_io, 1);
++}
++
++static inline int blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
++      struct inode *inode, struct block_device *bdev, const struct iovec *iov,
++      loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks,
++      dio_iodone_t end_io)
++{
++      return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
++                              nr_segs, get_blocks, end_io, 0);
+ }
+ extern struct file_operations generic_ro_fops;
+diff -Nru a/mm/filemap.c b/mm/filemap.c
+--- a/mm/filemap.c     Mon May  3 16:20:32 2004
++++ b/mm/filemap.c     Mon May  3 16:20:32 2004
+@@ -73,6 +73,9 @@
+  *  ->mmap_sem
+  *    ->i_sem                 (msync)
+  *
++ *  ->i_sem
++ *    ->i_alloc_sem             (various)
++ *
+  *  ->inode_lock
+  *    ->sb_lock                       (fs/fs-writeback.c)
+  *    ->mapping->page_lock    (__sync_single_inode)
+@@ -228,6 +231,18 @@
+ EXPORT_SYMBOL(filemap_fdatawait);
++int filemap_write_and_wait(struct address_space *mapping)
++{
++      int retval = 0;
++
++      if (mapping->nrpages) {
++              retval = filemap_fdatawrite(mapping);
++              if (retval == 0)
++                      retval = filemap_fdatawait(mapping);
++      }
++      return retval;
++}
++
+ /*
+  * This adds a page to the page cache, starting out as locked, unreferenced,
+  * not uptodate and with no errors.
+@@ -1716,6 +1731,7 @@
+ /*
+  * Write to a file through the page cache. 
++ * Called under i_sem for S_ISREG files.
+  *
+  * We put everything into the page cache prior to writing it. This is not a
+  * problem when writing full pages. With partial pages, however, we first have
+@@ -1806,12 +1822,19 @@
+               /*
+                * Sync the fs metadata but not the minor inode changes and
+                * of course not the data as we did direct DMA for the IO.
++               * i_sem is held, which protects generic_osync_inode() from
++               * livelocking.
+                */
+               if (written >= 0 && file->f_flags & O_SYNC)
+                       status = generic_osync_inode(inode, mapping, OSYNC_METADATA);
+               if (written >= 0 && !is_sync_kiocb(iocb))
+                       written = -EIOCBQUEUED;
+-              goto out_status;
++              if (written != -ENOTBLK)
++                      goto out_status;
++              /*
++               * direct-io write to a hole: fall through to buffered I/O
++               */
++              written = 0;
+       }
+       buf = iov->iov_base;
+@@ -1900,6 +1923,14 @@
+                                       OSYNC_METADATA|OSYNC_DATA);
+       }
+       
++      /*
++       * If we get here for O_DIRECT writes then we must have fallen through
++       * to buffered writes (block instantiation inside i_size).  So we sync
++       * the file data here, to try to honour O_DIRECT expectations.
++       */
++      if (unlikely(file->f_flags & O_DIRECT) && written)
++              status = filemap_write_and_wait(mapping);
++
+ out_status:   
+       err = written ? written : status;
+ out:
+@@ -1991,6 +2022,9 @@
+ EXPORT_SYMBOL(generic_file_writev);
++/*
++ * Called under i_sem for writes to S_ISREG files
++ */
+ ssize_t
+ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+       loff_t offset, unsigned long nr_segs)
+@@ -1999,18 +2033,13 @@
+       struct address_space *mapping = file->f_mapping;
+       ssize_t retval;
+-      if (mapping->nrpages) {
+-              retval = filemap_fdatawrite(mapping);
+-              if (retval == 0)
+-                      retval = filemap_fdatawait(mapping);
+-              if (retval)
+-                      goto out;
++      retval = filemap_write_and_wait(mapping);
++      if (retval == 0) {
++              retval = mapping->a_ops->direct_IO(rw, iocb, iov,
++                                              offset, nr_segs);
++              if (rw == WRITE && mapping->nrpages)
++                      invalidate_inode_pages2(mapping);
+       }
+-
+-      retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs);
+-      if (rw == WRITE && mapping->nrpages)
+-              invalidate_inode_pages2(mapping);
+-out:
+       return retval;
+ }
diff --git a/lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch b/lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch
new file mode 100644 (file)
index 0000000..9cdd51a
--- /dev/null
@@ -0,0 +1,31 @@
+ include/linux/fs.h |    1 +
+ mm/filemap.c       |    3 +++
+ 2 files changed, 4 insertions(+)
+
+Index: linux-2.6.4-30.1/include/linux/fs.h
+===================================================================
+--- linux-2.6.4-30.1.orig/include/linux/fs.h   2004-04-02 03:20:19.000000000 -0500
++++ linux-2.6.4-30.1/include/linux/fs.h        2004-04-02 03:20:19.000000000 -0500
+@@ -320,6 +320,7 @@
+       int (*releasepage) (struct page *, int);
+       int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
+                       loff_t offset, unsigned long nr_segs);
++      void (*removepage)(struct page *); /* called when page gets removed from the inode */
+ };
+ struct backing_dev_info;
+Index: linux-2.6.4-30.1/mm/filemap.c
+===================================================================
+--- linux-2.6.4-30.1.orig/mm/filemap.c 2004-04-02 03:19:42.000000000 -0500
++++ linux-2.6.4-30.1/mm/filemap.c      2004-04-02 03:23:10.000000000 -0500
+@@ -102,6 +102,9 @@
+ {
+       struct address_space *mapping = page->mapping;
++      if (mapping->a_ops->removepage)
++              mapping->a_ops->removepage(page);
++
+       radix_tree_delete(&mapping->page_tree, page->index);
+       list_del(&page->list);
+       page->mapping = NULL;
+
diff --git a/lustre/kernel_patches/series/vanilla-2.6.5 b/lustre/kernel_patches/series/vanilla-2.6.5
new file mode 100644 (file)
index 0000000..7a85e72
--- /dev/null
@@ -0,0 +1,14 @@
+lustre_version.patch
+o_direct-2.6.5.patch
+vfs_intent-2.6-suse.patch 
+vfs_nointent-2.6-suse.patch 
+vfs_races-2.6-suse.patch
+ext3-wantedi-misc-2.6-suse.patch 
+nfs-cifs-intent-vanilla-2.6.patch
+iopen-misc-2.6-suse.patch 
+export-truncate-2.6-suse.patch 
+export_symbols-2.6-suse.patch 
+removepage-vanilla-2.6.5.patch
+dev_read_only-2.6-suse.patch 
+export-2.6-suse.patch
+header-guards-2.6-suse.patch