Whamcloud - gitweb
Merge b1_5 from b1_4 (20060621_1641)
author scjody <scjody>
Wed, 21 Jun 2006 21:22:47 +0000 (21:22 +0000)
committer scjody <scjody>
Wed, 21 Jun 2006 21:22:47 +0000 (21:22 +0000)
17 files changed:
ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series
lustre/ChangeLog
lustre/doc/lfs.1
lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch [new file with mode: 0644]
lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series
lustre/kernel_patches/series/ldiskfs-2.6-suse.series
lustre/liblustre/rw.c
lustre/mds/mds_fs.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_internal.h
lustre/obdfilter/filter_io_24.c
lustre/obdfilter/filter_io_26.c
lustre/osc/lproc_osc.c

diff --git a/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch b/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch
new file mode 100644 (file)
index 0000000..dca4676
--- /dev/null
@@ -0,0 +1,101 @@
+Index: linux-2.6.5-7.201/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/ext3/super.c     2006-06-20 19:40:44.000000000 +0400
++++ linux-2.6.5-7.201/fs/ext3/super.c  2006-06-20 19:42:08.000000000 +0400
+@@ -39,7 +39,7 @@
+ static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
+ static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
+                              int);
+-static void ext3_commit_super (struct super_block * sb,
++void ext3_commit_super (struct super_block * sb,
+                              struct ext3_super_block * es,
+                              int sync);
+ static void ext3_mark_recovery_complete(struct super_block * sb,
+@@ -1781,7 +1781,7 @@ static int ext3_create_journal(struct su
+       return 0;
+ }
+-static void ext3_commit_super (struct super_block * sb,
++void ext3_commit_super (struct super_block * sb,
+                              struct ext3_super_block * es,
+                              int sync)
+ {
+Index: linux-2.6.5-7.201/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/ext3/namei.c     2006-06-20 19:40:44.000000000 +0400
++++ linux-2.6.5-7.201/fs/ext3/namei.c  2006-06-20 19:42:08.000000000 +0400
+@@ -1598,7 +1598,7 @@ static int ext3_delete_entry (handle_t *
+                             struct buffer_head * bh)
+ {
+       struct ext3_dir_entry_2 * de, * pde;
+-      int i;
++      int i, err;
+       i = 0;
+       pde = NULL;
+@@ -1608,7 +1608,9 @@ static int ext3_delete_entry (handle_t *
+                       return -EIO;
+               if (de == de_del)  {
+                       BUFFER_TRACE(bh, "get_write_access");
+-                      ext3_journal_get_write_access(handle, bh);
++                      err = ext3_journal_get_write_access(handle, bh);
++                      if (err)
++                              return err;
+                       if (pde)
+                               pde->rec_len =
+                                       cpu_to_le16(le16_to_cpu(pde->rec_len) +
+Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/ext3/xattr.c     2006-06-20 19:40:44.000000000 +0400
++++ linux-2.6.5-7.201/fs/ext3/xattr.c  2006-06-20 19:42:30.000000000 +0400
+@@ -107,7 +107,7 @@ ext3_xattr_register(int name_index, stru
+ {
+       int error = -EINVAL;
+-      if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++      if (name_index > 0 && name_index < EXT3_XATTR_INDEX_MAX) {
+               write_lock(&ext3_handler_lock);
+               if (!ext3_xattr_handlers[name_index-1]) {
+                       ext3_xattr_handlers[name_index-1] = handler;
+Index: linux-2.6.5-7.201/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/ext3/inode.c     2006-06-20 19:40:44.000000000 +0400
++++ linux-2.6.5-7.201/fs/ext3/inode.c  2006-06-20 19:42:08.000000000 +0400
+@@ -1517,9 +1517,14 @@ out_stop:
+                       if (end > inode->i_size) {
+                               ei->i_disksize = end;
+                               i_size_write(inode, end);
+-                              err = ext3_mark_inode_dirty(handle, inode);
+-                              if (!ret) 
+-                                      ret = err;
++                              /*
++                               * We're going to return a positive `ret'
++                               * here due to non-zero-length I/O, so there's
++                               * no way of reporting error returns from
++                               * ext3_mark_inode_dirty() to userspace.  So
++                               * ignore it.
++                               */
++                              ext3_mark_inode_dirty(handle, inode);
+                       }
+               }
+               err = ext3_journal_stop(handle);
+@@ -1811,8 +1816,18 @@ ext3_clear_blocks(handle_t *handle, stru
+               ext3_mark_inode_dirty(handle, inode);
+               ext3_journal_test_restart(handle, inode);
+               if (bh) {
++                      int err;
+                       BUFFER_TRACE(bh, "retaking write access");
+-                      ext3_journal_get_write_access(handle, bh);
++                      err = ext3_journal_get_write_access(handle, bh);
++                      if (err) {
++                              struct super_block *sb = inode->i_sb;
++                              struct ext3_super_block *es = EXT3_SB(sb)->s_es;
++                              printk (KERN_CRIT"EXT3-fs: can't continue truncate\n");
++                              EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
++                              es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
++                              ext3_commit_super(sb, es, 1);
++                              return;
++                      }
+               }
+       }
diff --git a/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch b/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch
new file mode 100644 (file)
index 0000000..df3d2ea
--- /dev/null
@@ -0,0 +1,101 @@
+Index: linux-2.6.9-full/fs/ext3/super.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/super.c      2006-06-02 23:37:51.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c   2006-06-02 23:56:29.000000000 +0400
+@@ -43,7 +43,7 @@ static int ext3_load_journal(struct supe
+                            unsigned long journal_devnum);
+ static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
+                              int);
+-static void ext3_commit_super (struct super_block * sb,
++void ext3_commit_super (struct super_block * sb,
+                              struct ext3_super_block * es,
+                              int sync);
+ static void ext3_mark_recovery_complete(struct super_block * sb,
+@@ -1991,7 +1991,7 @@ static int ext3_create_journal(struct su
+       return 0;
+ }
+-static void ext3_commit_super (struct super_block * sb,
++void ext3_commit_super (struct super_block * sb,
+                              struct ext3_super_block * es,
+                              int sync)
+ {
+Index: linux-2.6.9-full/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/namei.c      2006-06-02 23:37:49.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c   2006-06-02 23:43:31.000000000 +0400
+@@ -1599,7 +1599,7 @@ static int ext3_delete_entry (handle_t *
+                             struct buffer_head * bh)
+ {
+       struct ext3_dir_entry_2 * de, * pde;
+-      int i;
++      int i, err;
+       i = 0;
+       pde = NULL;
+@@ -1609,7 +1609,9 @@ static int ext3_delete_entry (handle_t *
+                       return -EIO;
+               if (de == de_del)  {
+                       BUFFER_TRACE(bh, "get_write_access");
+-                      ext3_journal_get_write_access(handle, bh);
++                      err = ext3_journal_get_write_access(handle, bh);
++                      if (err)
++                              return err;
+                       if (pde)
+                               pde->rec_len =
+                                       cpu_to_le16(le16_to_cpu(pde->rec_len) +
+Index: linux-2.6.9-full/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/xattr.c      2006-06-01 14:58:48.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c   2006-06-03 00:02:00.000000000 +0400
+@@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index)
+ {
+       struct xattr_handler *handler = NULL;
+-      if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX)
++      if (name_index > 0 && name_index < EXT3_XATTR_INDEX_MAX)
+               handler = ext3_xattr_handler_map[name_index];
+       return handler;
+ }
+Index: linux-2.6.9-full/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/inode.c      2006-06-02 23:37:38.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c   2006-06-03 00:27:41.000000000 +0400
+@@ -1513,9 +1513,14 @@ out_stop:
+                       if (end > inode->i_size) {
+                               ei->i_disksize = end;
+                               i_size_write(inode, end);
+-                              err = ext3_mark_inode_dirty(handle, inode);
+-                              if (!ret) 
+-                                      ret = err;
++                              /*
++                               * We're going to return a positive `ret'
++                               * here due to non-zero-length I/O, so there's
++                               * no way of reporting error returns from
++                               * ext3_mark_inode_dirty() to userspace.  So
++                               * ignore it.
++                               */
++                              ext3_mark_inode_dirty(handle, inode);
+                       }
+               }
+               err = ext3_journal_stop(handle);
+@@ -1807,8 +1812,18 @@ ext3_clear_blocks(handle_t *handle, stru
+               ext3_mark_inode_dirty(handle, inode);
+               ext3_journal_test_restart(handle, inode);
+               if (bh) {
++                      int err;
+                       BUFFER_TRACE(bh, "retaking write access");
+-                      ext3_journal_get_write_access(handle, bh);
++                      err = ext3_journal_get_write_access(handle, bh);
++                      if (err) {
++                              struct super_block *sb = inode->i_sb;
++                              struct ext3_super_block *es = EXT3_SB(sb)->s_es;
++                              printk (KERN_CRIT"EXT3-fs: can't continue truncate\n");
++                              EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
++                              es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
++                              ext3_commit_super(sb, es, 1);
++                              return;
++                      }
+               }
+       }
index ea1389d..7829040 100644 (file)
@@ -12,3 +12,4 @@ ext3-nlinks-2.6.9.patch
 ext3-ialloc-2.6.patch
 ext3-lookup-dotdot-2.6.9.patch
 ext3-sector_t-overflow-2.6.9-rhel4.patch
+ext3-check-jbd-errors-2.6.9.patch
index 8fbb715..f3be0ea 100644 (file)
@@ -14,3 +14,4 @@ ext3-htree-dot-2.6.5-suse.patch
 ext3-ialloc-2.6.patch
 ext3-lookup-dotdot-2.6.9.patch
 ext3-sector_t-overflow-2.6.5-suse.patch
+ext3-check-jbd-errors-2.6.5.patch
index 6a49dcb..95126ee 100644 (file)
@@ -378,6 +378,23 @@ Description: support up to 1024 configured devices on one node
 Details    : change obd_dev array from statically allocated to dynamically
             allocated structs as they are first used to reduce memory usage
 
+Severity   : minor
+Frequency  : rare
+Bugzilla   : 10437
+Description: Flush dirty partially truncated pages during truncate
+Details    : Immediately flush partially truncated pages in filter_setattr,
+            this way we completely avoid having any pages in page cache on OST
+            and can retire ugly workarounds during writes to flush such pages.
+
+Severity   : minor
+Frequency  : rare
+Bugzilla   : 10409
+Description: i_sem vs transaction deadlock in mds_obd_destroy during unlink.
+Details    : Protect the inode from truncation within the vfs_unlink()
+             context: take a reference before calling vfs_unlink() and
+             release it after the parent's i_sem has been released.
+
+
 ------------------------------------------------------------------------------
 
 02-14-2006  Cluster File Systems, Inc. <info@clusterfs.com>
index 8cae6e6..959d7b5 100644 (file)
@@ -45,10 +45,10 @@ The various options supported by lctl are listed and explained below:
 To create a new file with a specific striping pattern
 .TP
 .B find 
-To search the directory tree rooted at the given dir/file name for the files that match the given parameters: \fB--atime\fR (file was last accessed N*24 hours ago), \fB--ctime\fR (file's status was last changed N*24 hours ago), \fB--mtime\fR (file's data was last modified N*24 hours ago), \fB--obd\fR (file has an object on a specific OST). The option \fB--maxdepth\fR allows find to decend at most N levels of directory tree. The options \fB--print\fR and \fB--print0\fR print full file name, followed by a newline and null character correspondingly.
+To search the directory tree rooted at the given dir/file name for the files that match the given parameters: \fB--atime\fR (file was last accessed N*24 hours ago), \fB--ctime\fR (file's status was last changed N*24 hours ago), \fB--mtime\fR (file's data was last modified N*24 hours ago), \fB--obd\fR (file has an object on a specific OST). The option \fB--maxdepth\fR allows find to descend at most N levels of the directory tree. The options \fB--print\fR and \fB--print0\fR print the full file name, followed by a newline and null character respectively.  Using one of these options works in the new (filename only) mode.
 .TP
 .B find
-To list the striping info for a given filename or files in a directory or recursively for all files in a directory tree use one of the following options: \fB[--quiet|-q] [--verbose|-v] [--recursive|-r]\fR. If one of these options is given find works in old (obsolete, please use getstripe instead) mode.
+To list the striping info for a given filename or files in a directory or recursively for all files in a directory tree use one of the following options: \fB[--quiet|-q] [--verbose|-v] [--recursive|-r]\fR. If one of these options is given find works in old (obsolete, please use getstripe instead) filename and striping mode.
 .TP
 .B getstripe 
 To list the striping info for given filename or files in a directory or recursively for all files in a directory tree. It can also be used to list the files that have objects on a specific OST.
@@ -78,7 +78,7 @@ Display the status of MDS or OSTs (as specified in the command) or all the serve
 List all the OSTs for the filesystem
 .TP
 .B df
-Report filesystem disk space usage or inodes usage of each MDS/OSD.
+Report filesystem disk space usage or inodes usage of each MDT/OST.
 .TP
 .B help 
 Provides brief help on the various arguments
@@ -87,48 +87,46 @@ Provides brief help on the various arguments
 Quit the interactive lfs session
 .SH EXAMPLES
 .TP
-.B $lfs setstripe /mnt/lustre/file1 131072 0 1
-This creats a file striped on one OST
+.B $ lfs setstripe /mnt/lustre/file1 131072 -1 2
+This creates a file striped on two OSTs with 128kB on each stripe.
 .TP
-.B $lfs find /mnt/lustre/file1
-Lists the extended attributes of a given file
+.B $ lfs find /mnt/lustre/file1
+Lists the object allocation of a given file
 .TP
-.B $lfs find /mnt/lustre/
-Lists the extended attributes of all files in a given directory
+.B $ lfs find /mnt/lustre/
+Lists the object allocations of all files in a given directory
 .TP
-.B $lfs find -r /mnt/lustre/
-Recursively list the extended attributes of all files in a given directory tree
+.B $ lfs find -r /mnt/lustre/
+Recursively list the objects of all files in a given directory tree
 .TP
-.B $lfs find -r --obd OST2-UUID /mnt/lustre/
-List all the files that have objects on a specific OST
+.B $ lfs find -r --obd OST2-UUID /mnt/lustre/
+Recursively list all files in a given directory that have objects on OST2-UUID.
 .TP
-.B $lfs quotachown -i /mnt/lustre
+.B $ lfs quotachown -i /mnt/lustre
 Change file owner and group
 .TP
-.B $lfs quotacheck -ug /mnt/lustre
+.B $ lfs quotacheck -ug /mnt/lustre
 Quotacheck for user and group
 .TP
-.B $lfs quotaon -ug /mnt/lustre
+.B $ lfs quotaon -ug /mnt/lustre
 Turn quotas of user and group on
 .TP
-.B $lfs quotaoff -ug /mnt/lustre
+.B $ lfs quotaoff -ug /mnt/lustre
 Turn quotas of user and group off
 .TP
-.B $lfs setquota -u bob 0 1000000 0 10000 /mnt/lustre
+.B $ lfs setquota -u bob 0 1000000 0 10000 /mnt/lustre
 Set quotas of user `bob': 1GB block quota and 10,000 file quota
 .TP
-.B $lfs quota -u bob /mnt/lustre
+.B $ lfs quota -u bob /mnt/lustre
 List quotas of user `bob'
 .TP
-.B $ lfs find -r --obd OST2-UUID /mnt/lustre/
-.TP
-.B $lfs check servers 
-Check the status of all servers(mds, osts)
+.B $ lfs check servers 
+Check the status of all servers (MDT, OST)
 .TP
-.B $lfs osts
+.B $ lfs osts
 List all the OSTs
 .TP
-.B $lfs df -i 
-Lists inode consumpton per OST and MDS
+.B $ lfs df -i 
+Lists inode consumption per OST and MDT
 .SH BUGS
 None are known.
diff --git a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch
new file mode 100644 (file)
index 0000000..dca4676
--- /dev/null
@@ -0,0 +1,101 @@
+Index: linux-2.6.5-7.201/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/ext3/super.c     2006-06-20 19:40:44.000000000 +0400
++++ linux-2.6.5-7.201/fs/ext3/super.c  2006-06-20 19:42:08.000000000 +0400
+@@ -39,7 +39,7 @@
+ static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
+ static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
+                              int);
+-static void ext3_commit_super (struct super_block * sb,
++void ext3_commit_super (struct super_block * sb,
+                              struct ext3_super_block * es,
+                              int sync);
+ static void ext3_mark_recovery_complete(struct super_block * sb,
+@@ -1781,7 +1781,7 @@ static int ext3_create_journal(struct su
+       return 0;
+ }
+-static void ext3_commit_super (struct super_block * sb,
++void ext3_commit_super (struct super_block * sb,
+                              struct ext3_super_block * es,
+                              int sync)
+ {
+Index: linux-2.6.5-7.201/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/ext3/namei.c     2006-06-20 19:40:44.000000000 +0400
++++ linux-2.6.5-7.201/fs/ext3/namei.c  2006-06-20 19:42:08.000000000 +0400
+@@ -1598,7 +1598,7 @@ static int ext3_delete_entry (handle_t *
+                             struct buffer_head * bh)
+ {
+       struct ext3_dir_entry_2 * de, * pde;
+-      int i;
++      int i, err;
+       i = 0;
+       pde = NULL;
+@@ -1608,7 +1608,9 @@ static int ext3_delete_entry (handle_t *
+                       return -EIO;
+               if (de == de_del)  {
+                       BUFFER_TRACE(bh, "get_write_access");
+-                      ext3_journal_get_write_access(handle, bh);
++                      err = ext3_journal_get_write_access(handle, bh);
++                      if (err)
++                              return err;
+                       if (pde)
+                               pde->rec_len =
+                                       cpu_to_le16(le16_to_cpu(pde->rec_len) +
+Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/ext3/xattr.c     2006-06-20 19:40:44.000000000 +0400
++++ linux-2.6.5-7.201/fs/ext3/xattr.c  2006-06-20 19:42:30.000000000 +0400
+@@ -107,7 +107,7 @@ ext3_xattr_register(int name_index, stru
+ {
+       int error = -EINVAL;
+-      if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++      if (name_index > 0 && name_index < EXT3_XATTR_INDEX_MAX) {
+               write_lock(&ext3_handler_lock);
+               if (!ext3_xattr_handlers[name_index-1]) {
+                       ext3_xattr_handlers[name_index-1] = handler;
+Index: linux-2.6.5-7.201/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/ext3/inode.c     2006-06-20 19:40:44.000000000 +0400
++++ linux-2.6.5-7.201/fs/ext3/inode.c  2006-06-20 19:42:08.000000000 +0400
+@@ -1517,9 +1517,14 @@ out_stop:
+                       if (end > inode->i_size) {
+                               ei->i_disksize = end;
+                               i_size_write(inode, end);
+-                              err = ext3_mark_inode_dirty(handle, inode);
+-                              if (!ret) 
+-                                      ret = err;
++                              /*
++                               * We're going to return a positive `ret'
++                               * here due to non-zero-length I/O, so there's
++                               * no way of reporting error returns from
++                               * ext3_mark_inode_dirty() to userspace.  So
++                               * ignore it.
++                               */
++                              ext3_mark_inode_dirty(handle, inode);
+                       }
+               }
+               err = ext3_journal_stop(handle);
+@@ -1811,8 +1816,18 @@ ext3_clear_blocks(handle_t *handle, stru
+               ext3_mark_inode_dirty(handle, inode);
+               ext3_journal_test_restart(handle, inode);
+               if (bh) {
++                      int err;
+                       BUFFER_TRACE(bh, "retaking write access");
+-                      ext3_journal_get_write_access(handle, bh);
++                      err = ext3_journal_get_write_access(handle, bh);
++                      if (err) {
++                              struct super_block *sb = inode->i_sb;
++                              struct ext3_super_block *es = EXT3_SB(sb)->s_es;
++                              printk (KERN_CRIT"EXT3-fs: can't continue truncate\n");
++                              EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
++                              es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
++                              ext3_commit_super(sb, es, 1);
++                              return;
++                      }
+               }
+       }
diff --git a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch
new file mode 100644 (file)
index 0000000..df3d2ea
--- /dev/null
@@ -0,0 +1,101 @@
+Index: linux-2.6.9-full/fs/ext3/super.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/super.c      2006-06-02 23:37:51.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c   2006-06-02 23:56:29.000000000 +0400
+@@ -43,7 +43,7 @@ static int ext3_load_journal(struct supe
+                            unsigned long journal_devnum);
+ static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
+                              int);
+-static void ext3_commit_super (struct super_block * sb,
++void ext3_commit_super (struct super_block * sb,
+                              struct ext3_super_block * es,
+                              int sync);
+ static void ext3_mark_recovery_complete(struct super_block * sb,
+@@ -1991,7 +1991,7 @@ static int ext3_create_journal(struct su
+       return 0;
+ }
+-static void ext3_commit_super (struct super_block * sb,
++void ext3_commit_super (struct super_block * sb,
+                              struct ext3_super_block * es,
+                              int sync)
+ {
+Index: linux-2.6.9-full/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/namei.c      2006-06-02 23:37:49.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c   2006-06-02 23:43:31.000000000 +0400
+@@ -1599,7 +1599,7 @@ static int ext3_delete_entry (handle_t *
+                             struct buffer_head * bh)
+ {
+       struct ext3_dir_entry_2 * de, * pde;
+-      int i;
++      int i, err;
+       i = 0;
+       pde = NULL;
+@@ -1609,7 +1609,9 @@ static int ext3_delete_entry (handle_t *
+                       return -EIO;
+               if (de == de_del)  {
+                       BUFFER_TRACE(bh, "get_write_access");
+-                      ext3_journal_get_write_access(handle, bh);
++                      err = ext3_journal_get_write_access(handle, bh);
++                      if (err)
++                              return err;
+                       if (pde)
+                               pde->rec_len =
+                                       cpu_to_le16(le16_to_cpu(pde->rec_len) +
+Index: linux-2.6.9-full/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/xattr.c      2006-06-01 14:58:48.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c   2006-06-03 00:02:00.000000000 +0400
+@@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index)
+ {
+       struct xattr_handler *handler = NULL;
+-      if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX)
++      if (name_index > 0 && name_index < EXT3_XATTR_INDEX_MAX)
+               handler = ext3_xattr_handler_map[name_index];
+       return handler;
+ }
+Index: linux-2.6.9-full/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/inode.c      2006-06-02 23:37:38.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c   2006-06-03 00:27:41.000000000 +0400
+@@ -1513,9 +1513,14 @@ out_stop:
+                       if (end > inode->i_size) {
+                               ei->i_disksize = end;
+                               i_size_write(inode, end);
+-                              err = ext3_mark_inode_dirty(handle, inode);
+-                              if (!ret) 
+-                                      ret = err;
++                              /*
++                               * We're going to return a positive `ret'
++                               * here due to non-zero-length I/O, so there's
++                               * no way of reporting error returns from
++                               * ext3_mark_inode_dirty() to userspace.  So
++                               * ignore it.
++                               */
++                              ext3_mark_inode_dirty(handle, inode);
+                       }
+               }
+               err = ext3_journal_stop(handle);
+@@ -1807,8 +1812,18 @@ ext3_clear_blocks(handle_t *handle, stru
+               ext3_mark_inode_dirty(handle, inode);
+               ext3_journal_test_restart(handle, inode);
+               if (bh) {
++                      int err;
+                       BUFFER_TRACE(bh, "retaking write access");
+-                      ext3_journal_get_write_access(handle, bh);
++                      err = ext3_journal_get_write_access(handle, bh);
++                      if (err) {
++                              struct super_block *sb = inode->i_sb;
++                              struct ext3_super_block *es = EXT3_SB(sb)->s_es;
++                              printk (KERN_CRIT"EXT3-fs: can't continue truncate\n");
++                              EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
++                              es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
++                              ext3_commit_super(sb, es, 1);
++                              return;
++                      }
+               }
+       }
index ea1389d..7829040 100644 (file)
@@ -12,3 +12,4 @@ ext3-nlinks-2.6.9.patch
 ext3-ialloc-2.6.patch
 ext3-lookup-dotdot-2.6.9.patch
 ext3-sector_t-overflow-2.6.9-rhel4.patch
+ext3-check-jbd-errors-2.6.9.patch
index 8fbb715..f3be0ea 100644 (file)
@@ -14,3 +14,4 @@ ext3-htree-dot-2.6.5-suse.patch
 ext3-ialloc-2.6.patch
 ext3-lookup-dotdot-2.6.9.patch
 ext3-sector_t-overflow-2.6.5-suse.patch
+ext3-check-jbd-errors-2.6.5.patch
index 85de234..a2bf615 100644 (file)
@@ -377,6 +377,18 @@ static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
         EXIT;
 }
 
+static void llu_ap_update_obdo(void *data, int cmd, struct obdo *oa,
+                               obd_valid valid)
+{
+        struct ll_async_page *llap;
+        ENTRY;
+
+        llap = LLAP_FROM_COOKIE(data);
+        obdo_from_inode(oa, llap->llap_inode, valid);
+
+        EXIT;
+}
+
 /* called for each page in a completed rpc.*/
 static int llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
 {
@@ -400,6 +412,7 @@ static struct obd_async_page_ops llu_async_page_ops = {
         .ap_make_ready =        NULL,
         .ap_refresh_count =     NULL,
         .ap_fill_obdo =         llu_ap_fill_obdo,
+        .ap_update_obdo =       llu_ap_update_obdo,
         .ap_completion =        llu_ap_completion,
 };
 
index b3215c4..e217d1e 100644 (file)
@@ -729,6 +729,7 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
         struct lvfs_run_ctxt saved;
         struct lvfs_ucred ucred = { 0 };
         char fidname[LL_FID_NAMELEN];
+        struct inode *inode = NULL;
         struct dentry *de;
         void *handle;
         int err, namelen, rc = 0;
@@ -762,6 +763,10 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
         if (IS_ERR(handle))
                 GOTO(out_dput, rc = PTR_ERR(handle));
 
+        /* take a reference to protect inode from truncation within
+           vfs_unlink() context. bug 10409 */
+        inode = de->d_inode;
+        atomic_inc(&inode->i_count);
         rc = vfs_unlink(mds->mds_objects_dir->d_inode, de);
         if (rc)
                 CERROR("error destroying object "LPU64":%u: rc %d\n",
@@ -775,6 +780,9 @@ out_dput:
                 l_dput(de);
         UNLOCK_INODE_MUTEX(parent_inode);
 
+        if (inode)
+                iput(inode);
+
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &ucred);
         RETURN(rc);
 }
index dfa6300..a6564b2 100644 (file)
@@ -2396,11 +2396,6 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                 }
         }
 
-        if (locked) {
-                UNLOCK_INODE_MUTEX(inode);
-                locked = 0;
-        }
-
         rc = filter_finish_transno(exp, oti, rc);
 
         err = fsfilt_commit(exp->exp_obd, inode, handle, 0);
@@ -2409,6 +2404,19 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                 if (!rc)
                         rc = err;
         }
+
+        if (locked) {
+                /* Flush the truncated page to disk immediately so that we
+                 * can avoid searching for page aliases before directio
+                 * writes and similar workarounds, at the expense of somewhat
+                 * slower truncates that are not on a page boundary. I believe
+                 * this is the only place in the filter code that can lead to
+                 * pages getting into the pagecache so far. */
+                filter_clear_truncated_page(inode);
+                UNLOCK_INODE_MUTEX(inode);
+                locked = 0;
+        }
+
         EXIT;
 out_unlock:
         if (locked)
index 83faf9c..1edbff6 100644 (file)
@@ -152,6 +152,7 @@ void filter_iobuf_put(struct filter_obd *filter, struct filter_iobuf *iobuf,
 int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf,
                      struct obd_export *exp, struct iattr *attr,
                      struct obd_trans_info *oti, void **wait_handle);
+int filter_clear_truncated_page(struct inode *inode);
 
 /* filter_log.c */
 struct ost_filterdata {
index 0e1a84e..b69a458 100644 (file)
@@ -109,13 +109,9 @@ static void dump_page(int rw, unsigned long block, struct page *page)
  * free the buffers and drop the page from cache.  The buffers should not
  * be dirty, because we already called fdatasync/fdatawait on them.
  */
-static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
+static int filter_sync_inode_data(struct inode *inode)
 {
-        struct page *page;
-        int i, rc, rc2;
-
-        check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages,
-                          inode->i_dev, 1 << inode->i_blkbits);
+        int rc, rc2;
 
         /* This is nearly generic_osync_inode, without the waiting on the inode
         rc = generic_osync_inode(inode, inode->i_mapping,
@@ -128,6 +124,19 @@ static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
         rc2 = filemap_fdatawait(inode->i_mapping);
         if (rc == 0)
                 rc = rc2;
+
+        return rc;
+}
+
+static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
+{
+        struct page *page;
+        int i, rc;
+
+        check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages,
+                          inode->i_dev, 1 << inode->i_blkbits);
+
+        rc = filter_sync_inode_data(inode);
         if (rc != 0)
                 RETURN(rc);
 
@@ -138,6 +147,39 @@ static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
                                       iobuf->maplist[i]->index);
                 if (page == NULL)
                         continue;
+                if (page->mapping != NULL) {
+                        /* Now that the only source of such pages in truncate
+                         * path flushes these pages to disk and then
+                         * discards them, this is an error condition */
+                        CERROR("Data page in page cache during write!\n");
+                        ll_truncate_complete_page(page);
+                }
+
+                unlock_page(page);
+                page_cache_release(page);
+        }
+
+        return 0;
+}
+
+int filter_clear_truncated_page(struct inode *inode)
+{
+        struct page *page;
+        int rc;
+
+        /* Truncate on page boundary, so nothing to flush? */
+        if (!(inode->i_size & (PAGE_CACHE_SIZE-1)))
+                return 0;
+
+        rc = filter_sync_inode_data(inode);
+        if (rc != 0)
+                RETURN(rc);
+
+        /* be careful to look up the page only after filter_sync_inode_data()
+         * has waited for IO to complete, before we evict it from the cache */
+        page = find_lock_page(inode->i_mapping,
+                              inode->i_size >> PAGE_CACHE_SHIFT);
+        if (page) {
                 if (page->mapping != NULL)
                         ll_truncate_complete_page(page);
 
index c3c2585..22d1f1b 100644 (file)
@@ -380,17 +380,16 @@ int filter_do_bio(struct obd_device *obd, struct inode *inode,
  * to free the buffers and drop the page from cache.  The buffers should
  * not be dirty, because we already called fdatasync/fdatawait on them.
  */
-static int filter_clear_page_cache(struct inode *inode,
-                                   struct filter_iobuf *iobuf)
+static int filter_sync_inode_data(struct inode *inode, int locked)
 {
-        struct page *page;
-        int i, rc = 0;
+        int rc = 0;
 
         /* This is nearly do_fsync(), without the waiting on the inode */
         /* XXX: in 2.6.16 (at least) we don't need to hold i_mutex over
          * filemap_fdatawrite() and filemap_fdatawait(), so we may no longer
          * need this lock here at all. */
-        LOCK_INODE_MUTEX(inode);
+        if (!locked)
+                LOCK_INODE_MUTEX(inode);
         if (inode->i_mapping->nrpages) {
                 current->flags |= PF_SYNCWRITE;
                 rc = filemap_fdatawrite(inode->i_mapping);
@@ -398,7 +397,19 @@ static int filter_clear_page_cache(struct inode *inode,
                         rc = filemap_fdatawait(inode->i_mapping);
                 current->flags &= ~PF_SYNCWRITE;
         }
-        UNLOCK_INODE_MUTEX(inode);
+        if (!locked)
+                UNLOCK_INODE_MUTEX(inode);
+
+        return rc;
+}
+
+static int filter_clear_page_cache(struct inode *inode,
+                                   struct filter_iobuf *iobuf)
+{
+        struct page *page;
+        int i, rc;
+
+        rc = filter_sync_inode_data(inode, 0);
         if (rc != 0)
                 RETURN(rc);
 
@@ -410,6 +421,10 @@ static int filter_clear_page_cache(struct inode *inode,
                 if (page == NULL)
                         continue;
                 if (page->mapping != NULL) {
+                        /* Now that the only source of such pages in truncate
+                         * path flushes these pages to disk and then
+                         * discards them, this is an error condition */
+                        CERROR("Data page in page cache during write!\n");
                         wait_on_page_writeback(page);
                         ll_truncate_complete_page(page);
                 }
@@ -421,6 +436,35 @@ static int filter_clear_page_cache(struct inode *inode,
         return 0;
 }
 
+int filter_clear_truncated_page(struct inode *inode)
+{
+        struct page *page;
+        int rc;
+
+        /* Truncate on page boundary, so nothing to flush? */
+        if (!(inode->i_size & (PAGE_CACHE_SIZE-1)))
+                return 0;
+
+        rc = filter_sync_inode_data(inode, 1);
+        if (rc != 0)
+                RETURN(rc);
+
+        /* be careful to look up the page only after filter_sync_inode_data()
+         * has waited for IO to complete, before we evict it from the cache */
+        page = find_lock_page(inode->i_mapping,
+                              inode->i_size >> PAGE_CACHE_SHIFT);
+        if (page) {
+                if (page->mapping != NULL) {
+                        wait_on_page_writeback(page);
+                        ll_truncate_complete_page(page);
+                }
+                unlock_page(page);
+                page_cache_release(page);
+        }
+
+        return 0;
+}
+
 /* Must be called with i_mutex taken for writes; this will drop it */
 int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf,
                      struct obd_export *exp, struct iattr *attr,
index 0df7dfe..25c201f 100644 (file)
@@ -116,7 +116,6 @@ static int osc_rd_max_dirty_mb(char *page, char **start, off_t off, int count,
 
         client_obd_list_lock(&cli->cl_loi_list_lock);
         val = cli->cl_dirty_max;
-        spin_unlock(&cli->cl_loi_list_lock);
         client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         mult = 1 << 20;