Whamcloud - gitweb
LU-264 Integrate upstream ext4 MMP changes
author Niu Yawei <niu@whamcloud.com>
Fri, 27 May 2011 14:27:20 +0000 (07:27 -0700)
committer Oleg Drokin <green@whamcloud.com>
Mon, 6 Jun 2011 17:32:46 +0000 (10:32 -0700)
Integrate the changes made to the MMP patch when it was submitted upstream
for the 2.6.40 kernel.  Mostly cosmetic changes were done (improved comments, etc),
along with moving the MMP functionality into a separate mmp.c file in order to
avoid increasing the size of super.c too much.

Signed-off-by: Niu Yawei <niu@whamcloud.com>
Change-Id: I6909f3e44b077d65d653994ed1ac3ee11fcacb7f
Reviewed-on: http://review.whamcloud.com/865
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
ldiskfs/kernel_patches/patches/ext4-dynlocks-2.6-rhel5.patch
ldiskfs/kernel_patches/patches/ext4-dynlocks-common-rhel6.patch
ldiskfs/kernel_patches/patches/ext4-mmp-rhel5.patch
ldiskfs/kernel_patches/patches/ext4-mmp-rhel6.patch
ldiskfs/ldiskfs/Makefile.in

index d39b4f9..cecbbb1 100644 (file)
@@ -1,20 +1,21 @@
-diff -rupN linux-2.6.27.21-0.1_1//fs/ext4/Makefile linux-2.6.27.21-0.1_2//fs/ext4/Makefile
---- linux-2.6.27.21-0.1_1//fs/ext4/Makefile    2009-08-21 15:12:51.000000000 +0530
-+++ linux-2.6.27.21-0.1_2//fs/ext4/Makefile    2009-08-21 15:13:23.000000000 +0530
-@@ -6,7 +6,8 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
+Index: linux-stage/fs/ext4/Makefile
+===================================================================
+--- linux-stage.orig/fs/ext4/Makefile
++++ linux-stage/fs/ext4/Makefile
+@@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
+ ext4-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+               ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+               ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
+-              mmp.o
++              mmp.o dynlocks.o
  
- ext4-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
--              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
-+              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
-+              dynlocks.o
-
  ext4-$(CONFIG_EXT4_FS_XATTR)          += xattr.o xattr_user.o xattr_trusted.o
  ext4-$(CONFIG_EXT4_FS_POSIX_ACL)      += acl.o
-diff -rupN linux-2.6.27.21-0.1_1//fs/ext4/super.c linux-2.6.27.21-0.1_2//fs/ext4/super.c
---- linux-2.6.27.21-0.1_1//fs/ext4/super.c     2009-08-21 15:12:51.000000000 +0530
-+++ linux-2.6.27.21-0.1_2//fs/ext4/super.c     2009-08-21 15:18:18.000000000 +0530
-@@ -4126,6 +4126,7 @@ static int __init init_ext4_fs(void)
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -4159,6 +4159,7 @@ static int __init init_ext4_fs(void)
        err = init_inodecache();
        if (err)
                goto out1;
@@ -22,12 +23,11 @@ diff -rupN linux-2.6.27.21-0.1_1//fs/ext4/super.c linux-2.6.27.21-0.1_2//fs/ext4
        err = register_filesystem(&ext4_fs_type);
        if (err)
                goto out;
-@@ -4149,6 +4150,7 @@ static void __exit exit_ext4_fs(void)
-       unregister_filesystem(&ext4_fs_type);
+@@ -4195,6 +4196,7 @@ static void __exit exit_ext4_fs(void)
        unregister_filesystem(&ext4dev_fs_type);
+ #endif
        destroy_inodecache();
 +      dynlock_cache_exit();
        exit_ext4_xattr();
        exit_ext4_mballoc();
-       remove_proc_entry("fs/ext4", NULL);
-
+       __free_page(ext4_zero_page);
index 0a66c86..17ed593 100644 (file)
@@ -1,7 +1,7 @@
 Index: linux-stage/fs/ext4/dynlocks.c
 ===================================================================
---- /dev/null  1970-01-01 00:00:00.000000000 +0000
-+++ linux-stage/fs/ext4/dynlocks.c     2011-03-03 15:25:04.025526781 +0800
+--- /dev/null
++++ linux-stage/fs/ext4/dynlocks.c
 @@ -0,0 +1,236 @@
 +/*
 + * Dynamic Locks
@@ -241,8 +241,8 @@ Index: linux-stage/fs/ext4/dynlocks.c
 +EXPORT_SYMBOL(dynlock_is_locked);
 Index: linux-stage/include/linux/dynlocks.h
 ===================================================================
---- /dev/null  1970-01-01 00:00:00.000000000 +0000
-+++ linux-stage/include/linux/dynlocks.h       2011-03-03 15:25:04.055526552 +0800
+--- /dev/null
++++ linux-stage/include/linux/dynlocks.h
 @@ -0,0 +1,34 @@
 +#ifndef _LINUX_DYNLOCKS_H
 +#define _LINUX_DYNLOCKS_H
@@ -280,23 +280,22 @@ Index: linux-stage/include/linux/dynlocks.h
 +
 Index: linux-stage/fs/ext4/Makefile
 ===================================================================
---- linux-stage.orig/fs/ext4/Makefile  2011-03-05 11:50:43.000000000 +0800
-+++ linux-stage/fs/ext4/Makefile       2011-03-05 11:52:42.349154982 +0800
-@@ -6,7 +6,8 @@
- ext4-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+--- linux-stage.orig/fs/ext4/Makefile
++++ linux-stage/fs/ext4/Makefile
+@@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
+ ext4-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
                ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
--              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
-+              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
-+              dynlocks.o
+               ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
+-              mmp.o
++              mmp.o dynlocks.o
  
  ext4-$(CONFIG_EXT4_FS_XATTR)          += xattr.o xattr_user.o xattr_trusted.o
  ext4-$(CONFIG_EXT4_FS_POSIX_ACL)      += acl.o
 Index: linux-stage/fs/ext4/super.c
 ===================================================================
---- linux-stage.orig/fs/ext4/super.c   2011-03-05 11:50:43.000000000 +0800
-+++ linux-stage/fs/ext4/super.c        2011-03-05 11:57:33.632869451 +0800
-@@ -4457,17 +4457,20 @@
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -4125,32 +4125,37 @@ static int __init init_ext4_fs(void)
                return err;
        ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
        if (!ext4_kset)
@@ -320,8 +319,7 @@ Index: linux-stage/fs/ext4/super.c
                goto out1;
        err = register_filesystem(&ext4_fs_type);
        if (err)
-@@ -4477,15 +4480,17 @@
+               goto out;
        return 0;
  out:
 -      destroy_inodecache();
@@ -342,7 +340,7 @@ Index: linux-stage/fs/ext4/super.c
        exit_ext4_system_zone();
        return err;
  }
-@@ -4493,6 +4498,7 @@
+@@ -4158,6 +4163,7 @@ out4:
  static void __exit exit_ext4_fs(void)
  {
        unregister_filesystem(&ext4_fs_type);
index 5997d62..352549f 100644 (file)
-Index: linux-stage/fs/ext4/super.c
+Prevent an ext4 filesystem from being mounted multiple times.
+A sequence number is stored on disk and is periodically updated (every 5
+seconds by default) by a mounted filesystem.
+At mount time, we now wait for s_mmp_update_interval seconds to make sure
+that the MMP sequence does not change.
+In case of failure, the nodename, bdevname and the time at which the MMP
+block was last updated are displayed.
+Move all mmp code to a dedicated file (mmp.c).
+
+Signed-off-by: Andreas Dilger <adilger <at> whamcloud.com>
+Signed-off-by: Johann Lombardi <johann <at> whamcloud.com>
+---
+ fs/ext4/Makefile |    3 +-
+ fs/ext4/ext4.h   |   76 ++++++++++++-
+ fs/ext4/mmp.c    |  351 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/ext4/super.c  |   18 +++-
+ 4 files changed, 444 insertions(+), 4 deletions(-)
+ create mode 100644 fs/ext4/mmp.c
+
+Index: linux-stage/fs/ext4/Makefile
 ===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -40,6 +40,8 @@
- #include <linux/log2.h>
- #include <linux/crc16.h>
- #include <asm/uaccess.h>
-+#include <linux/kthread.h>
-+#include <linux/utsname.h>
+--- linux-stage.orig/fs/ext4/Makefile
++++ linux-stage/fs/ext4/Makefile
+@@ -6,7 +6,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
  
- #include "ext4.h"
- #include "ext4_jbd2.h"
-@@ -660,6 +662,8 @@ static void ext4_put_super(struct super_
-               invalidate_bdev(sbi->journal_bdev, 0);
-               ext4_blkdev_remove(sbi);
-       }
-+      if (sbi->s_mmp_tsk)
-+              kthread_stop(sbi->s_mmp_tsk);
-       sb->s_fs_info = NULL;
-       /*
-        * Now that we are completely done shutting down the
-@@ -921,6 +925,354 @@ static int ext4_show_options(struct seq_
-       return 0;
+ ext4-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+               ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+-              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
++              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
++              mmp.o
+ ext4-$(CONFIG_EXT4_FS_XATTR)          += xattr.o xattr_user.o xattr_trusted.o
+ ext4-$(CONFIG_EXT4_FS_POSIX_ACL)      += acl.o
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h
++++ linux-stage/fs/ext4/ext4.h
+@@ -878,7 +878,7 @@ struct ext4_super_block {
+       __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
+       __le32  s_flags;                /* Miscellaneous flags */
+       __le16  s_raid_stride;          /* RAID stride */
+-      __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
++      __le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
+       __le64  s_mmp_block;            /* Block for multi-mount protection */
+       __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
+       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
+@@ -1032,6 +1032,9 @@ struct ext4_sb_info {
+       /* workqueue for dio unwritten */
+       struct workqueue_struct *dio_unwritten_wq;
++
++      /* Kernel thread for multiple mount protection */
++      struct task_struct *s_mmp_tsk;
+ };
+ static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
+@@ -1169,7 +1172,8 @@ static inline void ext4_clear_inode_stat
+                                        EXT4_FEATURE_INCOMPAT_META_BG| \
+                                        EXT4_FEATURE_INCOMPAT_EXTENTS| \
+                                        EXT4_FEATURE_INCOMPAT_64BIT| \
+-                                       EXT4_FEATURE_INCOMPAT_FLEX_BG)
++                                       EXT4_FEATURE_INCOMPAT_FLEX_BG| \
++                                       EXT4_FEATURE_INCOMPAT_MMP)
+ #define EXT4_FEATURE_RO_COMPAT_SUPP   (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
+@@ -1376,6 +1380,67 @@ void ext4_get_group_no_and_offset(struct
+ extern struct proc_dir_entry *ext4_proc_root;
+ /*
++ * This structure will be used for multiple mount protection. It will be
++ * written into the block number saved in the s_mmp_block field in the
++ * superblock. Programs that check MMP should assume that if
++ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
++ * to use the filesystem, regardless of how old the timestamp is.
++ */
++#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
++#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
++#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
++#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
++
++struct mmp_struct {
++       __le32  mmp_magic;              /* Magic number for MMP */
++       __le32  mmp_seq;                /* Sequence no. updated periodically */
++
++       /*
++        * mmp_time, mmp_nodename & mmp_bdevname are only used for information
++        * purposes and do not affect the correctness of the algorithm
++        */
++       __le64  mmp_time;               /* Time last updated */
++       char    mmp_nodename[64];       /* Node which last updated MMP block */
++       char    mmp_bdevname[32];       /* Bdev which last updated MMP block */
++
++       /*
++        * mmp_check_interval is used to verify if the MMP block has been
++        * updated on the block device. The value is updated based on the
++        * maximum time to write the MMP block during an update cycle.
++        */
++       __le16  mmp_check_interval;
++
++       __le16  mmp_pad1;
++       __le32  mmp_pad2[227];
++};
++
++/* arguments passed to the mmp thread */
++struct mmpd_data {
++       struct buffer_head *bh; /* bh from initial read_mmp_block() */
++       struct super_block *sb;  /* super block of the fs */
++};
++
++/*
++ * Check interval multiplier
++ * The MMP block is written every update interval and initially checked every
++ * update interval x the multiplier (the value is then adapted based on the
++ * write latency). The reason is that writes can be delayed under load and we
++ * don't want readers to incorrectly assume that the filesystem is no longer
++ * in use.
++ */
++#define EXT4_MMP_CHECK_MULT            2UL
++
++/*
++ * Minimum interval for MMP checking in seconds.
++ */
++#define EXT4_MMP_MIN_CHECK_INTERVAL    5UL
++
++/*
++ * Maximum interval for MMP checking in seconds.
++ */
++#define EXT4_MMP_MAX_CHECK_INTERVAL    300UL
++
++/*
+  * Function prototypes
+  */
+@@ -1547,6 +1612,10 @@ extern void __ext4_warning(struct super_
+ #define ext4_warning(sb, message...)   __ext4_warning(sb, __func__, ## message)
+ extern void ext4_msg(struct super_block *, const char *, const char *, ...)
+       __attribute__ ((format (printf, 3, 4)));
++extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
++                         const char *, const char *);
++#define dump_mmp_msg(sb, mmp, msg)     __dump_mmp_msg(sb, mmp, __func__, msg)
++
+ extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
+                               const char *, const char *, ...)
+       __attribute__ ((format (printf, 4, 5)));
+@@ -1784,6 +1853,9 @@ static inline void ext4_unlock_group(str
+       spin_unlock(ext4_group_lock_ptr(sb, group));
  }
  
++/* mmp.c */
++extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
++
+ /*
+  * Inodes and files operations
+  */
+Index: linux-stage/fs/ext4/mmp.c
+===================================================================
+--- /dev/null
++++ linux-stage/fs/ext4/mmp.c
+@@ -0,0 +1,351 @@
++#include <linux/fs.h>
++#include <linux/random.h>
++#include <linux/buffer_head.h>
++#include <linux/utsname.h>
++#include <linux/kthread.h>
++
++#include "ext4.h"
++
 +/*
 + * Write the MMP block using WRITE_SYNC to try to get the block on-disk
 + * faster.
 + */
 +static int write_mmp_block(struct buffer_head *bh)
 +{
-+      mark_buffer_dirty(bh);
-+      lock_buffer(bh);
-+      bh->b_end_io = end_buffer_write_sync;
-+      get_bh(bh);
-+      submit_bh(WRITE_SYNC, bh);
-+      wait_on_buffer(bh);
-+      if (unlikely(!buffer_uptodate(bh)))
-+              return 1;
-+
-+      return 0;
++       mark_buffer_dirty(bh);
++       lock_buffer(bh);
++       bh->b_end_io = end_buffer_write_sync;
++       get_bh(bh);
++       submit_bh(WRITE_SYNC, bh);
++       wait_on_buffer(bh);
++       if (unlikely(!buffer_uptodate(bh)))
++               return 1;
++
++       return 0;
 +}
 +
 +/*
@@ -47,56 +189,54 @@ Index: linux-stage/fs/ext4/super.c
 + * uptodate flag on the buffer.
 + */
 +static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
-+                        unsigned long mmp_block)
++                         ext4_fsblk_t mmp_block)
 +{
-+      struct mmp_struct *mmp;
-+
-+      if (*bh)
-+              clear_buffer_uptodate(*bh);
-+
-+#if 0
-+      brelse(*bh);
-+
-+      *bh = sb_bread(sb, mmp_block);
-+#else
-+      if (!*bh)
-+              *bh = sb_getblk(sb, mmp_block);
-+      if (*bh) {
-+              get_bh(*bh);
-+              lock_buffer(*bh);
-+              (*bh)->b_end_io = end_buffer_read_sync;
-+              submit_bh(READ_SYNC, *bh);
-+              wait_on_buffer(*bh);
-+              if (!buffer_uptodate(*bh)) {
-+                      brelse(*bh);
-+                      *bh = NULL;
-+              }
-+      }
-+#endif
-+      if (!*bh) {
-+              __ext4_warning(sb, __func__,
-+                             "Error while reading MMP block %lu", mmp_block);
-+              return -EIO;
-+      }
-+
-+      mmp = (struct mmp_struct *)((*bh)->b_data);
-+      if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
-+              return -EINVAL;
-+
-+      return 0;
++       struct mmp_struct *mmp;
++
++       if (*bh)
++               clear_buffer_uptodate(*bh);
++
++       /* This would be sb_bread(sb, mmp_block), except we need to be sure
++        * that the MD RAID device cache has been bypassed, and that the read
++        * is not blocked in the elevator. */
++       if (!*bh)
++               *bh = sb_getblk(sb, mmp_block);
++       if (*bh) {
++               get_bh(*bh);
++               lock_buffer(*bh);
++               (*bh)->b_end_io = end_buffer_read_sync;
++               submit_bh(READ_SYNC, *bh);
++               wait_on_buffer(*bh);
++               if (!buffer_uptodate(*bh)) {
++                       brelse(*bh);
++                       *bh = NULL;
++               }
++       }
++       if (!*bh) {
++               ext4_warning(sb, "Error while reading MMP block %llu",
++                            mmp_block);
++               return -EIO;
++       }
++
++       mmp = (struct mmp_struct *)((*bh)->b_data);
++       if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
++               return -EINVAL;
++
++       return 0;
 +}
 +
 +/*
 + * Dump as much information as possible to help the admin.
 + */
-+static void dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
-+                       const char *function, const char *msg)
++void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
++                   const char *function, const char *msg)
 +{
-+      __ext4_warning(sb, function, "%s", msg);
-+      __ext4_warning(sb, function, "MMP failure info: last update time: %llu, "
-+                     "last update node: %s, last update device: %s\n",
-+                     (long long unsigned int)le64_to_cpu(mmp->mmp_time),
-+                     mmp->mmp_nodename, mmp->mmp_bdevname);
++       __ext4_warning(sb, function, "%s", msg);
++       __ext4_warning(sb, function,
++                      "MMP failure info: last update time: %llu, last update "
++                      "node: %s, last update device: %s\n",
++                      (long long unsigned int) le64_to_cpu(mmp->mmp_time),
++                      mmp->mmp_nodename, mmp->mmp_bdevname);
 +}
 +
 +/*
@@ -104,129 +244,127 @@ Index: linux-stage/fs/ext4/super.c
 + */
 +static int kmmpd(void *data)
 +{
-+      struct super_block *sb = ((struct mmpd_data *) data)->sb;
-+      struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
-+      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-+      struct mmp_struct *mmp;
-+      unsigned long mmp_block;
-+      u32 seq = 0;
-+      unsigned long failed_writes = 0;
-+      int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
-+      unsigned mmp_check_interval;
-+      unsigned long last_update_time;
-+      unsigned long diff;
-+      int retval;
-+
-+      mmp_block = le64_to_cpu(es->s_mmp_block);
-+      mmp = (struct mmp_struct *)(bh->b_data);
-+      mmp->mmp_time = cpu_to_le64(get_seconds());
-+      /*
-+       * Start with the higher mmp_check_interval and reduce it if
-+       * the MMP block is being updated on time.
-+       */
-+      mmp_check_interval = max(5UL * mmp_update_interval,
-+                               EXT4_MMP_MIN_CHECK_INTERVAL);
-+      mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
-+      bdevname(bh->b_bdev, mmp->mmp_bdevname);
-+
-+      down_read(&uts_sem);
-+      memcpy(mmp->mmp_nodename, system_utsname.nodename,
-+             sizeof(mmp->mmp_nodename));
-+      up_read(&uts_sem);
-+
-+      while (!kthread_should_stop()) {
-+              if (++seq > EXT4_MMP_SEQ_MAX)
-+                      seq = 1;
-+
-+              mmp->mmp_seq = cpu_to_le32(seq);
-+              mmp->mmp_time = cpu_to_le64(get_seconds());
-+              last_update_time = jiffies;
-+
-+              retval = write_mmp_block(bh);
-+              /*
-+               * Don't spew too many error messages. Print one every
-+               * (s_mmp_update_interval * 60) seconds.
-+               */
-+              if (retval && (failed_writes % 60) == 0) {
-+                      __ext4_error(sb, __func__,
-+                                   "Error writing to MMP block");
-+                      failed_writes++;
-+              }
-+
-+              if (!(le32_to_cpu(es->s_feature_incompat) &
-+                  EXT4_FEATURE_INCOMPAT_MMP)) {
-+                      __ext4_warning(sb, __func__, "kmmpd being stopped "
-+                                     "since MMP feature has been disabled.");
-+                      EXT4_SB(sb)->s_mmp_tsk = NULL;
-+                      goto failed;
-+              }
-+
-+              if (sb->s_flags & MS_RDONLY) {
-+                      __ext4_warning(sb, __func__, "kmmpd being stopped "
-+                                     "since filesystem has been remounted as "
-+                                     "readonly.");
-+                      EXT4_SB(sb)->s_mmp_tsk = NULL;
-+                      goto failed;
-+              }
-+
-+              diff = jiffies - last_update_time;
-+              if (diff < mmp_update_interval * HZ)
-+                      schedule_timeout_interruptible(EXT4_MMP_UPDATE_INTERVAL*
-+                                                     HZ - diff);
-+
-+              /*
-+               * We need to make sure that more than mmp_check_interval
-+               * seconds have not passed since writing. If that has happened
-+               * we need to check if the MMP block is as we left it.
-+               */
-+              diff = jiffies - last_update_time;
-+              if (diff > mmp_check_interval * HZ) {
-+                      struct buffer_head *bh_check = NULL;
-+                      struct mmp_struct *mmp_check;
-+
-+                      retval = read_mmp_block(sb, &bh_check, mmp_block);
-+                      if (retval) {
-+                              __ext4_error(sb, __func__, "error reading MMP"
-+                                           "data: %d", retval);
-+                              EXT4_SB(sb)->s_mmp_tsk = NULL;
-+                              goto failed;
-+                      }
-+
-+                      mmp_check = (struct mmp_struct *)(bh_check->b_data);
-+                      if (mmp->mmp_seq != mmp_check->mmp_seq ||
-+                          memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
-+                                 sizeof(mmp->mmp_nodename))) {
-+                              dump_mmp_msg(sb, mmp_check, __func__,
-+                                           "Error while updating MMP info. "
-+                                           "The filesystem seems to have "
-+                                           "been multiply mounted.");
-+                              __ext4_error(sb, __func__, "abort");
-+                              goto failed;
-+                      }
-+                      put_bh(bh_check);
-+              }
-+
-+              /*
-+               * Adjust the mmp_check_interval depending on how much time
-+               * it took for the MMP block to be written.
-+               */
-+              mmp_check_interval = max(min(5 * diff / HZ,
-+                                           EXT4_MMP_MAX_CHECK_INTERVAL),
-+                                       EXT4_MMP_MIN_CHECK_INTERVAL);
-+              mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
-+      }
-+
-+      /*
-+       * Unmount seems to be clean.
-+       */
-+      mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
-+      mmp->mmp_time = cpu_to_le64(get_seconds());
-+
-+      retval = write_mmp_block(bh);
++       struct super_block *sb = ((struct mmpd_data *) data)->sb;
++       struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
++       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++       struct mmp_struct *mmp;
++       ext4_fsblk_t mmp_block;
++       u32 seq = 0;
++       unsigned long failed_writes = 0;
++       int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
++       unsigned mmp_check_interval;
++       unsigned long last_update_time;
++       unsigned long diff;
++       int retval;
++
++       mmp_block = le64_to_cpu(es->s_mmp_block);
++       mmp = (struct mmp_struct *)(bh->b_data);
++       mmp->mmp_time = cpu_to_le64(get_seconds());
++       /*
++        * Start with the higher mmp_check_interval and reduce it if
++        * the MMP block is being updated on time.
++        */
++       mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
++                                EXT4_MMP_MIN_CHECK_INTERVAL);
++       mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
++       bdevname(bh->b_bdev, mmp->mmp_bdevname);
++
++       memcpy(mmp->mmp_nodename, init_utsname()->sysname,
++              sizeof(mmp->mmp_nodename));
++
++       while (!kthread_should_stop()) {
++               if (++seq > EXT4_MMP_SEQ_MAX)
++                       seq = 1;
++
++               mmp->mmp_seq = cpu_to_le32(seq);
++               mmp->mmp_time = cpu_to_le64(get_seconds());
++               last_update_time = jiffies;
++
++               retval = write_mmp_block(bh);
++               /*
++                * Don't spew too many error messages. Print one every
++                * (s_mmp_update_interval * 60) seconds.
++                */
++               if (retval && (failed_writes % 60) == 0) {
++                       ext4_error(sb, "Error writing to MMP block");
++                       failed_writes++;
++               }
++
++               if (!(le32_to_cpu(es->s_feature_incompat) &
++                   EXT4_FEATURE_INCOMPAT_MMP)) {
++                       ext4_warning(sb, "kmmpd being stopped since MMP feature"
++                                    " has been disabled.");
++                       EXT4_SB(sb)->s_mmp_tsk = NULL;
++                       goto failed;
++               }
++
++               if (sb->s_flags & MS_RDONLY) {
++                       ext4_warning(sb, "kmmpd being stopped since filesystem "
++                                    "has been remounted as readonly.");
++                       EXT4_SB(sb)->s_mmp_tsk = NULL;
++                       goto failed;
++               }
++
++               diff = jiffies - last_update_time;
++               if (diff < mmp_update_interval * HZ)
++                       schedule_timeout_interruptible(mmp_update_interval *
++                                                      HZ - diff);
++
++               /*
++                * We need to make sure that more than mmp_check_interval
++                * seconds have not passed since writing. If that has happened
++                * we need to check if the MMP block is as we left it.
++                */
++               diff = jiffies - last_update_time;
++               if (diff > mmp_check_interval * HZ) {
++                       struct buffer_head *bh_check = NULL;
++                       struct mmp_struct *mmp_check;
++
++                       retval = read_mmp_block(sb, &bh_check, mmp_block);
++                       if (retval) {
++                               ext4_error(sb, "error reading MMP data: %d",
++                                          retval);
++
++                               EXT4_SB(sb)->s_mmp_tsk = NULL;
++                               goto failed;
++                       }
++
++                       mmp_check = (struct mmp_struct *)(bh_check->b_data);
++                       if (mmp->mmp_seq != mmp_check->mmp_seq ||
++                           memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
++                                  sizeof(mmp->mmp_nodename))) {
++                               dump_mmp_msg(sb, mmp_check,
++                                            "Error while updating MMP info. "
++                                            "The filesystem seems to have been"
++                                            " multiply mounted.");
++                               ext4_error(sb, "abort");
++                               goto failed;
++                       }
++                       put_bh(bh_check);
++               }
++
++                /*
++                * Adjust the mmp_check_interval depending on how much time
++                * it took for the MMP block to be written.
++                */
++               mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
++                                            EXT4_MMP_MAX_CHECK_INTERVAL),
++                                        EXT4_MMP_MIN_CHECK_INTERVAL);
++               mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
++       }
++
++       /*
++        * Unmount seems to be clean.
++        */
++       mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
++       mmp->mmp_time = cpu_to_le64(get_seconds());
++
++       retval = write_mmp_block(bh);
 +
 +failed:
-+      brelse(bh);
-+      return retval;
++       kfree(data);
++       brelse(bh);
++       return retval;
 +}
 +
 +/*
@@ -235,155 +373,165 @@ Index: linux-stage/fs/ext4/super.c
 + */
 +static unsigned int mmp_new_seq(void)
 +{
-+      u32 new_seq;
++       u32 new_seq;
 +
-+      do {
-+              get_random_bytes(&new_seq, sizeof(u32));
-+      } while (new_seq > EXT4_MMP_SEQ_MAX);
++       do {
++               get_random_bytes(&new_seq, sizeof(u32));
++       } while (new_seq > EXT4_MMP_SEQ_MAX);
 +
-+      return new_seq;
++       return new_seq;
 +}
 +
 +/*
 + * Protect the filesystem from being mounted more than once.
 + */
-+static int ext4_multi_mount_protect(struct super_block *sb,
-+                                  unsigned long mmp_block)
++int ext4_multi_mount_protect(struct super_block *sb,
++                                   ext4_fsblk_t mmp_block)
 +{
-+      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-+      struct buffer_head *bh = NULL;
-+      struct mmp_struct *mmp = NULL;
-+      struct mmpd_data *mmpd_data;
-+      u32 seq;
-+      unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
-+      unsigned int wait_time = 0;
-+      int retval;
-+
-+      if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
-+          mmp_block >= ext4_blocks_count(es)) {
-+              __ext4_warning(sb, __func__,
-+                             "Invalid MMP block in superblock");
-+              goto failed;
-+      }
-+
-+      retval = read_mmp_block(sb, &bh, mmp_block);
-+      if (retval)
-+              goto failed;
-+
-+      mmp = (struct mmp_struct *)(bh->b_data);
-+
-+      if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
-+              mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
-+
-+      /*
-+       * If check_interval in MMP block is larger, use that instead of
-+       * update_interval from the superblock.
-+       */
-+      if (mmp->mmp_check_interval > mmp_check_interval)
-+              mmp_check_interval = mmp->mmp_check_interval;
-+
-+      seq = le32_to_cpu(mmp->mmp_seq);
-+      if (seq == EXT4_MMP_SEQ_CLEAN)
-+              goto skip;
-+
-+      if (seq == EXT4_MMP_SEQ_FSCK) {
-+              dump_mmp_msg(sb, mmp, __func__,
-+                           "fsck is running on the filesystem");
-+              goto failed;
-+      }
-+
-+      wait_time = min(mmp_check_interval * 2 + 1,
-+              mmp_check_interval + 60);
-+
-+      /* Print MMP interval if more than 20 secs. */
-+      if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
-+              __ext4_warning(sb, __func__, "MMP interval %u higher than "
-+                             "expected, please wait.\n", wait_time * 2);
-+
-+      if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
-+              __ext4_warning(sb, __func__, "MMP startup interrupted, failing "
-+                             "mount\n");
-+              goto failed;
-+      }
-+
-+      retval = read_mmp_block(sb, &bh, mmp_block);
-+      if (retval)
-+              goto failed;
-+      mmp = (struct mmp_struct *)(bh->b_data);
-+      if (seq != le32_to_cpu(mmp->mmp_seq)) {
-+              dump_mmp_msg(sb, mmp, __func__,
-+                           "Device is already active on another node.");
-+              goto failed;
-+      }
++       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++       struct buffer_head *bh = NULL;
++       struct mmp_struct *mmp = NULL;
++       struct mmpd_data *mmpd_data;
++       u32 seq;
++       unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
++       unsigned int wait_time = 0;
++       int retval;
++
++       if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
++           mmp_block >= ext4_blocks_count(es)) {
++               ext4_warning(sb, "Invalid MMP block in superblock");
++               goto failed;
++       }
++
++       retval = read_mmp_block(sb, &bh, mmp_block);
++       if (retval)
++               goto failed;
++
++       mmp = (struct mmp_struct *)(bh->b_data);
++
++       if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
++               mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
++
++       /*
++        * If check_interval in MMP block is larger, use that instead of
++        * update_interval from the superblock.
++        */
++       if (mmp->mmp_check_interval > mmp_check_interval)
++               mmp_check_interval = mmp->mmp_check_interval;
++
++       seq = le32_to_cpu(mmp->mmp_seq);
++       if (seq == EXT4_MMP_SEQ_CLEAN)
++               goto skip;
++
++       if (seq == EXT4_MMP_SEQ_FSCK) {
++               dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
++               goto failed;
++       }
++
++       wait_time = min(mmp_check_interval * 2 + 1,
++                       mmp_check_interval + 60);
++
++       /* Print MMP interval if more than 20 secs. */
++       if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
++               ext4_warning(sb, "MMP interval %u higher than expected, please"
++                            " wait.\n", wait_time * 2);
++
++       if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
++               ext4_warning(sb, "MMP startup interrupted, failing mount\n");
++               goto failed;
++       }
++
++       retval = read_mmp_block(sb, &bh, mmp_block);
++       if (retval)
++               goto failed;
++       mmp = (struct mmp_struct *)(bh->b_data);
++       if (seq != le32_to_cpu(mmp->mmp_seq)) {
++               dump_mmp_msg(sb, mmp,
++                            "Device is already active on another node.");
++               goto failed;
++       }
 +
 +skip:
-+      /*
-+       * write a new random sequence number.
-+       */
-+      mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());
-+
-+      retval = write_mmp_block(bh);
-+      if (retval)
-+              goto failed;
-+
-+      /*
-+       * wait for MMP interval and check mmp_seq.
-+       */
-+      if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
-+              __ext4_warning(sb, __func__, "MMP startup interrupted, failing "
-+                             "mount\n");
-+              goto failed;
-+      }
-+
-+      retval = read_mmp_block(sb, &bh, mmp_block);
-+      if (retval)
-+              goto failed;
-+      mmp = (struct mmp_struct *)(bh->b_data);
-+      if (seq != le32_to_cpu(mmp->mmp_seq)) {
-+              dump_mmp_msg(sb, mmp, __func__,
-+                           "Device is already active on another node.");
-+              goto failed;
-+      }
-+
-+      mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
-+      if (!mmpd_data) {
-+              __ext4_warning(sb, KERN_ERR, "not enough memory for mmpd_data");
-+              goto failed;
-+      }
-+      mmpd_data->sb = sb;
-+      mmpd_data->bh = bh;
-+
-+      /*
-+       * Start a kernel thread to update the MMP block periodically.
-+       */
-+      EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
-+                                           bdevname(bh->b_bdev,
-+                                                    mmp->mmp_bdevname));
-+      if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
-+              EXT4_SB(sb)->s_mmp_tsk = NULL;
-+              __ext4_warning(sb, __func__, "Unable to create kmmpd thread "
-+                             "for %s.", sb->s_id);
-+              goto failed;
-+      }
-+
-+      return 0;
++       /*
++        * write a new random sequence number.
++        */
++       mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());
++
++       retval = write_mmp_block(bh);
++       if (retval)
++               goto failed;
++
++       /*
++        * wait for MMP interval and check mmp_seq.
++        */
++       if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
++               ext4_warning(sb, "MMP startup interrupted, failing mount\n");
++               goto failed;
++       }
++
++       retval = read_mmp_block(sb, &bh, mmp_block);
++       if (retval)
++               goto failed;
++       mmp = (struct mmp_struct *)(bh->b_data);
++       if (seq != le32_to_cpu(mmp->mmp_seq)) {
++               dump_mmp_msg(sb, mmp,
++                            "Device is already active on another node.");
++               goto failed;
++       }
++
++       mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
++       if (!mmpd_data) {
++               ext4_warning(sb, "not enough memory for mmpd_data");
++               goto failed;
++       }
++       mmpd_data->sb = sb;
++       mmpd_data->bh = bh;
++
++       /*
++        * Start a kernel thread to update the MMP block periodically.
++        */
++       EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
++                                            bdevname(bh->b_bdev,
++                                                     mmp->mmp_bdevname));
++       if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
++               EXT4_SB(sb)->s_mmp_tsk = NULL;
++               kfree(mmpd_data);
++               ext4_warning(sb, "Unable to create kmmpd thread for %s.",
++                            sb->s_id);
++               goto failed;
++       }
++
++       return 0;
 +
 +failed:
-+      brelse(bh);
-+      return 1;
++       brelse(bh);
++       return 1;
 +}
++
++
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -40,6 +40,8 @@
+ #include <linux/log2.h>
+ #include <linux/crc16.h>
+ #include <asm/uaccess.h>
++#include <linux/kthread.h>
++#include <linux/utsname.h>
  
- static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp)
- {
-@@ -930,7 +1282,6 @@ static struct dentry *ext4_get_dentry(st
-       struct inode *inode;
-       struct dentry *result;
--
-       if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
-               return ERR_PTR(-ESTALE);
-       if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
-@@ -2740,6 +3091,11 @@ static int ext4_fill_super(struct super_
+ #include "ext4.h"
+ #include "ext4_jbd2.h"
+@@ -698,6 +700,8 @@ static void ext4_put_super(struct super_
+               invalidate_bdev(sbi->journal_bdev, 0);
+               ext4_blkdev_remove(sbi);
+       }
++      if (sbi->s_mmp_tsk)
++              kthread_stop(sbi->s_mmp_tsk);
+       sb->s_fs_info = NULL;
+       /*
+        * Now that we are completely done shutting down the
+@@ -2810,6 +2814,11 @@ static int ext4_fill_super(struct super_
                          EXT4_HAS_INCOMPAT_FEATURE(sb,
                                    EXT4_FEATURE_INCOMPAT_RECOVER));
  
@@ -395,7 +543,7 @@ Index: linux-stage/fs/ext4/super.c
        /*
         * The first inode we look at is the journal inode.  Don't try
         * root first: it may be modified in the journal!
-@@ -2978,6 +3334,8 @@ failed_mount3:
+@@ -3048,6 +3057,8 @@ failed_mount3:
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
@@ -404,7 +552,7 @@ Index: linux-stage/fs/ext4/super.c
  failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
-@@ -3488,7 +3846,7 @@ static int ext4_remount(struct super_blo
+@@ -3557,7 +3568,7 @@ static int ext4_remount(struct super_blo
        struct ext4_mount_options old_opts;
        ext4_group_t g;
        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@@ -413,101 +561,17 @@ Index: linux-stage/fs/ext4/super.c
  #ifdef CONFIG_QUOTA
        int i;
  #endif
-@@ -3607,6 +3965,13 @@ static int ext4_remount(struct super_blo
+@@ -3676,6 +3687,13 @@ static int ext4_remount(struct super_blo
                                goto restore_opts;
                        if (!ext4_setup_super(sb, es, 0))
                                sb->s_flags &= ~MS_RDONLY;
 +                      if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-+                                                  EXT4_FEATURE_INCOMPAT_MMP))
++                                              EXT4_FEATURE_INCOMPAT_MMP))
 +                              if (ext4_multi_mount_protect(sb,
-+                                              le64_to_cpu(es->s_mmp_block))) {
++                                      le64_to_cpu(es->s_mmp_block))) {
 +                                      err = -EROFS;
 +                                      goto restore_opts;
 +                              }
                }
        }
        ext4_setup_system_zone(sb);
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h
-+++ linux-stage/fs/ext4/ext4.h
-@@ -851,7 +851,7 @@ struct ext4_super_block {
-       __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
-       __le32  s_flags;                /* Miscellaneous flags */
-       __le16  s_raid_stride;          /* RAID stride */
--      __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
-+      __le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
-       __le64  s_mmp_block;            /* Block for multi-mount protection */
-       __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
-       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
-@@ -1005,6 +1005,9 @@ struct ext4_sb_info {
-       /* workqueue for dio unwritten */
-       struct workqueue_struct *dio_unwritten_wq;
-+
-+      /* Kernel thread for multiple mount protection */
-+      struct task_struct *s_mmp_tsk;
- };
- static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
-@@ -1114,7 +1117,8 @@ static inline int ext4_valid_inum(struct
-                                        EXT4_FEATURE_INCOMPAT_META_BG| \
-                                        EXT4_FEATURE_INCOMPAT_EXTENTS| \
-                                        EXT4_FEATURE_INCOMPAT_64BIT| \
--                                       EXT4_FEATURE_INCOMPAT_FLEX_BG)
-+                                       EXT4_FEATURE_INCOMPAT_FLEX_BG| \
-+                                       EXT4_FEATURE_INCOMPAT_MMP)
- #define EXT4_FEATURE_RO_COMPAT_SUPP   (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
-                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
-@@ -1296,6 +1300,50 @@ void ext4_get_group_no_and_offset(struct
- extern struct proc_dir_entry *ext4_proc_root;
- /*
-+ * This structure will be used for multiple mount protection. It will be
-+ * written into the block number saved in the s_mmp_block field in the
-+ * superblock. Programs that check MMP should assume that if
-+ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
-+ * to use the filesystem, regardless of how old the timestamp is.
-+ */
-+#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
-+#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
-+#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
-+#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
-+
-+struct mmp_struct {
-+      __le32  mmp_magic;
-+      __le32  mmp_seq;
-+      __le64  mmp_time;
-+      char    mmp_nodename[64];
-+      char    mmp_bdevname[32];
-+      __le16  mmp_check_interval;
-+      __le16  mmp_pad1;
-+      __le32  mmp_pad2[227];
-+};
-+
-+/* arguments passed to the mmp thread */
-+struct mmpd_data {
-+      struct buffer_head *bh; /* bh from initial read_mmp_block() */
-+      struct super_block *sb;  /* super block of the fs */
-+};
-+
-+/*
-+ * Default interval in seconds to update the MMP sequence number.
-+ */
-+#define EXT4_MMP_UPDATE_INTERVAL   1
-+
-+/*
-+ * Minimum interval for MMP checking in seconds.
-+ */
-+#define EXT4_MMP_MIN_CHECK_INTERVAL   5UL
-+
-+/*
-+ * Maximum interval for MMP checking in seconds.
-+ */
-+#define EXT4_MMP_MAX_CHECK_INTERVAL   300UL
-+
-+/*
-  * Function prototypes
-  */
index f5fe952..e2942a9 100644 (file)
-Index: linux-stage/fs/ext4/super.c
+Prevent an ext4 filesystem from being mounted multiple times.
+A sequence number is stored on disk and is periodically updated (every 5
+seconds by default) by a mounted filesystem.
+At mount time, we now wait for s_mmp_update_interval seconds to make sure
+that the MMP sequence does not change.
+In case of failure, the nodename, bdevname and the time at which the MMP
+block was last updated are displayed.
+Move all mmp code to a dedicated file (mmp.c).
+
+Signed-off-by: Andreas Dilger <adilger <at> whamcloud.com>
+Signed-off-by: Johann Lombardi <johann <at> whamcloud.com>
+---
+ fs/ext4/Makefile |    3 +-
+ fs/ext4/ext4.h   |   76 ++++++++++++-
+ fs/ext4/mmp.c    |  351 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/ext4/super.c  |   18 +++-
+ 4 files changed, 444 insertions(+), 4 deletions(-)
+ create mode 100644 fs/ext4/mmp.c
+
+Index: linux-stage/fs/ext4/Makefile
 ===================================================================
---- linux-stage.orig/fs/ext4/super.c   2011-04-05 17:51:52.173385539 +0800
-+++ linux-stage/fs/ext4/super.c        2011-04-05 17:53:58.686223570 +0800
-@@ -40,6 +40,8 @@
- #include <linux/log2.h>
- #include <linux/crc16.h>
- #include <asm/uaccess.h>
-+#include <linux/kthread.h>
-+#include <linux/utsname.h>
+--- linux-stage.orig/fs/ext4/Makefile
++++ linux-stage/fs/ext4/Makefile
+@@ -6,7 +6,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
  
- #include "ext4.h"
- #include "ext4_jbd2.h"
-@@ -700,6 +702,8 @@
-               invalidate_bdev(sbi->journal_bdev);
-               ext4_blkdev_remove(sbi);
-       }
-+      if (sbi->s_mmp_tsk)
-+              kthread_stop(sbi->s_mmp_tsk);
-       sb->s_fs_info = NULL;
-       /*
-        * Now that we are completely done shutting down the
-@@ -970,6 +974,355 @@
-       return 0;
- }
+ ext4-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+               ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+-              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
++              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
++              mmp.o
+ ext4-$(CONFIG_EXT4_FS_XATTR)          += xattr.o xattr_user.o xattr_trusted.o
+ ext4-$(CONFIG_EXT4_FS_POSIX_ACL)      += acl.o
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h
++++ linux-stage/fs/ext4/ext4.h
+@@ -893,7 +893,7 @@ struct ext4_super_block {
+       __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
+       __le32  s_flags;                /* Miscellaneous flags */
+       __le16  s_raid_stride;          /* RAID stride */
+-      __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
++      __le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
+       __le64  s_mmp_block;            /* Block for multi-mount protection */
+       __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
+       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
+@@ -1040,6 +1040,9 @@ struct ext4_sb_info {
+       /* workqueue for dio unwritten */
+       struct workqueue_struct *dio_unwritten_wq;
++
++      /* Kernel thread for multiple mount protection */
++      struct task_struct *s_mmp_tsk;
+ };
  
+ static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
+@@ -1176,7 +1179,8 @@ static inline void ext4_clear_inode_stat
+                                        EXT4_FEATURE_INCOMPAT_META_BG| \
+                                        EXT4_FEATURE_INCOMPAT_EXTENTS| \
+                                        EXT4_FEATURE_INCOMPAT_64BIT| \
+-                                       EXT4_FEATURE_INCOMPAT_FLEX_BG)
++                                       EXT4_FEATURE_INCOMPAT_FLEX_BG| \
++                                       EXT4_FEATURE_INCOMPAT_MMP)
+ #define EXT4_FEATURE_RO_COMPAT_SUPP   (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
+@@ -1383,6 +1387,67 @@ void ext4_get_group_no_and_offset(struct
+ extern struct proc_dir_entry *ext4_proc_root;
+ /*
++ * This structure will be used for multiple mount protection. It will be
++ * written into the block number saved in the s_mmp_block field in the
++ * superblock. Programs that check MMP should assume that if
++ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
++ * to use the filesystem, regardless of how old the timestamp is.
++ */
++#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
++#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
++#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
++#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
++
++struct mmp_struct {
++       __le32  mmp_magic;              /* Magic number for MMP */
++       __le32  mmp_seq;                /* Sequence no. updated periodically */
++
++       /*
++        * mmp_time, mmp_nodename & mmp_bdevname are only used for information
++        * purposes and do not affect the correctness of the algorithm
++        */
++       __le64  mmp_time;               /* Time last updated */
++       char    mmp_nodename[64];       /* Node which last updated MMP block */
++       char    mmp_bdevname[32];       /* Bdev which last updated MMP block */
++
++       /*
++        * mmp_check_interval is used to verify if the MMP block has been
++        * updated on the block device. The value is updated based on the
++        * maximum time to write the MMP block during an update cycle.
++        */
++       __le16  mmp_check_interval;
++
++       __le16  mmp_pad1;
++       __le32  mmp_pad2[227];
++};
++
++/* arguments passed to the mmp thread */
++struct mmpd_data {
++       struct buffer_head *bh; /* bh from initial read_mmp_block() */
++       struct super_block *sb;  /* super block of the fs */
++};
++
++/*
++ * Check interval multiplier
++ * The MMP block is written every update interval and initially checked every
++ * update interval x the multiplier (the value is then adapted based on the
++ * write latency). The reason is that writes can be delayed under load and we
++ * don't want readers to incorrectly assume that the filesystem is no longer
++ * in use.
++ */
++#define EXT4_MMP_CHECK_MULT            2UL
++
++/*
++ * Minimum interval for MMP checking in seconds.
++ */
++#define EXT4_MMP_MIN_CHECK_INTERVAL    5UL
++
++/*
++ * Maximum interval for MMP checking in seconds.
++ */
++#define EXT4_MMP_MAX_CHECK_INTERVAL    300UL
++
++/*
+  * Function prototypes
+  */
+@@ -1552,6 +1617,10 @@ extern void __ext4_warning(struct super_
+ #define ext4_warning(sb, message...)  __ext4_warning(sb, __func__, ## message)
+ extern void ext4_msg(struct super_block *, const char *, const char *, ...)
+       __attribute__ ((format (printf, 3, 4)));
++extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
++                         const char *, const char *);
++#define dump_mmp_msg(sb, mmp, msg)     __dump_mmp_msg(sb, mmp, __func__, \
++                                                      msg)
+ extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
+                               const char *, const char *, ...)
+       __attribute__ ((format (printf, 4, 5)));
+@@ -1833,6 +1902,8 @@ extern int ext4_move_extents(struct file
+                            __u64 start_orig, __u64 start_donor,
+                            __u64 len, __u64 *moved_len);
++/* mmp.c */
++extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
+ /*
+  * Add new method to test wether block and inode bitmaps are properly
+Index: linux-stage/fs/ext4/mmp.c
+===================================================================
+--- /dev/null
++++ linux-stage/fs/ext4/mmp.c
+@@ -0,0 +1,351 @@
++#include <linux/fs.h>
++#include <linux/random.h>
++#include <linux/buffer_head.h>
++#include <linux/utsname.h>
++#include <linux/kthread.h>
++
++#include "ext4.h"
++
 +/*
 + * Write the MMP block using WRITE_SYNC to try to get the block on-disk
 + * faster.
 + */
 +static int write_mmp_block(struct buffer_head *bh)
 +{
-+      mark_buffer_dirty(bh);
-+      lock_buffer(bh);
-+      bh->b_end_io = end_buffer_write_sync;
-+      get_bh(bh);
-+      submit_bh(WRITE_SYNC, bh);
-+      wait_on_buffer(bh);
-+      if (unlikely(!buffer_uptodate(bh)))
-+              return 1;
-+
-+      return 0;
++       mark_buffer_dirty(bh);
++       lock_buffer(bh);
++       bh->b_end_io = end_buffer_write_sync;
++       get_bh(bh);
++       submit_bh(WRITE_SYNC, bh);
++       wait_on_buffer(bh);
++       if (unlikely(!buffer_uptodate(bh)))
++               return 1;
++
++       return 0;
 +}
 +
 +/*
@@ -47,56 +188,54 @@ Index: linux-stage/fs/ext4/super.c
 + * uptodate flag on the buffer.
 + */
 +static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
-+                        unsigned long mmp_block)
++                         ext4_fsblk_t mmp_block)
 +{
-+      struct mmp_struct *mmp;
-+
-+      if (*bh)
-+              clear_buffer_uptodate(*bh);
-+
-+#if 0
-+      brelse(*bh);
-+
-+      *bh = sb_bread(sb, mmp_block);
-+#else
-+      if (!*bh)
-+              *bh = sb_getblk(sb, mmp_block);
-+      if (*bh) {
-+              get_bh(*bh);
-+              lock_buffer(*bh);
-+              (*bh)->b_end_io = end_buffer_read_sync;
-+              submit_bh(READ_SYNC, *bh);
-+              wait_on_buffer(*bh);
-+              if (!buffer_uptodate(*bh)) {
-+                      brelse(*bh);
-+                      *bh = NULL;
-+              }
-+      }
-+#endif
-+      if (!*bh) {
-+              ext4_warning(sb,
-+                           "Error while reading MMP block %lu", mmp_block);
-+              return -EIO;
-+      }
-+
-+      mmp = (struct mmp_struct *)((*bh)->b_data);
-+      if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
-+              return -EINVAL;
-+
-+      return 0;
++       struct mmp_struct *mmp;
++
++       if (*bh)
++               clear_buffer_uptodate(*bh);
++
++       /* This would be sb_bread(sb, mmp_block), except we need to be sure
++        * that the MD RAID device cache has been bypassed, and that the read
++        * is not blocked in the elevator. */
++       if (!*bh)
++               *bh = sb_getblk(sb, mmp_block);
++       if (*bh) {
++               get_bh(*bh);
++               lock_buffer(*bh);
++               (*bh)->b_end_io = end_buffer_read_sync;
++               submit_bh(READ_SYNC, *bh);
++               wait_on_buffer(*bh);
++               if (!buffer_uptodate(*bh)) {
++                       brelse(*bh);
++                       *bh = NULL;
++               }
++       }
++       if (!*bh) {
++               ext4_warning(sb, "Error while reading MMP block %llu",
++                            mmp_block);
++               return -EIO;
++       }
++
++       mmp = (struct mmp_struct *)((*bh)->b_data);
++       if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
++               return -EINVAL;
++
++       return 0;
 +}
 +
 +/*
 + * Dump as much information as possible to help the admin.
 + */
-+static void dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
-+                       const char *function, const char *msg)
++void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
++                   const char *function, const char *msg)
 +{
-+      __ext4_warning(sb, function, msg);
-+      __ext4_warning(sb, function, "MMP failure info: last update time: %llu, "
-+                   "last update node: %s, last update device: %s\n",
-+                   (long long unsigned int)le64_to_cpu(mmp->mmp_time),
-+                   mmp->mmp_nodename, mmp->mmp_bdevname);
++       __ext4_warning(sb, function, msg);
++       __ext4_warning(sb, function,
++                      "MMP failure info: last update time: %llu, last update "
++                      "node: %s, last update device: %s\n",
++                      (long long unsigned int) le64_to_cpu(mmp->mmp_time),
++                      mmp->mmp_nodename, mmp->mmp_bdevname);
 +}
 +
 +/*
@@ -104,128 +243,127 @@ Index: linux-stage/fs/ext4/super.c
 + */
 +static int kmmpd(void *data)
 +{
-+      struct super_block *sb = ((struct mmpd_data *) data)->sb;
-+      struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
-+      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-+      struct mmp_struct *mmp;
-+      unsigned long mmp_block;
-+      u32 seq = 0;
-+      unsigned long failed_writes = 0;
-+      int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
-+      unsigned mmp_check_interval;
-+      unsigned long last_update_time;
-+      unsigned long diff;
-+      int retval;
-+
-+      mmp_block = le64_to_cpu(es->s_mmp_block);
-+      mmp = (struct mmp_struct *)(bh->b_data);
-+      mmp->mmp_time = cpu_to_le64(get_seconds());
-+      /*
-+       * Start with the higher mmp_check_interval and reduce it if
-+       * the MMP block is being updated on time.
-+       */
-+      mmp_check_interval = max(5UL * mmp_update_interval,
-+                               EXT4_MMP_MIN_CHECK_INTERVAL);
-+      mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
-+      bdevname(bh->b_bdev, mmp->mmp_bdevname);
-+
-+      memcpy(mmp->mmp_nodename, init_utsname()->sysname,
-+             sizeof(mmp->mmp_nodename));
-+
-+      while (!kthread_should_stop()) {
-+              if (++seq > EXT4_MMP_SEQ_MAX)
-+                      seq = 1;
-+
-+              mmp->mmp_seq = cpu_to_le32(seq);
-+              mmp->mmp_time = cpu_to_le64(get_seconds());
-+              last_update_time = jiffies;
-+
-+              retval = write_mmp_block(bh);
-+              /*
-+               * Don't spew too many error messages. Print one every
-+               * (s_mmp_update_interval * 60) seconds.
-+               */
-+              if (retval && (failed_writes % 60) == 0) {
-+                      ext4_error(sb,
-+                                 "Error writing to MMP block");
-+                      failed_writes++;
-+              }
-+
-+              if (!(le32_to_cpu(es->s_feature_incompat) &
-+                  EXT4_FEATURE_INCOMPAT_MMP)) {
-+                      ext4_warning(sb, "kmmpd being stopped "
-+                                   "since MMP feature has been disabled.");
-+                      EXT4_SB(sb)->s_mmp_tsk = NULL;
-+                      goto failed;
-+              }
-+
-+              if (sb->s_flags & MS_RDONLY) {
-+                      ext4_warning(sb, "kmmpd being stopped "
-+                                   "since filesystem has been remounted as "
-+                                   "readonly.");
-+                      EXT4_SB(sb)->s_mmp_tsk = NULL;
-+                      goto failed;
-+              }
-+
-+              diff = jiffies - last_update_time;
-+              if (diff < mmp_update_interval * HZ)
-+                      schedule_timeout_interruptible(mmp_update_interval *
-+                                                     HZ - diff);
-+
-+              /*
-+               * We need to make sure that more than mmp_check_interval
-+               * seconds have not passed since writing. If that has happened
-+               * we need to check if the MMP block is as we left it.
-+               */
-+              diff = jiffies - last_update_time;
-+              if (diff > mmp_check_interval * HZ) {
-+                      struct buffer_head *bh_check = NULL;
-+                      struct mmp_struct *mmp_check;
-+
-+                      retval = read_mmp_block(sb, &bh_check, mmp_block);
-+                      if (retval) {
-+                              EXT4_SB(sb)->s_mmp_tsk = NULL;
-+                              ext4_error(sb, "error reading MMP data: %d",
-+                                         retval);
-+                              goto failed;
-+                      }
-+
-+                      mmp_check = (struct mmp_struct *)(bh_check->b_data);
-+                      if (mmp->mmp_seq != mmp_check->mmp_seq ||
-+                          memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
-+                                 sizeof(mmp->mmp_nodename))) {
-+                              dump_mmp_msg(sb, mmp_check, __func__,
-+                                           "Error while updating MMP info. "
-+                                           "The filesystem seems to have "
-+                                           "been multiply mounted.");
-+                              ext4_error(sb, "abort");
-+                              goto failed;
-+                      }
-+                      put_bh(bh_check);
-+              }
-+
-+              /*
-+               * Adjust the mmp_check_interval depending on how much time
-+               * it took for the MMP block to be written.
-+               */
-+              mmp_check_interval = max(min(5 * diff / HZ,
-+                                           EXT4_MMP_MAX_CHECK_INTERVAL),
-+                                       EXT4_MMP_MIN_CHECK_INTERVAL);
-+              mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
-+      }
-+
-+      /*
-+       * Unmount seems to be clean.
-+       */
-+      mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
-+      mmp->mmp_time = cpu_to_le64(get_seconds());
-+
-+      retval = write_mmp_block(bh);
++       struct super_block *sb = ((struct mmpd_data *) data)->sb;
++       struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
++       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++       struct mmp_struct *mmp;
++       ext4_fsblk_t mmp_block;
++       u32 seq = 0;
++       unsigned long failed_writes = 0;
++       int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
++       unsigned mmp_check_interval;
++       unsigned long last_update_time;
++       unsigned long diff;
++       int retval;
++
++       mmp_block = le64_to_cpu(es->s_mmp_block);
++       mmp = (struct mmp_struct *)(bh->b_data);
++       mmp->mmp_time = cpu_to_le64(get_seconds());
++       /*
++        * Start with the higher mmp_check_interval and reduce it if
++        * the MMP block is being updated on time.
++        */
++       mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
++                                EXT4_MMP_MIN_CHECK_INTERVAL);
++       mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
++       bdevname(bh->b_bdev, mmp->mmp_bdevname);
++
++       /* Record the host name rather than the OS name: sysname is the same
++        * string on every node, which makes both the nodename comparison in
++        * the check below and the "last update node" message useless. */
++       memcpy(mmp->mmp_nodename, init_utsname()->nodename,
++              sizeof(mmp->mmp_nodename));
++
++       while (!kthread_should_stop()) {
++               if (++seq > EXT4_MMP_SEQ_MAX)
++                       seq = 1;
++
++               mmp->mmp_seq = cpu_to_le32(seq);
++               mmp->mmp_time = cpu_to_le64(get_seconds());
++               last_update_time = jiffies;
++
++               retval = write_mmp_block(bh);
++               /*
++                * Don't spew too many error messages. Print one every
++                * (s_mmp_update_interval * 60) seconds.
++                */
++               if (retval) {
++                       /* Count every failed write, not only the reported
++                        * ones; otherwise the counter sticks at 1 after the
++                        * first failure and the error is never printed
++                        * again. */
++                       if ((failed_writes % 60) == 0)
++                               ext4_error(sb, "Error writing to MMP block");
++                       failed_writes++;
++               }
++
++               if (!(le32_to_cpu(es->s_feature_incompat) &
++                   EXT4_FEATURE_INCOMPAT_MMP)) {
++                       ext4_warning(sb, "kmmpd being stopped since MMP feature"
++                                    " has been disabled.");
++                       EXT4_SB(sb)->s_mmp_tsk = NULL;
++                       goto failed;
++               }
++
++               if (sb->s_flags & MS_RDONLY) {
++                       ext4_warning(sb, "kmmpd being stopped since filesystem "
++                                    "has been remounted as readonly.");
++                       EXT4_SB(sb)->s_mmp_tsk = NULL;
++                       goto failed;
++               }
++
++               diff = jiffies - last_update_time;
++               if (diff < mmp_update_interval * HZ)
++                       schedule_timeout_interruptible(mmp_update_interval *
++                                                      HZ - diff);
++
++               /*
++                * We need to make sure that more than mmp_check_interval
++                * seconds have not passed since writing. If that has happened
++                * we need to check if the MMP block is as we left it.
++                */
++               diff = jiffies - last_update_time;
++               if (diff > mmp_check_interval * HZ) {
++                       struct buffer_head *bh_check = NULL;
++                       struct mmp_struct *mmp_check;
++
++                       retval = read_mmp_block(sb, &bh_check, mmp_block);
++                       if (retval) {
++                               ext4_error(sb, "error reading MMP data: %d",
++                                          retval);
++
++                               /* A bad magic (-EINVAL) leaves bh_check set;
++                                * brelse() is a no-op for the NULL -EIO case. */
++                               brelse(bh_check);
++                               EXT4_SB(sb)->s_mmp_tsk = NULL;
++                               goto failed;
++                       }
++
++                       mmp_check = (struct mmp_struct *)(bh_check->b_data);
++                       if (mmp->mmp_seq != mmp_check->mmp_seq ||
++                           memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
++                                  sizeof(mmp->mmp_nodename))) {
++                               dump_mmp_msg(sb, mmp_check,
++                                            "Error while updating MMP info. "
++                                            "The filesystem seems to have been"
++                                            " multiply mounted.");
++                               ext4_error(sb, "abort");
++                               /* Drop the re-read reference before bailing
++                                * out; the failed: label only releases bh. */
++                               put_bh(bh_check);
++                               goto failed;
++                       }
++                       put_bh(bh_check);
++               }
++
++                /*
++                * Adjust the mmp_check_interval depending on how much time
++                * it took for the MMP block to be written.
++                */
++               mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
++                                            EXT4_MMP_MAX_CHECK_INTERVAL),
++                                        EXT4_MMP_MIN_CHECK_INTERVAL);
++               mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
++       }
++
++       /*
++        * Unmount seems to be clean.
++        */
++       mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
++       mmp->mmp_time = cpu_to_le64(get_seconds());
++
++       retval = write_mmp_block(bh);
 +
 +failed:
-+      kfree(data);
-+      brelse(bh);
-+      return retval;
++       kfree(data);
++       brelse(bh);
++       return retval;
 +}
 +
 +/*
@@ -234,161 +372,176 @@ Index: linux-stage/fs/ext4/super.c
 + */
 +static unsigned int mmp_new_seq(void)
 +{
-+      u32 new_seq;
++       u32 new_seq;
 +
-+      do {
-+              get_random_bytes(&new_seq, sizeof(u32));
-+      } while (new_seq > EXT4_MMP_SEQ_MAX);
++       do {
++               get_random_bytes(&new_seq, sizeof(u32));
++       } while (new_seq > EXT4_MMP_SEQ_MAX);
 +
-+      return new_seq;
++       return new_seq;
 +}
 +
 +/*
 + * Protect the filesystem from being mounted more than once.
 + */
-+static int ext4_multi_mount_protect(struct super_block *sb,
-+                                  unsigned long mmp_block)
++int ext4_multi_mount_protect(struct super_block *sb,
++                                   ext4_fsblk_t mmp_block)
 +{
-+      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-+      struct buffer_head *bh = NULL;
-+      struct mmp_struct *mmp = NULL;
-+      struct mmpd_data *mmpd_data;
-+      u32 seq;
-+      unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
-+      unsigned int wait_time = 0;
-+      int retval;
-+
-+      if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
-+          mmp_block >= ext4_blocks_count(es)) {
-+              ext4_warning(sb,
-+                           "Invalid MMP block in superblock");
-+              goto failed;
-+      }
-+
-+      retval = read_mmp_block(sb, &bh, mmp_block);
-+      if (retval)
-+              goto failed;
-+
-+      mmp = (struct mmp_struct *)(bh->b_data);
-+
-+      if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
-+              mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
-+
-+      /*
-+       * If check_interval in MMP block is larger, use that instead of
-+       * update_interval from the superblock.
-+       */
-+      if (mmp->mmp_check_interval > mmp_check_interval)
-+              mmp_check_interval = mmp->mmp_check_interval;
-+
-+      seq = le32_to_cpu(mmp->mmp_seq);
-+      if (seq == EXT4_MMP_SEQ_CLEAN)
-+              goto skip;
-+
-+      if (seq == EXT4_MMP_SEQ_FSCK) {
-+              dump_mmp_msg(sb, mmp, __func__,
-+                           "fsck is running on the filesystem");
-+              goto failed;
-+      }
-+
-+      wait_time = min(mmp_check_interval * 2 + 1,
-+              mmp_check_interval + 60);
-+
-+      /* Print MMP interval if more than 20 secs. */
-+      if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
-+              ext4_warning(sb, "MMP interval %u higher than "
-+                           "expected, please wait.\n", wait_time * 2);
-+
-+      if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
-+              ext4_warning(sb, "MMP startup interrupted, failing "
-+                           "mount\n");
-+              goto failed;
-+      }
-+
-+      retval = read_mmp_block(sb, &bh, mmp_block);
-+      if (retval)
-+              goto failed;
-+      mmp = (struct mmp_struct *)(bh->b_data);
-+      if (seq != le32_to_cpu(mmp->mmp_seq)) {
-+              dump_mmp_msg(sb, mmp, __func__,
-+                           "Device is already active on another node.");
-+              goto failed;
-+      }
++       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++       struct buffer_head *bh = NULL;
++       struct mmp_struct *mmp = NULL;
++       struct mmpd_data *mmpd_data;
++       u32 seq;
++       unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
++       unsigned int wait_time = 0;
++       int retval;
++
++       if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
++           mmp_block >= ext4_blocks_count(es)) {
++               ext4_warning(sb, "Invalid MMP block in superblock");
++               goto failed;
++       }
++
++       retval = read_mmp_block(sb, &bh, mmp_block);
++       if (retval)
++               goto failed;
++
++       mmp = (struct mmp_struct *)(bh->b_data);
++
++       if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
++               mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
++
++       /*
++        * If check_interval in MMP block is larger, use that instead of
++        * update_interval from the superblock.  The on-disk field is
++        * little-endian (kmmpd stores it with cpu_to_le16()), so convert it
++        * before comparing with or assigning to the CPU-order local.
++        */
++       if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
++               mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
++
++       seq = le32_to_cpu(mmp->mmp_seq);
++       if (seq == EXT4_MMP_SEQ_CLEAN)
++               goto skip;
++
++       if (seq == EXT4_MMP_SEQ_FSCK) {
++               dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
++               goto failed;
++       }
++
++       wait_time = min(mmp_check_interval * 2 + 1,
++                       mmp_check_interval + 60);
++
++       /* Print MMP interval if more than 20 secs. */
++       if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
++               ext4_warning(sb, "MMP interval %u higher than expected, please"
++                            " wait.\n", wait_time * 2);
++
++       if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
++               ext4_warning(sb, "MMP startup interrupted, failing mount\n");
++               goto failed;
++       }
++
++       retval = read_mmp_block(sb, &bh, mmp_block);
++       if (retval)
++               goto failed;
++       mmp = (struct mmp_struct *)(bh->b_data);
++       if (seq != le32_to_cpu(mmp->mmp_seq)) {
++               dump_mmp_msg(sb, mmp,
++                            "Device is already active on another node.");
++               goto failed;
++       }
 +
 +skip:
-+      /*
-+       * write a new random sequence number.
-+       */
-+      mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());
-+
-+      retval = write_mmp_block(bh);
-+      if (retval)
-+              goto failed;
-+
-+      /*
-+       * wait for MMP interval and check mmp_seq.
-+       */
-+      if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
-+              ext4_warning(sb, "MMP startup interrupted, failing "
-+                           "mount\n");
-+              goto failed;
-+      }
-+
-+      retval = read_mmp_block(sb, &bh, mmp_block);
-+      if (retval)
-+              goto failed;
-+      mmp = (struct mmp_struct *)(bh->b_data);
-+      if (seq != le32_to_cpu(mmp->mmp_seq)) {
-+              dump_mmp_msg(sb, mmp, __func__,
-+                           "Device is already active on another node.");
-+              goto failed;
-+      }
-+
-+      mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
-+      if (!mmpd_data) {
-+              ext4_warning(sb, "not enough memory for mmpd_data");
-+              goto failed;
-+      }
-+      mmpd_data->sb = sb;
-+      mmpd_data->bh = bh;
-+
-+      /*
-+       * Start a kernel thread to update the MMP block periodically.
-+       */
-+      EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
-+                                           bdevname(bh->b_bdev,
-+                                                    mmp->mmp_bdevname));
-+      if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
-+              EXT4_SB(sb)->s_mmp_tsk = NULL;
-+              kfree(mmpd_data);
-+              ext4_warning(sb, "Unable to create kmmpd thread "
-+                           "for %s.", sb->s_id);
-+              goto failed;
-+      }
-+
-+      return 0;
++       /*
++        * write a new random sequence number.  Keep seq in CPU byte order:
++        * it is compared against le32_to_cpu(mmp->mmp_seq) after the re-read
++        * below, and a byte-swapped copy would never match on big-endian.
++        */
++       seq = mmp_new_seq();
++       mmp->mmp_seq = cpu_to_le32(seq);
++
++       retval = write_mmp_block(bh);
++       if (retval)
++               goto failed;
++
++       /*
++        * wait for MMP interval and check mmp_seq.
++        */
++       if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
++               ext4_warning(sb, "MMP startup interrupted, failing mount\n");
++               goto failed;
++       }
++
++       retval = read_mmp_block(sb, &bh, mmp_block);
++       if (retval)
++               goto failed;
++       mmp = (struct mmp_struct *)(bh->b_data);
++       if (seq != le32_to_cpu(mmp->mmp_seq)) {
++               dump_mmp_msg(sb, mmp,
++                            "Device is already active on another node.");
++               goto failed;
++       }
++
++       mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
++       if (!mmpd_data) {
++               ext4_warning(sb, "not enough memory for mmpd_data");
++               goto failed;
++       }
++       mmpd_data->sb = sb;
++       mmpd_data->bh = bh;
++
++       /*
++        * Start a kernel thread to update the MMP block periodically.
++        */
++       EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
++                                            bdevname(bh->b_bdev,
++                                                     mmp->mmp_bdevname));
++       if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
++               EXT4_SB(sb)->s_mmp_tsk = NULL;
++               kfree(mmpd_data);
++               ext4_warning(sb, "Unable to create kmmpd thread for %s.",
++                            sb->s_id);
++               goto failed;
++       }
++
++       return 0;
 +
 +failed:
-+      brelse(bh);
-+      return 1;
++       brelse(bh);
++       return 1;
 +}
 +
- static struct inode *ext4_nfs_get_inode(struct super_block *sb,
-                                       u64 ino, u32 generation)
- {
-@@ -2800,6 +3153,11 @@
++
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -40,6 +40,8 @@
+ #include <linux/log2.h>
+ #include <linux/crc16.h>
+ #include <asm/uaccess.h>
++#include <linux/kthread.h>
++#include <linux/utsname.h>
+ #include "ext4.h"
+ #include "ext4_jbd2.h"
+@@ -700,6 +702,8 @@ static void ext4_put_super(struct super_
+               invalidate_bdev(sbi->journal_bdev);
+               ext4_blkdev_remove(sbi);
+       }
++      if (sbi->s_mmp_tsk)
++              kthread_stop(sbi->s_mmp_tsk);
+       sb->s_fs_info = NULL;
+       /*
+        * Now that we are completely done shutting down the
+@@ -2799,6 +2803,10 @@ static int ext4_fill_super(struct super_
+       needs_recovery = (es->s_last_orphan != 0 ||
                          EXT4_HAS_INCOMPAT_FEATURE(sb,
                                    EXT4_FEATURE_INCOMPAT_RECOVER));
 +      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
 +          !(sb->s_flags & MS_RDONLY))
 +              if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
 +                      goto failed_mount3;
-+
        /*
         * The first inode we look at is the journal inode.  Don't try
-        * root first: it may be modified in the journal!
-@@ -3036,6 +3394,8 @@
+@@ -3036,6 +3044,8 @@ failed_mount3:
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
@@ -397,7 +550,7 @@ Index: linux-stage/fs/ext4/super.c
  failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
-@@ -3544,7 +3904,7 @@
+@@ -3544,7 +3554,7 @@ static int ext4_remount(struct super_blo
        struct ext4_mount_options old_opts;
        ext4_group_t g;
        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@@ -406,7 +559,7 @@ Index: linux-stage/fs/ext4/super.c
  #ifdef CONFIG_QUOTA
        int i;
  #endif
-@@ -3666,6 +4026,13 @@
+@@ -3666,6 +3676,13 @@ static int ext4_remount(struct super_blo
                                goto restore_opts;
                        if (!ext4_setup_super(sb, es, 0))
                                sb->s_flags &= ~MS_RDONLY;
@@ -420,82 +573,3 @@ Index: linux-stage/fs/ext4/super.c
                }
        }
        ext4_setup_system_zone(sb);
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h    2011-04-05 17:51:52.539390862 +0800
-+++ linux-stage/fs/ext4/ext4.h 2011-04-05 17:52:02.093529865 +0800
-@@ -893,7 +893,7 @@
-       __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
-       __le32  s_flags;                /* Miscellaneous flags */
-       __le16  s_raid_stride;          /* RAID stride */
--      __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
-+      __le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
-       __le64  s_mmp_block;            /* Block for multi-mount protection */
-       __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
-       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
-@@ -1040,6 +1040,9 @@
-       /* workqueue for dio unwritten */
-       struct workqueue_struct *dio_unwritten_wq;
-+
-+      /* Kernel thread for multiple mount protection */
-+      struct task_struct *s_mmp_tsk;
- };
- static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
-@@ -1176,7 +1179,8 @@
-                                        EXT4_FEATURE_INCOMPAT_META_BG| \
-                                        EXT4_FEATURE_INCOMPAT_EXTENTS| \
-                                        EXT4_FEATURE_INCOMPAT_64BIT| \
--                                       EXT4_FEATURE_INCOMPAT_FLEX_BG)
-+                                       EXT4_FEATURE_INCOMPAT_FLEX_BG| \
-+                                       EXT4_FEATURE_INCOMPAT_MMP)
- #define EXT4_FEATURE_RO_COMPAT_SUPP   (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
-                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
-@@ -1383,6 +1387,45 @@
- extern struct proc_dir_entry *ext4_proc_root;
- /*
-+ * This structure will be used for multiple mount protection. It will be
-+ * written into the block number saved in the s_mmp_block field in the
-+ * superblock. Programs that check MMP should assume that if
-+ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
-+ * to use the filesystem, regardless of how old the timestamp is.
-+ */
-+#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
-+#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
-+#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
-+#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
-+
-+struct mmp_struct {
-+      __le32  mmp_magic;
-+      __le32  mmp_seq;
-+      __le64  mmp_time;
-+      char    mmp_nodename[64];
-+      char    mmp_bdevname[32];
-+      __le16  mmp_check_interval;
-+      __le16  mmp_pad1;
-+      __le32  mmp_pad2[227];
-+};
-+
-+/* arguments passed to the mmp thread */
-+struct mmpd_data {
-+      struct buffer_head *bh; /* bh from initial read_mmp_block() */
-+      struct super_block *sb; /* super block of the fs */
-+};
-+
-+/*
-+ * Minimum interval for MMP checking in seconds.
-+ */
-+#define EXT4_MMP_MIN_CHECK_INTERVAL   5UL
-+
-+/*
-+ * Maximum interval for MMP checking in seconds.
-+ */
-+#define EXT4_MMP_MAX_CHECK_INTERVAL   300UL
-+
-+/*
-  * Function prototypes
-  */
index 840c97b..27584ac 100644 (file)
@@ -14,7 +14,7 @@ backfs_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/@BACKFS@/*.c))
 ext3_new_sources := extents.c mballoc.c group.h dynlocks.c fiemap.h
 ext3_new_headers := ext3_extents.h
 
-ext4_new_sources := dynlocks.c fiemap.h
+ext4_new_sources := dynlocks.c fiemap.h mmp.c
 ext4_new_headers :=
 
 new_sources := $(@BACKFS@_new_sources)