1 Index: linux-2.6.18-92.1.17/fs/ext3/super.c
2 ===================================================================
3 --- linux-2.6.18-92.1.17.orig/fs/ext3/super.c
4 +++ linux-2.6.18-92.1.17/fs/ext3/super.c
6 #include <linux/namei.h>
7 #include <linux/quotaops.h>
8 #include <linux/seq_file.h>
9 +#include <linux/kthread.h>
10 +#include <linux/utsname.h>
12 #include <asm/uaccess.h>
14 @@ -435,6 +437,8 @@ static void ext3_put_super (struct super
15 invalidate_bdev(sbi->journal_bdev, 0);
16 ext3_blkdev_remove(sbi);
19 + kthread_stop(sbi->s_mmp_tsk);
20 if (sbi->s_dev_proc) {
21 remove_proc_entry(sbi->s_dev_proc->name, proc_root_ext3);
22 sbi->s_dev_proc = NULL;
23 @@ -1543,6 +1547,313 @@ static ext3_fsblk_t descriptor_loc(struc
24 return (has_super + ext3_group_first_block_no(sb, bg));
28 + * Write the MMP block using WRITE_SYNC to try to get the block on-disk
31 +static int write_mmp_block(struct buffer_head *bh)
33 + mark_buffer_dirty(bh);
35 + bh->b_end_io = end_buffer_write_sync;
37 + submit_bh(WRITE_SYNC, bh);
39 + if (unlikely(!buffer_uptodate(bh)))
46 + * Read the MMP block. It _must_ be read from disk and hence we clear the
47 + * uptodate flag on the buffer.
49 +static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
50 + unsigned long mmp_block)
52 + struct mmp_struct *mmp;
55 + clear_buffer_uptodate(*bh);
59 + *bh = sb_bread(sb, mmp_block);
61 + ext3_warning(sb, __FUNCTION__,
62 + "Error while reading MMP block %lu", mmp_block);
66 + mmp = (struct mmp_struct *)((*bh)->b_data);
67 + if (le32_to_cpu(mmp->mmp_magic) != EXT3_MMP_MAGIC)
74 + * Dump as much information as possible to help the admin.
76 +static void dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
77 + const char *function, const char *msg)
79 + ext3_warning(sb, function, "%s", msg); /* msg is data, never a format string */
80 + ext3_warning(sb, function, "MMP failure info: last update time: %llu, "
81 + "last update node: %s, last update device: %s\n",
82 + (unsigned long long)le64_to_cpu(mmp->mmp_time), mmp->mmp_nodename,
87 + * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
89 +static int kmmpd(void *data)
91 + struct super_block *sb = (struct super_block *) data;
92 + struct ext3_super_block *es = EXT3_SB(sb)->s_es;
93 + struct buffer_head *bh = NULL;
94 + struct mmp_struct *mmp;
95 + unsigned long mmp_block;
97 + unsigned long failed_writes = 0;
98 + int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval) ?: EXT3_MMP_UPDATE_INTERVAL; /* never 0: a zero interval would skip the sleep below */
99 + unsigned mmp_check_interval;
100 + unsigned long last_update_time;
101 + unsigned long diff;
104 + mmp_block = le64_to_cpu(es->s_mmp_block);
105 + retval = read_mmp_block(sb, &bh, mmp_block);
109 + mmp = (struct mmp_struct *)(bh->b_data);
110 + mmp->mmp_time = cpu_to_le64(get_seconds());
112 + * Start with the higher mmp_check_interval and reduce it if
113 + * the MMP block is being updated on time.
115 + mmp_check_interval = max(5 * mmp_update_interval,
116 + EXT3_MMP_MIN_CHECK_INTERVAL);
117 + mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
118 + bdevname(bh->b_bdev, mmp->mmp_bdevname);
120 + down_read(&uts_sem);
121 + memcpy(mmp->mmp_nodename, system_utsname.nodename,
122 + sizeof(mmp->mmp_nodename));
125 + while (!kthread_should_stop()) {
126 + if (++seq > EXT3_MMP_SEQ_MAX)
129 + mmp->mmp_seq = cpu_to_le32(seq);
130 + mmp->mmp_time = cpu_to_le64(get_seconds());
131 + last_update_time = jiffies;
133 + retval = write_mmp_block(bh);
135 + * Don't spew too many error messages. Print one every
136 + * (s_mmp_update_interval * 60) seconds.
138 + if (retval && (failed_writes % 60) == 0) {
139 + ext3_error(sb, __FUNCTION__,
140 + "Error writing to MMP block");
144 + if (!(le32_to_cpu(es->s_feature_incompat) &
145 + EXT3_FEATURE_INCOMPAT_MMP)) {
146 + ext3_warning(sb, __FUNCTION__, "kmmpd being stopped "
147 + "since MMP feature has been disabled.");
148 + EXT3_SB(sb)->s_mmp_tsk = NULL;
152 + if (sb->s_flags & MS_RDONLY) {
153 + ext3_warning(sb, __FUNCTION__, "kmmpd being stopped "
154 + "since filesystem has been remounted as "
156 + EXT3_SB(sb)->s_mmp_tsk = NULL;
160 + diff = jiffies - last_update_time;
161 + if (diff < mmp_update_interval * HZ)
162 + schedule_timeout_interruptible(EXT3_MMP_UPDATE_INTERVAL*
166 + * We need to make sure that more than mmp_check_interval
167 + * seconds have not passed since writing. If that has happened
168 + * we need to check if the MMP block is as we left it.
170 + diff = jiffies - last_update_time;
171 + if (diff > mmp_check_interval * HZ) {
172 + struct buffer_head *bh_check = NULL;
173 + struct mmp_struct *mmp_check;
175 + retval = read_mmp_block(sb, &bh_check, mmp_block);
177 + EXT3_SB(sb)->s_mmp_tsk = NULL;
181 + mmp_check = (struct mmp_struct *)(bh_check->b_data);
182 + if (mmp->mmp_time != mmp_check->mmp_time ||
183 + memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
184 + sizeof(mmp->mmp_nodename)))
185 + dump_mmp_msg(sb, mmp_check, __FUNCTION__,
186 + "Error while updating MMP info. "
187 + "The filesystem seems to have "
188 + "been multiply mounted.");
194 + * Adjust the mmp_check_interval depending on how much time
195 + * it took for the MMP block to be written.
197 + mmp_check_interval = max(5 * diff / HZ,
198 + (unsigned long) EXT3_MMP_MIN_CHECK_INTERVAL);
199 + mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
203 + * Unmount seems to be clean.
205 + mmp->mmp_seq = cpu_to_le32(EXT3_MMP_SEQ_CLEAN);
206 + mmp->mmp_time = cpu_to_le64(get_seconds());
208 + retval = write_mmp_block(bh);
216 + * Get a random new sequence number but make sure it is not greater than
217 + * EXT3_MMP_SEQ_MAX.
219 +static unsigned int mmp_new_seq(void)
224 + get_random_bytes(&new_seq, sizeof(u32));
225 + } while (new_seq > EXT3_MMP_SEQ_MAX);
231 + * Protect the filesystem from being mounted more than once.
233 +static int ext3_multi_mount_protect(struct super_block *sb,
234 + unsigned long mmp_block)
236 + struct ext3_super_block *es = EXT3_SB(sb)->s_es;
237 + struct buffer_head *bh = NULL;
238 + struct mmp_struct *mmp = NULL;
240 + unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
243 + if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
244 + mmp_block >= le32_to_cpu(es->s_blocks_count)) {
245 + ext3_warning(sb, __FUNCTION__,
246 + "Invalid MMP block in superblock");
250 + retval = read_mmp_block(sb, &bh, mmp_block);
254 + mmp = (struct mmp_struct *)(bh->b_data);
256 + if (mmp_check_interval < EXT3_MMP_MIN_CHECK_INTERVAL)
257 + mmp_check_interval = EXT3_MMP_MIN_CHECK_INTERVAL;
260 + * If check_interval in MMP block is larger, use that instead of
261 + * update_interval from the superblock.
263 + if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) /* on-disk field is little-endian */
264 + mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
266 + seq = le32_to_cpu(mmp->mmp_seq);
267 + if (seq == EXT3_MMP_SEQ_CLEAN)
270 + if (seq == EXT3_MMP_SEQ_FSCK) {
271 + dump_mmp_msg(sb, mmp, __FUNCTION__,
272 + "fsck is running on the filesystem");
276 + schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1));
278 + retval = read_mmp_block(sb, &bh, mmp_block);
281 + mmp = (struct mmp_struct *)(bh->b_data);
282 + if (seq != le32_to_cpu(mmp->mmp_seq)) {
283 + dump_mmp_msg(sb, mmp, __FUNCTION__,
284 + "Device is already active on another node.");
290 + * write a new random sequence number.
292 + mmp->mmp_seq = cpu_to_le32(seq = mmp_new_seq()); /* seq stays CPU-endian for the re-check below */
294 + retval = write_mmp_block(bh);
299 + * wait for MMP interval and check mmp_seq.
301 + schedule_timeout_uninterruptible(HZ * (2 * mmp_check_interval + 1));
303 + retval = read_mmp_block(sb, &bh, mmp_block);
306 + mmp = (struct mmp_struct *)(bh->b_data);
307 + if (seq != le32_to_cpu(mmp->mmp_seq)) {
308 + dump_mmp_msg(sb, mmp, __FUNCTION__,
309 + "Device is already active on another node.");
314 + * Start a kernel thread to update the MMP block periodically.
316 + EXT3_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%02x:%02x",
319 + if (IS_ERR(EXT3_SB(sb)->s_mmp_tsk)) {
320 + EXT3_SB(sb)->s_mmp_tsk = NULL;
321 + ext3_warning(sb, __FUNCTION__, "Unable to create kmmpd thread "
322 + "for %s.", sb->s_id);
335 static int ext3_fill_super (struct super_block *sb, void *data, int silent)
337 @@ -1875,6 +2186,11 @@ static int ext3_fill_super (struct super
338 EXT3_HAS_INCOMPAT_FEATURE(sb,
339 EXT3_FEATURE_INCOMPAT_RECOVER));
341 + if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_MMP) &&
342 + !(sb->s_flags & MS_RDONLY))
343 + if (ext3_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
344 + goto failed_mount3;
347 * The first inode we look at is the journal inode. Don't try
348 * root first: it may be modified in the journal!
349 @@ -2007,6 +2323,8 @@ cantfind_ext3:
351 journal_destroy(sbi->s_journal);
353 + if (sbi->s_mmp_tsk)
354 + kthread_stop(sbi->s_mmp_tsk);
355 percpu_counter_destroy(&sbi->s_freeblocks_counter);
356 percpu_counter_destroy(&sbi->s_freeinodes_counter);
357 percpu_counter_destroy(&sbi->s_dirs_counter);
358 @@ -2482,7 +2800,7 @@ static int ext3_remount (struct super_bl
359 ext3_fsblk_t n_blocks_count = 0;
360 unsigned long old_sb_flags;
361 struct ext3_mount_options old_opts;
367 @@ -2580,6 +2898,11 @@ static int ext3_remount (struct super_bl
369 if (!ext3_setup_super (sb, es, 0))
370 sb->s_flags &= ~MS_RDONLY;
371 + if (EXT3_HAS_INCOMPAT_FEATURE(sb,
372 + EXT3_FEATURE_INCOMPAT_MMP))
373 + if (ext3_multi_mount_protect(sb,
374 + le64_to_cpu(es->s_mmp_block)))
379 Index: linux-2.6.18-92.1.17/include/linux/ext3_fs.h
380 ===================================================================
381 --- linux-2.6.18-92.1.17.orig/include/linux/ext3_fs.h
382 +++ linux-2.6.18-92.1.17/include/linux/ext3_fs.h
383 @@ -594,13 +594,17 @@ struct ext3_super_block {
384 __le32 s_first_meta_bg; /* First metablock block group */
385 __le32 s_mkfs_time; /* When the filesystem was created */
386 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
387 - __le32 s_blocks_count_hi; /* Blocks count high 32 bits */
388 +/*150*/ __le32 s_blocks_count_hi; /* Blocks count high 32 bits */
389 __le32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/
390 __le32 s_free_blocks_count_hi; /* Free blocks count high 32 bits */
391 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
392 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
393 - __le32 s_flags; /* Miscellaneous flags */
394 - __u32 s_reserved[167]; /* Padding to the end of the block */
395 +/*160*/ __le32 s_flags; /* Miscellaneous flags */
396 + __le16 s_raid_stride; /* RAID stride */
397 + __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */
398 + __le64 s_mmp_block; /* Block for multi-mount protection */
399 +/*170*/ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
400 + __le32 s_reserved[163]; /* Padding to the end of the block */
404 @@ -703,12 +707,14 @@ static inline int ext3_valid_inum(struct
405 #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
406 #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010
407 #define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
408 +#define EXT3_FEATURE_INCOMPAT_MMP 0x0100
410 #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
411 #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
412 EXT3_FEATURE_INCOMPAT_RECOVER| \
413 EXT3_FEATURE_INCOMPAT_META_BG| \
414 - EXT3_FEATURE_INCOMPAT_EXTENTS)
415 + EXT3_FEATURE_INCOMPAT_EXTENTS| \
416 + EXT3_FEATURE_INCOMPAT_MMP)
417 #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
418 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
419 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
420 @@ -871,6 +877,39 @@ ext3_group_first_block_no(struct super_b
421 #define ERR_BAD_DX_DIR -75000
424 + * This structure will be used for multiple mount protection. It will be
425 + * written into the block number saved in the s_mmp_block field in the
426 + * superblock. Programs that check MMP should assume that if
427 + * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
428 + * to use the filesystem, regardless of how old the timestamp is.
430 +#define EXT3_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */
431 +#define EXT3_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
432 +#define EXT3_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */
433 +#define EXT3_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */
439 + char mmp_nodename[64];
440 + char mmp_bdevname[32];
441 + __le16 mmp_check_interval;
443 + __le32 mmp_pad2[227];
447 + * Default interval in seconds to update the MMP sequence number.
449 +#define EXT3_MMP_UPDATE_INTERVAL 1
452 + * Minimum interval for MMP checking in seconds.
454 +#define EXT3_MMP_MIN_CHECK_INTERVAL 5
457 * Function prototypes
460 Index: linux-2.6.18-92.1.17/include/linux/ext3_fs_sb.h
461 ===================================================================
462 --- linux-2.6.18-92.1.17.orig/include/linux/ext3_fs_sb.h
463 +++ linux-2.6.18-92.1.17/include/linux/ext3_fs_sb.h
464 @@ -158,6 +158,7 @@ struct ext3_sb_info {
465 /* locality groups */
466 struct ext3_locality_group *s_locality_groups;
468 + struct task_struct *s_mmp_tsk; /* Kernel thread for multiple mount protection */
471 #define EXT3_GROUP_INFO(sb, group) \